From cd8ae85299d54155702a56811b2e035e63064d3d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 3 May 2015 21:34:46 -0700
Subject: tcp: provide SYN headers for passive connections

This patch allows a server application to get the TCP SYN headers for
its passive connections.  This is useful if the server is doing
fingerprinting of clients based on SYN packet contents.

Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN.

The first is used on a socket to enable saving the SYN headers
for child connections. This can be set before or after the listen()
call.

The latter is used to retrieve the SYN headers for passive connections,
if the parent listener has enabled TCP_SAVE_SYN.

TCP_SAVED_SYN is read once, it frees the saved SYN headers.

The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP
headers.

Original patch was written by Tom Herbert, I changed it to not hold
a full skb (and associated dst and conntracking reference).

We have used such patch for about 3 years at Google.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Tested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46efa03d2b11..ecccfdc50d76 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2482,6 +2482,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			icsk->icsk_syn_retries = val;
 		break;
 
+	case TCP_SAVE_SYN:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->save_syn = val;
+		break;
+
 	case TCP_LINGER2:
 		if (val < 0)
 			tp->linger2 = -1;
@@ -2818,6 +2825,34 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 	case TCP_NOTSENT_LOWAT:
 		val = tp->notsent_lowat;
 		break;
+	case TCP_SAVE_SYN:
+		val = tp->save_syn;
+		break;
+	case TCP_SAVED_SYN: {
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		lock_sock(sk);
+		if (tp->saved_syn) {
+			len = min_t(unsigned int, tp->saved_syn[0], len);
+			if (put_user(len, optlen)) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+			if (copy_to_user(optval, tp->saved_syn + 1, len)) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+			tcp_saved_syn_free(tp);
+			release_sock(sk);
+		} else {
+			release_sock(sk);
+			len = 0;
+			if (put_user(len, optlen))
+				return -EFAULT;
+		}
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3


From 8e4d980ac21596a9b91d8e720c77ad081975a0a8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 15 May 2015 12:39:28 -0700
Subject: tcp: fix behavior for epoll edge trigger

Under memory pressure, tcp_sendmsg() can fail to queue a packet
while no packet is present in write queue. If we return -EAGAIN
with no packet in write queue, no ACK packet will ever come
to raise EPOLLOUT.

We need to allow one skb per TCP socket, and make sure that
tcp sockets can release their forward allocations under pressure.

This is a followup to commit 790ba4566c1a ("tcp: set SOCK_NOSPACE
under memory pressure")

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ecccfdc50d76..9eabfd3e0925 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -815,9 +815,20 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 	/* The TCP header must be at least 32-bit aligned.  */
 	size = ALIGN(size, 4);
 
+	if (unlikely(tcp_under_memory_pressure(sk)))
+		sk_mem_reclaim_partial(sk);
+
 	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
-	if (skb) {
-		if (sk_wmem_schedule(sk, skb->truesize)) {
+	if (likely(skb)) {
+		bool mem_schedule;
+
+		if (skb_queue_len(&sk->sk_write_queue) == 0) {
+			mem_schedule = true;
+			sk_forced_mem_schedule(sk, skb->truesize);
+		} else {
+			mem_schedule = sk_wmem_schedule(sk, skb->truesize);
+		}
+		if (likely(mem_schedule)) {
 			skb_reserve(skb, sk->sk_prot->max_header);
 			/*
 			 * Make sure that we have exactly size bytes
-- 
cgit v1.2.3


From b66e91ccbc34ebd5a2f90f9e1bc1597e2924a500 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 15 May 2015 12:39:30 -0700
Subject: tcp: halves tcp_mem[] limits

Allowing tcp to use ~19% of physical memory is way too much,
and allowed bugs to be hidden. Add to this that some drivers use a full
page per incoming frame, so real cost can be twice the advertized one.

Reduce tcp_mem by 50 % as a first step to sanity.

tcp_mem[0,1,2] defaults are now 4.68%, 6.25%, 9.37% of physical memory.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9eabfd3e0925..c724195e5862 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3068,11 +3068,12 @@ __setup("thash_entries=", set_thash_entries);
 
 static void __init tcp_init_mem(void)
 {
-	unsigned long limit = nr_free_buffer_pages() / 8;
+	unsigned long limit = nr_free_buffer_pages() / 16;
+
 	limit = max(limit, 128UL);
-	sysctl_tcp_mem[0] = limit / 4 * 3;
-	sysctl_tcp_mem[1] = limit;
-	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
+	sysctl_tcp_mem[0] = limit / 4 * 3;		/* 4.68 % */
+	sysctl_tcp_mem[1] = limit;			/* 6.25 % */
+	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;	/* 9.37 % */
 }
 
 void __init tcp_init(void)
-- 
cgit v1.2.3


From aea0929e516a1ff4c4458203d1f3375eee9acc26 Mon Sep 17 00:00:00 2001
From: Eric B Munson <emunson@akamai.com>
Date: Mon, 18 May 2015 14:35:58 -0400
Subject: tcp: Return error instead of partial read for saved syn headers

Currently the getsockopt() requesting the cached contents of the syn
packet headers will fail silently if the caller uses a buffer that is
too small to contain the requested data.  Rather than fail silently and
discard the headers, getsockopt() should return an error and report the
required size to hold the data.

Signed-off-by: Eric B Munson <emunson@akamai.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c724195e5862..bb9bb844204f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2845,7 +2845,15 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 
 		lock_sock(sk);
 		if (tp->saved_syn) {
-			len = min_t(unsigned int, tp->saved_syn[0], len);
+			if (len < tp->saved_syn[0]) {
+				if (put_user(tp->saved_syn[0], optlen)) {
+					release_sock(sk);
+					return -EFAULT;
+				}
+				release_sock(sk);
+				return -EINVAL;
+			}
+			len = tp->saved_syn[0];
 			if (put_user(len, optlen)) {
 				release_sock(sk);
 				return -EFAULT;
-- 
cgit v1.2.3


From eb9344781a2f8381ed60cd9e662d9ced2d168ecb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 19 May 2015 13:26:55 -0700
Subject: tcp: add a force_schedule argument to sk_stream_alloc_skb()

In commit 8e4d980ac215 ("tcp: fix behavior for epoll edge trigger")
we fixed a possible hang of TCP sockets under memory pressure,
by allowing sk_stream_alloc_skb() to use sk_forced_mem_schedule()
if no packet is in socket write queue.

It turns out there are other cases where we want to force memory
schedule :

tcp_fragment() & tso_fragment() need to split a big TSO packet into
two smaller ones. If we block here because of TCP memory pressure,
we can effectively block TCP socket from sending new data.
If no further ACK is coming, this hang would be definitive, and socket
has no chance to effectively reduce its memory usage.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bb9bb844204f..ca1d476c80ef 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -808,7 +808,8 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 }
 EXPORT_SYMBOL(tcp_splice_read);
 
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+				    bool force_schedule)
 {
 	struct sk_buff *skb;
 
@@ -820,15 +821,15 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 
 	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
 	if (likely(skb)) {
-		bool mem_schedule;
+		bool mem_scheduled;
 
-		if (skb_queue_len(&sk->sk_write_queue) == 0) {
-			mem_schedule = true;
+		if (force_schedule) {
+			mem_scheduled = true;
 			sk_forced_mem_schedule(sk, skb->truesize);
 		} else {
-			mem_schedule = sk_wmem_schedule(sk, skb->truesize);
+			mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
 		}
-		if (likely(mem_schedule)) {
+		if (likely(mem_scheduled)) {
 			skb_reserve(skb, sk->sk_prot->max_header);
 			/*
 			 * Make sure that we have exactly size bytes
@@ -918,7 +919,8 @@ new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
-			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
+						  skb_queue_empty(&sk->sk_write_queue));
 			if (!skb)
 				goto wait_for_memory;
 
@@ -1154,7 +1156,8 @@ new_segment:
 
 			skb = sk_stream_alloc_skb(sk,
 						  select_size(sk, sg),
-						  sk->sk_allocation);
+						  sk->sk_allocation,
+						  skb_queue_empty(&sk->sk_write_queue));
 			if (!skb)
 				goto wait_for_memory;
 
-- 
cgit v1.2.3


From ce5ec440994b7b2b714633b516b9e0a4f6a98dfe Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 20 May 2015 15:52:53 +0000
Subject: tcp: ensure epoll edge trigger wakeup when write queue is empty

We currently rely on the setting of SOCK_NOSPACE in the write()
path to ensure that we wake up any epoll edge trigger waiters when
acks return to free space in the write queue. However, if we fail
to allocate even a single skb in the write queue, we could end up
waiting indefinitely.

Fix this by explicitly issuing a wakeup when we detect the condition
of an empty write queue and a return value of -EAGAIN. This allows
userspace to re-try as we expect this to be a temporary failure.

I've tested this approach by artificially making
sk_stream_alloc_skb() return NULL periodically. In that case,
epoll edge trigger waiters will hang indefinitely in epoll_wait()
without this patch.

Signed-off-by: Jason Baron <jbaron@akamai.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ca1d476c80ef..0a3f9a00565b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -999,6 +999,9 @@ do_error:
 	if (copied)
 		goto out;
 out_err:
+	/* make sure we wake any epoll edge trigger waiter */
+	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+		sk->sk_write_space(sk);
 	return sk_stream_error(sk, flags, err);
 }
 
@@ -1288,6 +1291,9 @@ do_error:
 		goto out;
 out_err:
 	err = sk_stream_error(sk, flags, err);
+	/* make sure we wake any epoll edge trigger waiter */
+	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+		sk->sk_write_space(sk);
 	release_sock(sk);
 	return err;
 }
-- 
cgit v1.2.3


From 2efd055c53c06b7e89c167c98069bab9afce7e59 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <mleitner@redhat.com>
Date: Wed, 20 May 2015 16:35:41 -0700
Subject: tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info

This patch tracks the total number of inbound and outbound segments on a
TCP socket. One may use this number to have an idea on connection
quality when compared against the retransmissions.

RFC4898 named these : tcpEStatsPerfSegsIn and tcpEStatsPerfSegsOut

These are a 32bit field each and can be fetched both from TCP_INFO
getsockopt() if one has a handle on a TCP socket, or from inet_diag
netlink facility (iproute2/ss patch will follow)

Note that tp->segs_out was placed near tp->snd_nxt for good data
locality and minimal performance impact, while tp->segs_in was placed
near tp->bytes_received for the same reason.

Join work with Eric Dumazet.

Note that received SYN are accounted on the listener, but sent SYNACK
are not accounted.

Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0a3f9a00565b..7f3e721b9e69 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2695,6 +2695,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	spin_lock_bh(&sk->sk_lock.slock);
 	info->tcpi_bytes_acked = tp->bytes_acked;
 	info->tcpi_bytes_received = tp->bytes_received;
+	info->tcpi_segs_out = tp->segs_out;
+	info->tcpi_segs_in = tp->segs_in;
 	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
-- 
cgit v1.2.3


From a60e3cc7c92973a31fad0fd04dc5cf4355d3d1ef Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 21 May 2015 17:00:00 +0200
Subject: net: make skb_splice_bits more configureable

Prepare skb_splice_bits to be able to deal with AF_UNIX sockets.

AF_UNIX sockets don't use lock_sock/release_sock and thus we have to
use a callback to make the locking and unlocking configureable.

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 90afcec3f427..65f791f74845 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -695,8 +695,9 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 	struct tcp_splice_state *tss = rd_desc->arg.data;
 	int ret;
 
-	ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
-			      tss->flags);
+	ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
+			      min(rd_desc->count, len), tss->flags,
+			      skb_socket_splice);
 	if (ret > 0)
 		rd_desc->count -= ret;
 	return ret;
-- 
cgit v1.2.3


From 35ac838a9b96470f999db04320f53a2033642bfb Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Mon, 15 Jun 2015 11:26:20 -0400
Subject: sock_diag: implement a get_info handler for inet

This get_info handler will simply dispatch to the appropriate
existing inet protocol handler.

This patch also includes a new netlink attribute
(INET_DIAG_PROTOCOL).  This attribute is currently only used
for multicast messages.  Without this attribute, there is no
way of knowing the IP protocol used by the socket information
being broadcast.  This attribute is not necessary in the 'dump'
variant of this protocol (though it could easily be added)
because dump requests are issued for specific family/protocol
pairs.

Tested: ss -E (note, the -E option has not yet been merged into
the upstream version of ss).

Signed-off-by: Craig Gallek <kraig@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 65f791f74845..697b86dd45b3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2624,13 +2624,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
 /* Return information about state of tcp endpoint in API format. */
 void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp;
 	unsigned int start;
 	u32 rate;
 
 	memset(info, 0, sizeof(*info));
+	if (sk->sk_type != SOCK_STREAM)
+		return;
 
 	info->tcpi_state = sk->sk_state;
 	info->tcpi_ca_state = icsk->icsk_ca_state;
-- 
cgit v1.2.3


From dfea2aa654243f70dc53b8648d0bbdeec55a7df1 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <cpaasch@apple.com>
Date: Thu, 18 Jun 2015 09:15:34 -0700
Subject: tcp: Do not call tcp_fastopen_reset_cipher from interrupt context

tcp_fastopen_reset_cipher really cannot be called from interrupt
context. It allocates the tcp_fastopen_context with GFP_KERNEL and
calls crypto_alloc_cipher, which allocates all kind of stuff with
GFP_KERNEL.

Thus, we might sleep when the key-generation is triggered by an
incoming TFO cookie-request which would then happen in interrupt-
context, as shown by enabling CONFIG_DEBUG_ATOMIC_SLEEP:

[   36.001813] BUG: sleeping function called from invalid context at mm/slub.c:1266
[   36.003624] in_atomic(): 1, irqs_disabled(): 0, pid: 1016, name: packetdrill
[   36.004859] CPU: 1 PID: 1016 Comm: packetdrill Not tainted 4.1.0-rc7 #14
[   36.006085] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
[   36.008250]  00000000000004f2 ffff88007f8838a8 ffffffff8171d53a ffff880075a084a8
[   36.009630]  ffff880075a08000 ffff88007f8838c8 ffffffff810967d3 ffff88007f883928
[   36.011076]  0000000000000000 ffff88007f8838f8 ffffffff81096892 ffff88007f89be00
[   36.012494] Call Trace:
[   36.012953]  <IRQ>  [<ffffffff8171d53a>] dump_stack+0x4f/0x6d
[   36.014085]  [<ffffffff810967d3>] ___might_sleep+0x103/0x170
[   36.015117]  [<ffffffff81096892>] __might_sleep+0x52/0x90
[   36.016117]  [<ffffffff8118e887>] kmem_cache_alloc_trace+0x47/0x190
[   36.017266]  [<ffffffff81680d82>] ? tcp_fastopen_reset_cipher+0x42/0x130
[   36.018485]  [<ffffffff81680d82>] tcp_fastopen_reset_cipher+0x42/0x130
[   36.019679]  [<ffffffff81680f01>] tcp_fastopen_init_key_once+0x61/0x70
[   36.020884]  [<ffffffff81680f2c>] __tcp_fastopen_cookie_gen+0x1c/0x60
[   36.022058]  [<ffffffff816814ff>] tcp_try_fastopen+0x58f/0x730
[   36.023118]  [<ffffffff81671788>] tcp_conn_request+0x3e8/0x7b0
[   36.024185]  [<ffffffff810e3872>] ? __module_text_address+0x12/0x60
[   36.025327]  [<ffffffff8167b2e1>] tcp_v4_conn_request+0x51/0x60
[   36.026410]  [<ffffffff816727e0>] tcp_rcv_state_process+0x190/0xda0
[   36.027556]  [<ffffffff81661f97>] ? __inet_lookup_established+0x47/0x170
[   36.028784]  [<ffffffff8167c2ad>] tcp_v4_do_rcv+0x16d/0x3d0
[   36.029832]  [<ffffffff812e6806>] ? security_sock_rcv_skb+0x16/0x20
[   36.030936]  [<ffffffff8167cc8a>] tcp_v4_rcv+0x77a/0x7b0
[   36.031875]  [<ffffffff816af8c3>] ? iptable_filter_hook+0x33/0x70
[   36.032953]  [<ffffffff81657d22>] ip_local_deliver_finish+0x92/0x1f0
[   36.034065]  [<ffffffff81657f1a>] ip_local_deliver+0x9a/0xb0
[   36.035069]  [<ffffffff81657c90>] ? ip_rcv+0x3d0/0x3d0
[   36.035963]  [<ffffffff81657569>] ip_rcv_finish+0x119/0x330
[   36.036950]  [<ffffffff81657ba7>] ip_rcv+0x2e7/0x3d0
[   36.037847]  [<ffffffff81610652>] __netif_receive_skb_core+0x552/0x930
[   36.038994]  [<ffffffff81610a57>] __netif_receive_skb+0x27/0x70
[   36.040033]  [<ffffffff81610b72>] process_backlog+0xd2/0x1f0
[   36.041025]  [<ffffffff81611482>] net_rx_action+0x122/0x310
[   36.042007]  [<ffffffff81076743>] __do_softirq+0x103/0x2f0
[   36.042978]  [<ffffffff81723e3c>] do_softirq_own_stack+0x1c/0x30

This patch moves the call to tcp_fastopen_init_key_once to the places
where a listener socket creates its TFO-state, which always happens in
user-context (either from the setsockopt, or implicitly during the
listen()-call)

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Fixes: 222e83d2e0ae ("tcp: switch tcp_fastopen key generation to net_get_random_once")
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1377f2a0472..bb2ce74f6004 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2545,10 +2545,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 
 	case TCP_FASTOPEN:
 		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
-		    TCPF_LISTEN)))
+		    TCPF_LISTEN))) {
+			tcp_fastopen_init_key_once(true);
+
 			err = fastopen_init_queue(sk, val);
-		else
+		} else {
 			err = -EINVAL;
+		}
 		break;
 	case TCP_TIMESTAMP:
 		if (!tp->repair)
-- 
cgit v1.2.3