summaryrefslogtreecommitdiff
path: root/include/linux/tcp.h
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-10-21 07:00:59 -0700
committerDavid S. Miller <davem@davemloft.net>2015-10-21 07:00:59 -0700
commiteb9fae328faff9807a4ab5c1834b19f34dd155d4 (patch)
tree86f37587abdfeee11ad36c75dcae37adf8aa091f /include/linux/tcp.h
parentc8fdc324916a864de753db6de6423b048c20cc0f (diff)
parent4f41b1c58a32537542f14c1150099131613a5e8a (diff)
Merge branch 'tcp-rack'
Yuchung Cheng says: ==================== RACK loss detection RACK (Recent ACK) loss recovery uses the notion of time instead of packet sequence (FACK) or counts (dupthresh). It's inspired by the FACK heuristic in tcp_mark_lost_retrans(): when a limited transmit (new data packet) is sacked in recovery, then any retransmission sent before that newly sacked packet was sent must have been lost, since at least one round trip time has elapsed. But that existing heuristic from tcp_mark_lost_retrans() has several limitations: 1) it can't detect tail drops since it depends on limited transmit 2) it's disabled upon reordering (assumes no reordering) 3) it's only enabled in fast recovery but not timeout recovery RACK addresses these limitations with a core idea: an unacknowledged packet P1 is deemed lost if a packet P2 that was sent later is is s/acked, since at least one round trip has passed. Since RACK cares about the time sequence instead of the data sequence of packets, it can detect tail drops when a later retransmission is s/acked, while FACK or dupthresh can't. For reordering RACK uses a dynamically adjusted reordering window ("reo_wnd") to reduce false positives on ever (small) degree of reordering, similar to the delayed Early Retransmit. In the current patch set RACK is only a supplemental loss detection and does not trigger fast recovery. However we are developing RACK to replace or consolidate FACK/dupthresh, early retransmit, and thin-dupack. These heuristics all implicitly bear the time notion. For example, the delayed Early Retransmit is simply applying RACK to trigger the fast recovery with small inflight. RACK requires measuring the minimum RTT. Tracking a global min is less robust due to traffic engineering pathing changes. Therefore it uses a windowed filter by Kathleen Nichols. The min RTT can also be useful for various other purposes like congestion control or stat monitoring. This patch has been used on Google servers for well over 1 year. RACK has also been implemented in the QUIC protocol. We are submitting an IETF draft as well. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux/tcp.h')
-rw-r--r--include/linux/tcp.h11
1 files changed, 9 insertions, 2 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 86a7edaa6797..5dce9705fe84 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -194,6 +194,12 @@ struct tcp_sock {
u32 window_clamp; /* Maximal window to advertise */
u32 rcv_ssthresh; /* Current window clamp */
+ /* Information of the most recently (s)acked skb */
+ struct tcp_rack {
+ struct skb_mstamp mstamp; /* (Re)sent time of the skb */
+ u8 advanced; /* mstamp advanced since last lost marking */
+ u8 reord; /* reordering detected */
+ } rack;
u16 advmss; /* Advertised MSS */
u8 unused;
u8 nonagle : 4,/* Disable Nagle algorithm? */
@@ -217,6 +223,9 @@ struct tcp_sock {
u32 mdev_max_us; /* maximal mdev for the last rtt period */
u32 rttvar_us; /* smoothed mdev_max */
u32 rtt_seq; /* sequence number to update rttvar */
+ struct rtt_meas {
+ u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */
+ } rtt_min[3];
u32 packets_out; /* Packets which are "in flight" */
u32 retrans_out; /* Retransmitted packets out */
@@ -280,8 +289,6 @@ struct tcp_sock {
int lost_cnt_hint;
u32 retransmit_high; /* L-bits may be on up to this seqno */
- u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */
-
u32 prior_ssthresh; /* ssthresh saved at recovery start */
u32 high_seq; /* snd_nxt at onset of congestion */