From: Sepherosa Ziehau Date: Fri, 15 Jun 2012 09:54:59 +0000 (+0800) Subject: tcp: Add XMITNOW which bypasses the Nagle algorithm temporarily X-Git-Tag: v3.2.0~755^2~8 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/c1fabe8562a4226eed3605179909e4cf398eb6df tcp: Add XMITNOW which bypasses the Nagle algorithm temporarily This flag acts differently from ACKNOW in that no pure ACK will be sent. It is currently used by the (extended) limited transmit and the SACK based fast recovery. This flag is intended to fix the following bug in the SACK based fast recovery: The NextSeg() requires that if the unACKed segments could not pass IsLost(), a previously unsent segment should be selected. During an application-limited period, the size of the previously unsent segment could be less than the MSS, thus it could not be sent immediately according to the Nagle algorithm. In our SACK based fast recovery implementation, if the tcp_output() sends no segments, the current recovery transmit process will stop immediately. This could stop the ACK clock and cause a timeout retransmit, which could be avoided if the Nagle algorithm is bypassed temporarily for the small unsent segment selected by NextSeg(). When this flag is used with (extended) limited transmit, a certain amount of spurious early retransmits could be avoided. 
--- diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index c11c95a3db..81fb6fc08b 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -3281,6 +3281,7 @@ tcp_sack_rexmt(struct tcpcb *tp, boolean_t force) old_snd_max = tp->snd_max; if (nextrexmt == tp->snd_una) tcp_callout_stop(tp, tp->tt_rexmt); + tp->t_flags |= TF_XMITNOW; error = tcp_output(tp); if (error != 0) { tp->rexmt_high = old_rexmt_high; @@ -3341,6 +3342,7 @@ tcp_sack_limitedxmit(struct tcpcb *tp) tp->snd_cwnd = tp->snd_nxt - tp->snd_una + rounddown(cwnd_left, tp->t_maxseg); + tp->t_flags |= TF_XMITNOW; tcp_output(tp); sent = tp->snd_nxt - next; @@ -3606,6 +3608,7 @@ fastretransmit: tp->snd_nxt = tp->snd_max; tp->snd_cwnd = ownd + (tp->t_dupacks - tp->snd_limited) * tp->t_maxseg; + tp->t_flags |= TF_XMITNOW; tcp_output(tp); if (SEQ_LT(oldsndnxt, oldsndmax)) { diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 492e33e46e..2b3e6649a9 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -306,7 +306,7 @@ again: flags &= ~TH_SYN; off--, len++; if (len > 0 && tp->t_state == TCPS_SYN_SENT) { - tp->t_flags &= ~TF_ACKNOW; + tp->t_flags &= ~(TF_ACKNOW | TF_XMITNOW); return 0; } } @@ -450,6 +450,8 @@ again: goto send; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */ goto send; + if (tp->t_flags & TF_XMITNOW) + goto send; } /* @@ -554,6 +556,7 @@ again: /* * No reason to send a segment, just return. 
*/ + tp->t_flags &= ~TF_XMITNOW; return (0); send: @@ -1065,7 +1068,7 @@ after_th: KASSERT(error != 0, ("no error, but th not set")); } if (error) { - tp->t_flags &= ~TF_ACKNOW; + tp->t_flags &= ~(TF_ACKNOW | TF_XMITNOW); /* * We know that the packet was lost, so back out the @@ -1135,7 +1138,7 @@ out: tp->t_flags &= ~TF_RXRESIZED; } tp->last_ack_sent = tp->rcv_nxt; - tp->t_flags &= ~TF_ACKNOW; + tp->t_flags &= ~(TF_ACKNOW | TF_XMITNOW); if (tcp_delack_enabled) tcp_callout_stop(tp, tp->tt_delack); if (sendalot) diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index fe1ec9c18c..c1691d8ab9 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -175,7 +175,7 @@ struct tcpcb { #define TF_RXWIN0SENT 0x00080000 /* sent a receiver win 0 in response */ #define TF_FASTRECOVERY 0x00100000 /* in Fast Recovery */ #define TF_QUEDFIN 0x00200000 /* FIN has been received */ -#define TF_UNUSED007 0x00400000 +#define TF_XMITNOW 0x00400000 /* Temporarily override Nagle */ #define TF_UNUSED008 0x00800000 #define TF_UNUSED009 0x01000000 #define TF_FORCE 0x02000000 /* Set if forcing out a byte */