tcp/sack: Use RFC3517bis IsLost(snd_una) as fallback of early retransmit
[dragonfly.git] / sys / netinet / tcp_input.c
index dc2f2b5..d687b68 100644 (file)
@@ -350,6 +350,19 @@ tcp_paws_canreasslast(const struct tcpcb *tp, const struct tcphdr *th, int tlen)
        return FALSE;
 }
 
+static __inline void
+tcp_ncr_update_rxtthresh(struct tcpcb *tp)
+{      /* Recompute tp->t_rxtthresh from the data between snd_una and snd_max. */
+       int old_rxtthresh = tp->t_rxtthresh;    /* kept to detect a change */
+       uint32_t ownd = tp->snd_max - tp->snd_una;      /* outstanding data */
+       /* New threshold: half of (ownd / t_maxseg), but never less than 3. */
+       tp->t_rxtthresh = max(3, ((ownd / tp->t_maxseg) >> 1));
+       if (tp->t_rxtthresh != old_rxtthresh) { /* only on an actual change */
+               tcp_sack_update_lostseq(&tp->scb, tp->snd_una,
+                   tp->t_maxseg, tp->t_rxtthresh);
+       }
+}
+
 static int
 tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 {
@@ -3175,7 +3188,7 @@ tcp_sack_rexmt(struct tcpcb *tp)
                int error;
 
                old_rexmt_high = tp->rexmt_high;
-               if (!tcp_sack_nextseg(tp, &nextrexmt, &seglen, &rescue)) {
+               if (!tcp_sack_nextseg(tp, &nextrexmt, &seglen, &rescue)) {
                        tp->rexmt_high = old_rexmt_high;
                        break;
                }
@@ -3239,34 +3252,35 @@ tcp_sack_rexmt(struct tcpcb *tp)
        tp->snd_cwnd = ocwnd;
 }
 
+/*
+ * Return TRUE if any new segments were sent.
+ */
 static boolean_t
 tcp_sack_limitedxmit(struct tcpcb *tp)
 {
        tcp_seq oldsndnxt = tp->snd_nxt;
        tcp_seq oldsndmax = tp->snd_max;
        u_long ocwnd = tp->snd_cwnd;
-       uint32_t pipe;
+       uint32_t pipe, sent;
        boolean_t ret = FALSE;
+       tcp_seq_diff_t cwnd_left;
+       tcp_seq next;
 
        tp->rexmt_high = tp->snd_una - 1;
        pipe = tcp_sack_compute_pipe(tp);
-       while ((tcp_seq_diff_t)(ocwnd - pipe) >= (tcp_seq_diff_t)tp->t_maxseg) {
-               uint32_t sent;
-               tcp_seq next;
-               int error;
+       cwnd_left = (tcp_seq_diff_t)(ocwnd - pipe);
+       if (cwnd_left < (tcp_seq_diff_t)tp->t_maxseg)
+               return FALSE;
 
-               next = tp->snd_nxt = tp->snd_max;
-               tp->snd_cwnd = tp->snd_nxt - tp->snd_una + tp->t_maxseg;
+       next = tp->snd_nxt = tp->snd_max;
+       tp->snd_cwnd = tp->snd_nxt - tp->snd_una +
+           rounddown(cwnd_left, tp->t_maxseg);
 
-               error = tcp_output(tp);
-               if (error)
-                       break;
+       tcp_output(tp);
 
-               sent = tp->snd_nxt - next;
-               if (sent <= 0)
-                       break;
-               pipe += sent;
-               ++tcpstat.tcps_sndlimited;
+       sent = tp->snd_nxt - next;
+       if (sent > 0) {
+               tcpstat.tcps_sndlimited += howmany(sent, tp->t_maxseg);
                ret = TRUE;
        }
 
@@ -3277,6 +3291,9 @@ tcp_sack_limitedxmit(struct tcpcb *tp)
        }
        tp->snd_cwnd = ocwnd;
 
+       if (ret && TCP_DO_NCR(tp))
+               tcp_ncr_update_rxtthresh(tp);
+
        return ret;
 }
 
@@ -3362,6 +3379,8 @@ tcp_established(struct tcpcb *tp)
 static boolean_t
 tcp_fast_recovery(struct tcpcb *tp, tcp_seq th_ack, const struct tcpopt *to)
 {
+       boolean_t fast_sack_rexmt = TRUE;
+
        tcpstat.tcps_rcvdupack++;
 
        /*
@@ -3388,6 +3407,7 @@ tcp_fast_recovery(struct tcpcb *tp, tcp_seq th_ack, const struct tcpopt *to)
                        tp->snd_cwnd += tp->t_maxseg;
                        tcp_output(tp);
                }
+               return TRUE;
        } else if (SEQ_LT(th_ack, tp->snd_recover)) {
                tp->t_dupacks = 0;
                return FALSE;
@@ -3409,7 +3429,12 @@ tcp_fast_recovery(struct tcpcb *tp, tcp_seq th_ack, const struct tcpopt *to)
                 * retransmit.
                 */
                /* Do nothing; don't change t_dupacks */
-       } else if (++tp->t_dupacks == tp->t_rxtthresh) {
+               return TRUE;
+       } else if (tp->t_dupacks == 0 && TCP_DO_NCR(tp)) {
+               tcp_ncr_update_rxtthresh(tp);
+       }
+
+       if (++tp->t_dupacks == tp->t_rxtthresh) {
                tcp_seq old_snd_nxt;
                u_int win;
 
@@ -3444,26 +3469,40 @@ fastretransmit:
                        tp->snd_nxt = old_snd_nxt;
                KASSERT(tp->snd_limited <= 2, ("tp->snd_limited too big"));
                if (TCP_DO_SACK(tp)) {
-                       tcp_sack_rexmt(tp);
+                       if (fast_sack_rexmt)
+                               tcp_sack_rexmt(tp);
                } else {
                        tp->snd_cwnd += tp->t_maxseg *
                            (tp->t_dupacks - tp->snd_limited);
                }
-       } else if (tcp_do_rfc3517bis && TCP_DO_SACK(tp)) {
-               if (tcp_rfc3517bis_rxt &&
+       } else if ((tcp_do_rfc3517bis && TCP_DO_SACK(tp)) || TCP_DO_NCR(tp)) {
+               /*
+                * RFC3517bis recommends reducing the byte threshold and
+                * entering fast retransmit if IsLost(snd_una).  However,
+                * if we use IsLost(snd_una)-based fast retransmit here,
+                * segment reordering will cause spurious retransmits.  So
+                * we defer the IsLost(snd_una)-based fast retransmit until
+                * extended limited transmit can't send any segments and
+                * early retransmit can't be done.
+                */
+               if (tcp_rfc3517bis_rxt && tcp_do_rfc3517bis &&
                    tcp_sack_islost(&tp->scb, tp->snd_una))
                        goto fastretransmit;
-               if (tcp_do_limitedtransmit) {
-                       /* outstanding data */
-                       uint32_t ownd =
-                           tp->snd_max - tp->snd_una;
-
-                       if (!tcp_sack_limitedxmit(tp) &&
-                           need_early_retransmit(tp, ownd)) {
-                               ++tcpstat.tcps_sndearlyrexmit;
-                               tp->rxt_flags |=
-                                   TRXT_F_EARLYREXMT;
-                               goto fastretransmit;
+
+               if (tcp_do_limitedtransmit || TCP_DO_NCR(tp)) {
+                       if (!tcp_sack_limitedxmit(tp)) {
+                               /* outstanding data */
+                               uint32_t ownd = tp->snd_max - tp->snd_una;
+
+                               if (need_early_retransmit(tp, ownd)) {
+                                       ++tcpstat.tcps_sndearlyrexmit;
+                                       tp->rxt_flags |= TRXT_F_EARLYREXMT;
+                                       goto fastretransmit;
+                               } else if (tcp_do_rfc3517bis &&
+                                   tcp_sack_islost(&tp->scb, tp->snd_una)) {
+                                       fast_sack_rexmt = FALSE;
+                                       goto fastretransmit;
+                               }
                        }
                }
        } else if (tcp_do_limitedtransmit) {