Implement the Eifel Detection Algorithm for TCP (RFC 3522).
author Jeffrey Hsu <hsu@dragonflybsd.org>
Wed, 13 Aug 2003 18:34:25 +0000 (18:34 +0000)
committer Jeffrey Hsu <hsu@dragonflybsd.org>
Wed, 13 Aug 2003 18:34:25 +0000 (18:34 +0000)
sys/netinet/tcp_input.c
sys/netinet/tcp_timer.c
sys/netinet/tcp_var.h

index 908ce7f..6ed1a05 100644 (file)
@@ -32,7 +32,7 @@
  *
  *     @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
  * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.107.2.38 2003/05/21 04:46:41 cjc Exp $
- * $DragonFly: src/sys/netinet/tcp_input.c,v 1.7 2003/08/07 21:54:32 dillon Exp $
+ * $DragonFly: src/sys/netinet/tcp_input.c,v 1.8 2003/08/13 18:34:25 hsu Exp $
  */
 
 #include "opt_ipfw.h"          /* for ipfw_fwd         */
@@ -135,6 +135,10 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
     &tcp_do_rfc3390, 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
+static int tcp_do_eifel_detect = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, eifel, CTLFLAG_RW,
+    &tcp_do_eifel_detect, 0, "Eifel detection algorithm (RFC 3522)");
+
 struct inpcbhead tcb;
 #define        tcb6    tcb  /* for KAME src sync over BSD*'s */
 struct inpcbinfo tcbinfo;
@@ -358,6 +362,7 @@ tcp_input(m, off0, proto)
        struct rmxp_tao tao_noncached;  /* in case there's no cached entry */
        struct sockaddr_in *next_hop = NULL;
        int rstreason; /* For badport_bandlim accounting purposes */
+       int useTS;                      /* use timestamps in Eifel detection */
        struct ip6_hdr *ip6 = NULL;
 #ifdef INET6
        int isipv6;
@@ -964,8 +969,15 @@ after_listen:
                                /*
                                 * "bad retransmit" recovery
                                 */
-                               if (tp->t_rxtshift == 1 &&
-                                   ticks < tp->t_badrxtwin) {
+                               useTS = tcp_do_eifel_detect &&
+                                       (to.to_flags & TOF_TS) &&
+                                       to.to_tsecr;
+                               if ((useTS &&
+                                    (tp->t_flags & TF_FIRSTACCACK) &&
+                                    (to.to_tsecr < tp->t_rexmtTS)) ||
+                                   (!useTS &&
+                                    (tp->t_rxtshift == 1 &&
+                                     ticks < tp->t_badrxtwin))) {
                                        tp->snd_cwnd = tp->snd_cwnd_prev;
                                        tp->snd_ssthresh =
                                            tp->snd_ssthresh_prev;
@@ -974,7 +986,13 @@ after_listen:
                                            ENTER_FASTRECOVERY(tp);
                                        tp->snd_nxt = tp->snd_max;
                                        tp->t_badrxtwin = 0;
+                                       tp->t_rxtshift = 0;
+                                       if (tp->t_flags & TF_FASTREXMT)
+                                               ++tcpstat.tcps_sndfastrexmitbad;
+                                       else
+                                               ++tcpstat.tcps_sndrtobad;
                                }
+                               tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT);
                                /*
                                 * Recalculate the retransmit timer / rtt.
                                 *
@@ -1689,6 +1707,11 @@ trimthenstep6:
                                                tp->t_dupacks = 0;
                                                break;
                                        }
+                                       if (tcp_do_eifel_detect &&
+                                           (tp->t_flags & TF_RCVD_TSTMP)) {
+                                               tcp_save_congestion_state(tp);
+                                               tp->t_flags |= TF_FASTREXMT;
+                                       }
                                        win = min(tp->snd_wnd, tp->snd_cwnd) /
                                            2 / tp->t_maxseg;
                                        if (win < 2)
@@ -1813,14 +1836,24 @@ process_ACK:
                 * original cwnd and ssthresh, and proceed to transmit where
                 * we left off.
                 */
-               if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
+               useTS = tcp_do_eifel_detect && (to.to_flags & TOF_TS) &&
+                   to.to_tsecr;
+               if ((useTS && (tp->t_flags & TF_FIRSTACCACK) && acked &&
+                    (to.to_tsecr < tp->t_rexmtTS)) ||
+                   (!useTS &&
+                    (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin))) {
                        tp->snd_cwnd = tp->snd_cwnd_prev;
                        tp->snd_ssthresh = tp->snd_ssthresh_prev;
                        tp->snd_recover = tp->snd_recover_prev;
                        if (tp->t_flags & TF_WASFRECOVERY)
                                ENTER_FASTRECOVERY(tp);
                        tp->snd_nxt = tp->snd_max;
-                       tp->t_badrxtwin = 0;    /* XXX probably not required */ 
+                       tp->t_badrxtwin = 0;    /* XXX probably not required */
+                       tp->t_rxtshift = 0;
+                       if (tp->t_flags & TF_FASTREXMT)
+                               ++tcpstat.tcps_sndfastrexmitbad;
+                       else
+                               ++tcpstat.tcps_sndrtobad;
                }
 
                /*
@@ -1864,6 +1897,9 @@ process_ACK:
                if (acked == 0)
                        goto step6;
 
+               /* Stop looking for an acceptable ACK since one was received. */
+               tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT);
+
                /*
                 * When new data is acked, open the congestion window.
                 * If the window gives us less than ssthresh packets
index 49fb407..0292aff 100644 (file)
@@ -32,7 +32,7 @@
  *
  *     @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
  * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.14 2003/02/03 02:33:41 hsu Exp $
- * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.3 2003/07/23 06:21:01 dillon Exp $
+ * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.4 2003/08/13 18:34:25 hsu Exp $
  */
 
 #include "opt_compat.h"
@@ -351,6 +351,22 @@ out:
        splx(s);
 }
 
+void
+tcp_save_congestion_state(struct tcpcb *tp)    /* snapshot state so a bad retransmit can be undone */
+{
+       tp->snd_cwnd_prev = tp->snd_cwnd;       /* the *_prev values are copied back by tcp_input() */
+       tp->snd_ssthresh_prev = tp->snd_ssthresh;
+       tp->snd_recover_prev = tp->snd_recover;
+       if (IN_FASTRECOVERY(tp))        /* remember whether we were in fast recovery */
+         tp->t_flags |= TF_WASFRECOVERY;
+       else
+         tp->t_flags &= ~TF_WASFRECOVERY;
+       if (tp->t_flags & TF_RCVD_TSTMP) {      /* presumably: peer uses the timestamp option -- confirm */
+               tp->t_rexmtTS = ticks;  /* NOTE(review): compared with plain '<' against to_tsecr; wraparound looks unhandled -- confirm */
+               tp->t_flags |= TF_FIRSTACCACK;  /* start looking for the 1st acceptable ACK */
+       }
+}
+
 void
 tcp_timer_rexmt(xtp)
        void *xtp;
@@ -391,14 +407,9 @@ tcp_timer_rexmt(xtp)
                 * "On Estimating End-to-End Network Path Properties" by
                 * Allman and Paxson for more details.
                 */
-               tp->snd_cwnd_prev = tp->snd_cwnd;
-               tp->snd_ssthresh_prev = tp->snd_ssthresh;
-               tp->snd_recover_prev = tp->snd_recover;
-               if (IN_FASTRECOVERY(tp))
-                 tp->t_flags |= TF_WASFRECOVERY;
-               else
-                 tp->t_flags &= ~TF_WASFRECOVERY;
                tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+               tcp_save_congestion_state(tp);
+               tp->t_flags &= ~TF_FASTREXMT;
        }
        tcpstat.tcps_rexmttimeo++;
        if (tp->t_state == TCPS_SYN_SENT)
index ba24795..3e451dd 100644 (file)
@@ -32,7 +32,7 @@
  *
  *     @(#)tcp_var.h   8.4 (Berkeley) 5/24/95
  * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.56.2.13 2003/02/03 02:34:07 hsu Exp $
- * $DragonFly: src/sys/netinet/tcp_var.h,v 1.4 2003/07/24 01:31:07 dillon Exp $
+ * $DragonFly: src/sys/netinet/tcp_var.h,v 1.5 2003/08/13 18:34:25 hsu Exp $
  */
 
 #ifndef _NETINET_TCP_VAR_H_
@@ -105,6 +105,8 @@ struct tcpcb {
 #define        TF_RXWIN0SENT   0x080000        /* sent a receiver win 0 in response */
 #define        TF_FASTRECOVERY 0x100000        /* in NewReno Fast Recovery */
 #define        TF_WASFRECOVERY 0x200000        /* was in NewReno Fast Recovery */
+#define        TF_FIRSTACCACK  0x400000        /* Look for 1st acceptable ACK. */
+#define        TF_FASTREXMT    0x800000        /* Did Fast Retransmit. */
        int     t_force;                /* 1 if forcing out a byte */
 
        tcp_seq snd_una;                /* send unacknowledged */
@@ -178,6 +180,7 @@ struct tcpcb {
        u_long  snd_ssthresh_prev;      /* ssthresh prior to retransmit */
        tcp_seq snd_recover_prev;       /* snd_recover prior to retransmit */
        u_long  t_badrxtwin;            /* window for retransmit recovery */
+       u_long  t_rexmtTS;              /* timestamp of last retransmit */
        u_char  snd_limited;            /* segments limited transmitted */
 };
 
@@ -324,6 +327,8 @@ struct      tcpstat {
        u_long  tcps_sndbyte;           /* data bytes sent */
        u_long  tcps_sndrexmitpack;     /* data packets retransmitted */
        u_long  tcps_sndrexmitbyte;     /* data bytes retransmitted */
+       u_long  tcps_sndrtobad;         /* spurious RTO retransmissions */
+       u_long  tcps_sndfastrexmitbad;  /* spurious Fast Retransmissions */
        u_long  tcps_sndacks;           /* ack-only packets sent */
        u_long  tcps_sndprobe;          /* window probes sent */
        u_long  tcps_sndurg;            /* packets sent with URG only */
@@ -475,6 +480,7 @@ void         tcp_respond __P((struct tcpcb *, void *,
            struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int));
 struct rtentry *
         tcp_rtlookup __P((struct in_conninfo *));
+void    tcp_save_congestion_state(struct tcpcb *tp);
 void    tcp_setpersist __P((struct tcpcb *));
 void    tcp_slowtimo __P((void));
 struct tcptemp *