From efd4b327051d59abff64e02833b31a60945fbc17 Mon Sep 17 00:00:00 2001 From: Jeffrey Hsu Date: Wed, 13 Aug 2003 18:34:25 +0000 Subject: [PATCH] Implement the Eifel Detection Algorithm for TCP (RFC 3522). --- sys/netinet/tcp_input.c | 46 ++++++++++++++++++++++++++++++++++++----- sys/netinet/tcp_timer.c | 27 +++++++++++++++++------- sys/netinet/tcp_var.h | 8 ++++++- 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 908ce7f9e5..6ed1a0501a 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -32,7 +32,7 @@ * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.107.2.38 2003/05/21 04:46:41 cjc Exp $ - * $DragonFly: src/sys/netinet/tcp_input.c,v 1.7 2003/08/07 21:54:32 dillon Exp $ + * $DragonFly: src/sys/netinet/tcp_input.c,v 1.8 2003/08/13 18:34:25 hsu Exp $ */ #include "opt_ipfw.h" /* for ipfw_fwd */ @@ -135,6 +135,10 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW, &tcp_do_rfc3390, 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); +static int tcp_do_eifel_detect = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, eifel, CTLFLAG_RW, + &tcp_do_eifel_detect, 0, "Eifel detection algorithm (RFC 3522)"); + struct inpcbhead tcb; #define tcb6 tcb /* for KAME src sync over BSD*'s */ struct inpcbinfo tcbinfo; @@ -358,6 +362,7 @@ tcp_input(m, off0, proto) struct rmxp_tao tao_noncached; /* in case there's no cached entry */ struct sockaddr_in *next_hop = NULL; int rstreason; /* For badport_bandlim accounting purposes */ + int useTS; /* use timestamps in Eifel detection */ struct ip6_hdr *ip6 = NULL; #ifdef INET6 int isipv6; @@ -964,8 +969,15 @@ after_listen: /* * "bad retransmit" recovery */ - if (tp->t_rxtshift == 1 && - ticks < tp->t_badrxtwin) { + useTS = tcp_do_eifel_detect && + (to.to_flags & TOF_TS) && + to.to_tsecr; + if ((useTS && + (tp->t_flags & TF_FIRSTACCACK) && + (to.to_tsecr < tp->t_rexmtTS)) || + (!useTS && + (tp->t_rxtshift 
== 1 && + ticks < tp->t_badrxtwin))) { tp->snd_cwnd = tp->snd_cwnd_prev; tp->snd_ssthresh = tp->snd_ssthresh_prev; @@ -974,7 +986,13 @@ after_listen: ENTER_FASTRECOVERY(tp); tp->snd_nxt = tp->snd_max; tp->t_badrxtwin = 0; + tp->t_rxtshift = 0; + if (tp->t_flags & TF_FASTREXMT) + ++tcpstat.tcps_sndfastrexmitbad; + else + ++tcpstat.tcps_sndrtobad; } + tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT); /* * Recalculate the retransmit timer / rtt. * @@ -1689,6 +1707,11 @@ trimthenstep6: tp->t_dupacks = 0; break; } + if (tcp_do_eifel_detect && + (tp->t_flags & TF_RCVD_TSTMP)) { + tcp_save_congestion_state(tp); + tp->t_flags |= TF_FASTREXMT; + } win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; if (win < 2) @@ -1813,14 +1836,24 @@ process_ACK: * original cwnd and ssthresh, and proceed to transmit where * we left off. */ - if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) { + useTS = tcp_do_eifel_detect && (to.to_flags & TOF_TS) && + to.to_tsecr; + if ((useTS && (tp->t_flags & TF_FIRSTACCACK) && acked && + (to.to_tsecr < tp->t_rexmtTS)) || + (!useTS && + (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin))) { tp->snd_cwnd = tp->snd_cwnd_prev; tp->snd_ssthresh = tp->snd_ssthresh_prev; tp->snd_recover = tp->snd_recover_prev; if (tp->t_flags & TF_WASFRECOVERY) ENTER_FASTRECOVERY(tp); tp->snd_nxt = tp->snd_max; - tp->t_badrxtwin = 0; /* XXX probably not required */ + tp->t_badrxtwin = 0; /* XXX probably not required */ + tp->t_rxtshift = 0; + if (tp->t_flags & TF_FASTREXMT) + ++tcpstat.tcps_sndfastrexmitbad; + else + ++tcpstat.tcps_sndrtobad; } /* @@ -1864,6 +1897,9 @@ process_ACK: if (acked == 0) goto step6; + /* Stop looking for an acceptable ACK since one was received. */ + tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT); + /* * When new data is acked, open the congestion window. 
* If the window gives us less than ssthresh packets diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 49fb407a10..0292aff1a9 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -32,7 +32,7 @@ * * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.14 2003/02/03 02:33:41 hsu Exp $ - * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.3 2003/07/23 06:21:01 dillon Exp $ + * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.4 2003/08/13 18:34:25 hsu Exp $ */ #include "opt_compat.h" @@ -351,6 +351,22 @@ out: splx(s); } +void +tcp_save_congestion_state(struct tcpcb *tp) +{ + tp->snd_cwnd_prev = tp->snd_cwnd; + tp->snd_ssthresh_prev = tp->snd_ssthresh; + tp->snd_recover_prev = tp->snd_recover; + if (IN_FASTRECOVERY(tp)) + tp->t_flags |= TF_WASFRECOVERY; + else + tp->t_flags &= ~TF_WASFRECOVERY; + if (tp->t_flags & TF_RCVD_TSTMP) { + tp->t_rexmtTS = ticks; + tp->t_flags |= TF_FIRSTACCACK; + } +} + void tcp_timer_rexmt(xtp) void *xtp; @@ -391,14 +407,9 @@ tcp_timer_rexmt(xtp) * "On Estimating End-to-End Network Path Properties" by * Allman and Paxson for more details. 
*/ - tp->snd_cwnd_prev = tp->snd_cwnd; - tp->snd_ssthresh_prev = tp->snd_ssthresh; - tp->snd_recover_prev = tp->snd_recover; - if (IN_FASTRECOVERY(tp)) - tp->t_flags |= TF_WASFRECOVERY; - else - tp->t_flags &= ~TF_WASFRECOVERY; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); + tcp_save_congestion_state(tp); + tp->t_flags &= ~TF_FASTREXMT; } tcpstat.tcps_rexmttimeo++; if (tp->t_state == TCPS_SYN_SENT) diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index ba24795fd9..3e451dda65 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -32,7 +32,7 @@ * * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.56.2.13 2003/02/03 02:34:07 hsu Exp $ - * $DragonFly: src/sys/netinet/tcp_var.h,v 1.4 2003/07/24 01:31:07 dillon Exp $ + * $DragonFly: src/sys/netinet/tcp_var.h,v 1.5 2003/08/13 18:34:25 hsu Exp $ */ #ifndef _NETINET_TCP_VAR_H_ @@ -105,6 +105,8 @@ struct tcpcb { #define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */ #define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */ #define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */ +#define TF_FIRSTACCACK 0x400000 /* Look for 1st acceptable ACK. */ +#define TF_FASTREXMT 0x800000 /* Did Fast Retransmit. 
*/ int t_force; /* 1 if forcing out a byte */ tcp_seq snd_una; /* send unacknowledged */ @@ -178,6 +180,7 @@ struct tcpcb { u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ u_long t_badrxtwin; /* window for retransmit recovery */ + u_long t_rexmtTS; /* timestamp of last retransmit */ u_char snd_limited; /* segments limited transmitted */ }; @@ -324,6 +327,8 @@ struct tcpstat { u_long tcps_sndbyte; /* data bytes sent */ u_long tcps_sndrexmitpack; /* data packets retransmitted */ u_long tcps_sndrexmitbyte; /* data bytes retransmitted */ + u_long tcps_sndrtobad; /* spurious RTO retransmissions */ + u_long tcps_sndfastrexmitbad; /* spurious Fast Retransmissions */ u_long tcps_sndacks; /* ack-only packets sent */ u_long tcps_sndprobe; /* window probes sent */ u_long tcps_sndurg; /* packets sent with URG only */ @@ -475,6 +480,7 @@ void tcp_respond __P((struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int)); struct rtentry * tcp_rtlookup __P((struct in_conninfo *)); +void tcp_save_congestion_state(struct tcpcb *tp); void tcp_setpersist __P((struct tcpcb *)); void tcp_slowtimo __P((void)); struct tcptemp * -- 2.41.0