tcp: Fix window scaling for accepted socket
author: Sepherosa Ziehau <sephe@dragonflybsd.org>
Fri, 27 Apr 2012 06:49:07 +0000 (14:49 +0800)
committer: Sepherosa Ziehau <sephe@dragonflybsd.org>
Fri, 27 Apr 2012 07:57:14 +0000 (15:57 +0800)
- Retire tcpcb.requested_s_scale, use tcpcb.snd_scale directly.
- Set tcpcb.snd_wnd in SYN_SENT state only if the TCP flags contain SYN.
- Save the window advertised by the other side into the syncache, and set up
  tcpcb.snd_wnd according to the saved value after the 3-way handshake is done.
- Delay tiwin setup in tcp_input(), specifically until after tcpcb.snd_scale
  is set up on the SO_ACCEPTCONN path.

This is intended to fix the window scaling bug that occurs when the sender
accepts the connection and data only flows from the sender to the receiver.

sys/netinet/tcp_input.c
sys/netinet/tcp_syncache.c
sys/netinet/tcp_var.h

index 3e30024..2bd4c82 100644 (file)
@@ -850,12 +850,6 @@ findpcb:
        if (tp->t_state <= TCPS_CLOSED)
                goto drop;
 
-       /* Unscale the window into a 32-bit value. */
-       if (!(thflags & TH_SYN))
-               tiwin = th->th_win << tp->snd_scale;
-       else
-               tiwin = th->th_win;
-
        so = inp->inp_socket;
 
 #ifdef TCPDEBUG
@@ -939,14 +933,7 @@ findpcb:
                                tp->snd_up = tp->snd_una;
                                tp->snd_max = tp->snd_nxt = tp->iss + 1;
                                tp->last_ack_sent = tp->rcv_nxt;
-/*
- * XXX possible bug - it doesn't appear that tp->snd_wnd is unscaled
- * until the _second_ ACK is received:
- *    rcv SYN (set wscale opts)         --> send SYN/ACK, set snd_wnd = window.
- *    rcv ACK, calculate tiwin --> process SYN_RECEIVED, determine wscale,
- *       move to ESTAB, set snd_wnd to tiwin.
- */
-                               tp->snd_wnd = tiwin;    /* unscaled */
+
                                goto after_listen;
                        }
                        if (thflags & TH_RST) {
@@ -1069,6 +1056,12 @@ after_listen:
        KASSERT(tp->t_state != TCPS_LISTEN, ("tcp_input: TCPS_LISTEN state"));
        KKASSERT(so->so_port == &curthread->td_msgport);
 
+       /* Unscale the window into a 32-bit value. */
+       if (!(thflags & TH_SYN))
+               tiwin = th->th_win << tp->snd_scale;
+       else
+               tiwin = th->th_win;
+
        /*
         * This is the second part of the MSS DoS prevention code (after
         * minmss on the sending side) and it deals with too many too small
@@ -1094,10 +1087,16 @@ after_listen:
         */
        tcp_dooptions(&to, optp, optlen, (thflags & TH_SYN) != 0);
        if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
-               if (to.to_flags & TOF_SCALE) {
+               if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) {
                        tp->t_flags |= TF_RCVD_SCALE;
-                       tp->requested_s_scale = to.to_requested_s_scale;
+                       tp->snd_scale = to.to_requested_s_scale;
                }
+
+               /*
+                * Initial send window; will be updated upon next ACK
+                */
+               tp->snd_wnd = th->th_win;
+
                if (to.to_flags & TOF_TS) {
                        tp->t_flags |= TF_RCVD_TSTMP;
                        tp->ts_recent = to.to_tsval;
@@ -1446,7 +1445,6 @@ after_listen:
                }
                if (!(thflags & TH_SYN))
                        goto drop;
-               tp->snd_wnd = th->th_win;       /* initial send window */
 
                tp->irs = th->th_seq;
                tcp_rcvseqinit(tp);
@@ -1456,10 +1454,8 @@ after_listen:
                        soisconnected(so);
                        /* Do window scaling on this connection? */
                        if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
-                           (TF_RCVD_SCALE | TF_REQ_SCALE)) {
-                               tp->snd_scale = tp->requested_s_scale;
+                           (TF_RCVD_SCALE | TF_REQ_SCALE))
                                tp->rcv_scale = tp->request_r_scale;
-                       }
                        tp->rcv_adv += tp->rcv_wnd;
                        tp->snd_una++;          /* SYN is acked */
                        tcp_callout_stop(tp, tp->tt_rexmt);
@@ -1840,10 +1836,8 @@ after_listen:
                soisconnected(so);
                /* Do window scaling? */
                if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
-                   (TF_RCVD_SCALE | TF_REQ_SCALE)) {
-                       tp->snd_scale = tp->requested_s_scale;
+                   (TF_RCVD_SCALE | TF_REQ_SCALE))
                        tp->rcv_scale = tp->request_r_scale;
-               }
                /*
                 * Make transitions:
                 *      SYN-RECEIVED  -> ESTABLISHED
@@ -2056,10 +2050,8 @@ fastretransmit:
                        tp->snd_una++;
                        /* Do window scaling? */
                        if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
-                           (TF_RCVD_SCALE | TF_REQ_SCALE)) {
-                               tp->snd_scale = tp->requested_s_scale;
+                           (TF_RCVD_SCALE | TF_REQ_SCALE))
                                tp->rcv_scale = tp->request_r_scale;
-                       }
                }
 
 process_ACK:
index 142fbce..5eeaaa6 100644 (file)
@@ -851,6 +851,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
        tp->irs = sc->sc_irs;
        tcp_rcvseqinit(tp);
        tcp_sendseqinit(tp);
+       tp->snd_wnd = sc->sc_sndwnd;
        tp->snd_wl1 = sc->sc_irs;
        tp->rcv_up = sc->sc_irs + 1;
        tp->rcv_wnd = sc->sc_wnd;
@@ -861,7 +862,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
                tp->t_flags |= TF_NOOPT;
        if (sc->sc_flags & SCF_WINSCALE) {
                tp->t_flags |= TF_REQ_SCALE | TF_RCVD_SCALE;
-               tp->requested_s_scale = sc->sc_requested_s_scale;
+               tp->snd_scale = sc->sc_requested_s_scale;
                tp->request_r_scale = sc->sc_request_r_scale;
        }
        if (sc->sc_flags & SCF_TIMESTAMP) {
@@ -1031,6 +1032,9 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
                else
                        sc->sc_flags &= ~SCF_SACK_PERMITTED;
 
+               /* Update initial send window */
+               sc->sc_sndwnd = th->th_win;
+
                /*
                 * PCB may have changed, pick up new values.
                 */
@@ -1119,6 +1123,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
        if (to->to_flags & TOF_SIGNATURE)
                sc->sc_flags = SCF_SIGNATURE;
 #endif /* TCP_SIGNATURE */
+       sc->sc_sndwnd = th->th_win;
 
        if (syncache_respond(sc, m) == 0) {
                syncache_insert(sc, sch);
index d509a92..342f5f3 100644 (file)
@@ -236,7 +236,6 @@ struct tcpcb {
        u_char  snd_scale;              /* window scaling for send window */
        u_char  rcv_scale;              /* window scaling for recv window */
        u_char  request_r_scale;        /* pending window scaling */
-       u_char  requested_s_scale;
        u_long  ts_recent;              /* timestamp echo data */
 
        u_long  ts_recent_age;          /* when last updated */
@@ -483,6 +482,7 @@ struct syncache {
 #define SCF_SIGNATURE          0x40            /* send MD5 digests */
 #define SCF_MARKER             0x80            /* not a real entry */
        int             sc_rxtused;             /* time spent in SYN|ACK rxt */
+       u_long          sc_sndwnd;              /* send window */
        TAILQ_ENTRY(syncache) sc_hash;
        TAILQ_ENTRY(syncache) sc_timerq;
 };