kernel - MPSAFE the protocol drain routines
[dragonfly.git] / sys / netinet / tcp_input.c
index 84b821e..3069521 100644 (file)
@@ -69,6 +69,7 @@
  */
 
 #include "opt_ipfw.h"          /* for ipfw_fwd         */
+#include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
@@ -135,7 +136,6 @@ struct tcphdr tcp_savetcp;
 
 MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
 
-tcp_cc tcp_ccgen;
 static int log_in_vain = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
     &log_in_vain, 0, "Log all incoming TCP connections");
@@ -221,7 +221,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
     &tcp_autorcvbuf_inc, 0,
     "Incrementor step size of automatic receive buffer");
 
-int tcp_autorcvbuf_max = 16*1024*1024;
+int tcp_autorcvbuf_max = 2*1024*1024;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
     &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
 
@@ -308,7 +308,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
                tp->reportblk.rblk_start = tp->reportblk.rblk_end;
                return (0);
        }
-       tcp_reass_qsize++;
+       atomic_add_int(&tcp_reass_qsize, 1);
 
        /*
         * Find a segment which begins after this one does.
@@ -341,7 +341,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
                                tcpstat.tcps_rcvdupbyte += *tlenp;
                                m_freem(m);
                                kfree(te, M_TSEGQ);
-                               tcp_reass_qsize--;
+                               atomic_add_int(&tcp_reass_qsize, -1);
                                /*
                                 * Try to present any queued data
                                 * at the left window edge to the user.
@@ -396,7 +396,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
                LIST_REMOVE(q, tqe_q);
                m_freem(q->tqe_m);
                kfree(q, M_TSEGQ);
-               tcp_reass_qsize--;
+               atomic_add_int(&tcp_reass_qsize, -1);
                q = nq;
        }
 
@@ -422,7 +422,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
                        tp->reportblk.rblk_end = tend;
                LIST_REMOVE(q, tqe_q);
                kfree(q, M_TSEGQ);
-               tcp_reass_qsize--;
+               atomic_add_int(&tcp_reass_qsize, -1);
        }
 
        if (p == NULL) {
@@ -440,9 +440,10 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
                        if (!(tp->t_flags & TF_DUPSEG))
                                tp->reportblk.rblk_start = p->tqe_th->th_seq;
                        kfree(te, M_TSEGQ);
-                       tcp_reass_qsize--;
-               } else
+                       atomic_add_int(&tcp_reass_qsize, -1);
+               } else {
                        LIST_INSERT_AFTER(p, te, tqe_q);
+               }
        }
 
 present:
@@ -472,7 +473,7 @@ present:
        else
                ssb_appendstream(&so->so_rcv, q->tqe_m);
        kfree(q, M_TSEGQ);
-       tcp_reass_qsize--;
+       atomic_add_int(&tcp_reass_qsize, -1);
        ND6_HINT(tp);
        sorwakeup(so);
        return (flags);
@@ -521,7 +522,8 @@ tcp_input(struct mbuf *m, ...)
        struct inpcb *inp = NULL;
        u_char *optp = NULL;
        int optlen = 0;
-       int len, tlen, off;
+       int tlen, off;
+       int len = 0;
        int drop_hdrlen;
        struct tcpcb *tp = NULL;
        int thflags;
@@ -531,8 +533,6 @@ tcp_input(struct mbuf *m, ...)
        u_long tiwin;
        int recvwin;
        struct tcpopt to;               /* options in this segment */
-       struct rmxp_tao *taop;          /* pointer to our TAO cache entry */
-       struct rmxp_tao tao_noncached;  /* in case there's no cached entry */
        struct sockaddr_in *next_hop = NULL;
        int rstreason; /* For badport_bandlim accounting purposes */
        int cpu;
@@ -895,13 +895,21 @@ findpcb:
                                        rstreason = BANDLIM_RST_OPENPORT;
                                        goto dropwithreset;
                                }
+
+                               /*
+                                * Could not complete 3-way handshake,
+                                * connection is being closed down, and
+                                * syncache will free mbuf.
+                                */
                                if (so == NULL)
-                                       /*
-                                        * Could not complete 3-way handshake,
-                                        * connection is being closed down, and
-                                        * syncache will free mbuf.
-                                        */
                                        return;
+
+                               /*
+                                * We must be in the correct protocol thread
+                                * for this connection.
+                                */
+                               KKASSERT(so->so_port == &curthread->td_msgport);
+
                                /*
                                 * Socket is created in state SYN_RECEIVED.
                                 * Continue processing segment.
@@ -1025,15 +1033,20 @@ findpcb:
                        tcp_dooptions(&to, optp, optlen, TRUE);
                        if (!syncache_add(&inc, &to, th, &so, m))
                                goto drop;
+
+                       /*
+                        * Entry added to syncache, mbuf used to
+                        * send SYN,ACK packet.
+                        */
                        if (so == NULL)
-                               /*
-                                * Entry added to syncache, mbuf used to
-                                * send SYN,ACK packet.
-                                */
                                return;
+
                        /*
-                        * Segment passed TAO tests.
+                        * We must be in the correct protocol thread for
+                        * this connection.
                         */
+                       KKASSERT(so->so_port == &curthread->td_msgport);
+
                        inp = so->so_pcb;
                        tp = intotcpcb(inp);
                        tp->snd_wnd = tiwin;
@@ -1065,10 +1078,16 @@ findpcb:
                }
                goto drop;
        }
-after_listen:
 
-       /* should not happen - syncache should pick up these connections */
+after_listen:
+       /*
+        * Should not happen - syncache should pick up these connections.
+        *
+        * Once we are past handling listen sockets we must be in the
+        * correct protocol processing thread.
+        */
        KASSERT(tp->t_state != TCPS_LISTEN, ("tcp_input: TCPS_LISTEN state"));
+       KKASSERT(so->so_port == &curthread->td_msgport);
 
        /*
         * This is the second part of the MSS DoS prevention code (after
@@ -1121,8 +1140,6 @@ after_listen:
                        tp->ts_recent = to.to_tsval;
                        tp->ts_recent_age = ticks;
                }
-               if (to.to_flags & (TOF_CC | TOF_CCNEW))
-                       tp->t_flags |= TF_RCVD_CC;
                if (to.to_flags & TOF_MSS)
                        tcp_mss(tp, to.to_mss);
                /*
@@ -1156,13 +1173,6 @@ after_listen:
            !(tp->t_flags & (TF_NEEDSYN | TF_NEEDFIN)) &&
            (!(to.to_flags & TOF_TS) ||
             TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
-           /*
-            * Using the CC option is compulsory if once started:
-            *   the segment is OK if no T/TCP was negotiated or
-            *   if the segment has a CC option equal to CCrecv
-            */
-           ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||
-            ((to.to_flags & TOF_CC) && to.to_cc == tp->cc_recv)) &&
            th->th_seq == tp->rcv_nxt &&
            tp->snd_nxt == tp->snd_max) {
 
@@ -1275,7 +1285,7 @@ after_listen:
                    th->th_ack == tp->snd_una &&
                    LIST_EMPTY(&tp->t_segq) &&
                    tlen <= ssb_space(&so->so_rcv)) {
-                       int newsize = 0;        /* automatic sockbuf scaling */
+                       u_long newsize = 0;     /* automatic sockbuf scaling */
                        /*
                         * This is a pure, in-sequence data packet
                         * with nothing on the reassembly queue and
@@ -1329,9 +1339,9 @@ after_listen:
                                            so->so_rcv.ssb_hiwat <
                                            tcp_autorcvbuf_max) {
                                                newsize =
-                                                   min(so->so_rcv.ssb_hiwat +
-                                                   tcp_autorcvbuf_inc,
-                                                   tcp_autorcvbuf_max);
+                                                   ulmin(so->so_rcv.ssb_hiwat +
+                                                         tcp_autorcvbuf_inc,
+                                                         tcp_autorcvbuf_max);
                                        }
                                        /* Start over with next RTT. */
                                        tp->rfbuf_ts = 0;
@@ -1346,13 +1356,25 @@ after_listen:
                                m_freem(m);
                        } else {
                                /*
-                                * Set new socket buffer size.
-                                * Give up when limit is reached.
+                                * Set new socket buffer size, give up when
+                                * limit is reached.
+                                *
+                                * Adjusting the size can mess up ACK
+                                * sequencing when pure window updates are
+                                * being avoided (which is the default),
+                                * so force an ack.
                                 */
-                               if (newsize)
+                               if (newsize) {
+                                       tp->t_flags |= TF_RXRESIZED;
                                        if (!ssb_reserve(&so->so_rcv, newsize,
-                                           so, NULL))
-                                               so->so_rcv.ssb_flags &= ~SSB_AUTOSIZE;
+                                                        so, NULL)) {
+                                               atomic_clear_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE);
+                                       }
+                                       if (newsize >=
+                                           (TCP_MAXWIN << tp->rcv_scale)) {
+                                               atomic_clear_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE);
+                                       }
+                               }
                                m_adj(m, drop_hdrlen); /* delayed header drop */
                                ssb_appendstream(&so->so_rcv, m);
                        }
@@ -1442,28 +1464,11 @@ after_listen:
         *      continue processing rest of data/controls, beginning with URG
         */
        case TCPS_SYN_SENT:
-               if ((taop = tcp_gettaocache(&inp->inp_inc)) == NULL) {
-                       taop = &tao_noncached;
-                       bzero(taop, sizeof *taop);
-               }
-
                if ((thflags & TH_ACK) &&
                    (SEQ_LEQ(th->th_ack, tp->iss) ||
                     SEQ_GT(th->th_ack, tp->snd_max))) {
-                       /*
-                        * If we have a cached CCsent for the remote host,
-                        * hence we haven't just crashed and restarted,
-                        * do not send a RST.  This may be a retransmission
-                        * from the other side after our earlier ACK was lost.
-                        * Our new SYN, when it arrives, will serve as the
-                        * needed ACK.
-                        */
-                       if (taop->tao_ccsent != 0)
-                               goto drop;
-                       else {
-                               rstreason = BANDLIM_UNLIMITED;
-                               goto dropwithreset;
-                       }
+                       rstreason = BANDLIM_UNLIMITED;
+                       goto dropwithreset;
                }
                if (thflags & TH_RST) {
                        if (thflags & TH_ACK)
@@ -1473,30 +1478,11 @@ after_listen:
                if (!(thflags & TH_SYN))
                        goto drop;
                tp->snd_wnd = th->th_win;       /* initial send window */
-               tp->cc_recv = to.to_cc;         /* foreign CC */
 
                tp->irs = th->th_seq;
                tcp_rcvseqinit(tp);
                if (thflags & TH_ACK) {
-                       /*
-                        * Our SYN was acked.  If segment contains CC.ECHO
-                        * option, check it to make sure this segment really
-                        * matches our SYN.  If not, just drop it as old
-                        * duplicate, but send an RST if we're still playing
-                        * by the old rules.  If no CC.ECHO option, make sure
-                        * we don't get fooled into using T/TCP.
-                        */
-                       if (to.to_flags & TOF_CCECHO) {
-                               if (tp->cc_send != to.to_ccecho) {
-                                       if (taop->tao_ccsent != 0)
-                                               goto drop;
-                                       else {
-                                               rstreason = BANDLIM_UNLIMITED;
-                                               goto dropwithreset;
-                                       }
-                               }
-                       } else
-                               tp->t_flags &= ~TF_RCVD_CC;
+                       /* Our SYN was acked. */
                        tcpstat.tcps_connects++;
                        soisconnected(so);
                        /* Do window scaling on this connection? */
@@ -1505,10 +1491,6 @@ after_listen:
                                tp->snd_scale = tp->requested_s_scale;
                                tp->rcv_scale = tp->request_r_scale;
                        }
-                       /* Segment is acceptable, update cache if undefined. */
-                       if (taop->tao_ccsent == 0)
-                               taop->tao_ccsent = to.to_ccecho;
-
                        tp->rcv_adv += tp->rcv_wnd;
                        tp->snd_una++;          /* SYN is acked */
                        tcp_callout_stop(tp, tp->tt_rexmt);
@@ -1541,43 +1523,14 @@ after_listen:
                } else {
                        /*
                         * Received initial SYN in SYN-SENT[*] state =>
-                        * simultaneous open.  If segment contains CC option
-                        * and there is a cached CC, apply TAO test.
-                        * If it succeeds, connection is * half-synchronized.
-                        * Otherwise, do 3-way handshake:
+                        * simultaneous open.
+                        * Do 3-way handshake:
                         *        SYN-SENT -> SYN-RECEIVED
                         *        SYN-SENT* -> SYN-RECEIVED*
-                        * If there was no CC option, clear cached CC value.
                         */
                        tp->t_flags |= TF_ACKNOW;
                        tcp_callout_stop(tp, tp->tt_rexmt);
-                       if (to.to_flags & TOF_CC) {
-                               if (taop->tao_cc != 0 &&
-                                   CC_GT(to.to_cc, taop->tao_cc)) {
-                                       /*
-                                        * update cache and make transition:
-                                        *        SYN-SENT -> ESTABLISHED*
-                                        *        SYN-SENT* -> FIN-WAIT-1*
-                                        */
-                                       taop->tao_cc = to.to_cc;
-                                       tp->t_starttime = ticks;
-                                       if (tp->t_flags & TF_NEEDFIN) {
-                                               tp->t_state = TCPS_FIN_WAIT_1;
-                                               tp->t_flags &= ~TF_NEEDFIN;
-                                       } else {
-                                               tp->t_state = TCPS_ESTABLISHED;
-                                               tcp_callout_reset(tp,
-                                                   tp->tt_keep, tcp_keepidle,
-                                                   tcp_timer_keep);
-                                       }
-                                       tp->t_flags |= TF_NEEDSYN;
-                               } else
-                                       tp->t_state = TCPS_SYN_RECEIVED;
-                       } else {
-                               /* CC.NEW or no option => invalidate cache */
-                               taop->tao_cc = 0;
-                               tp->t_state = TCPS_SYN_RECEIVED;
-                       }
+                       tp->t_state = TCPS_SYN_RECEIVED;
                }
 
 trimthenstep6:
@@ -1611,35 +1564,11 @@ trimthenstep6:
 
        /*
         * If the state is LAST_ACK or CLOSING or TIME_WAIT:
-        *      if segment contains a SYN and CC [not CC.NEW] option:
-        *              if state == TIME_WAIT and connection duration > MSL,
-        *                  drop packet and send RST;
-        *
-        *              if SEG.CC > CCrecv then is new SYN, and can implicitly
-        *                  ack the FIN (and data) in retransmission queue.
-        *                  Complete close and delete TCPCB.  Then reprocess
-        *                  segment, hoping to find new TCPCB in LISTEN state;
-        *
-        *              else must be old SYN; drop it.
-        *      else do normal processing.
+        *      do normal processing (we no longer bother with T/TCP).
         */
        case TCPS_LAST_ACK:
        case TCPS_CLOSING:
        case TCPS_TIME_WAIT:
-               if ((thflags & TH_SYN) &&
-                   (to.to_flags & TOF_CC) && tp->cc_recv != 0) {
-                       if (tp->t_state == TCPS_TIME_WAIT &&
-                                       (ticks - tp->t_starttime) > tcp_msl) {
-                               rstreason = BANDLIM_UNLIMITED;
-                               goto dropwithreset;
-                       }
-                       if (CC_GT(to.to_cc, tp->cc_recv)) {
-                               tp = tcp_close(tp);
-                               goto findpcb;
-                       }
-                       else
-                               goto drop;
-               }
                break;  /* continue normal processing */
        }
 
@@ -1762,16 +1691,6 @@ trimthenstep6:
                }
        }
 
-       /*
-        * T/TCP mechanism
-        *   If T/TCP was negotiated and the segment doesn't have CC,
-        *   or if its CC is wrong then drop the segment.
-        *   RST segments do not have to comply with this.
-        */
-       if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) &&
-           (!(to.to_flags & TOF_CC) || tp->cc_recv != to.to_cc))
-               goto dropafterack;
-
        /*
         * In the SYN-RECEIVED state, validate that the packet belongs to
         * this connection before trimming the data to fit the receive
@@ -1960,15 +1879,6 @@ trimthenstep6:
                        tp->snd_scale = tp->requested_s_scale;
                        tp->rcv_scale = tp->request_r_scale;
                }
-               /*
-                * Upon successful completion of 3-way handshake,
-                * update cache.CC if it was undefined, pass any queued
-                * data to the user, and advance state appropriately.
-                */
-               if ((taop = tcp_gettaocache(&inp->inp_inc)) != NULL &&
-                   taop->tao_cc == 0)
-                       taop->tao_cc = tp->cc_recv;
-
                /*
                 * Make transitions:
                 *      SYN-RECEIVED  -> ESTABLISHED
@@ -2400,16 +2310,8 @@ process_ACK:
                        if (ourfinisacked) {
                                tp->t_state = TCPS_TIME_WAIT;
                                tcp_canceltimers(tp);
-                               /* Shorten TIME_WAIT [RFC-1644, p.28] */
-                               if (tp->cc_recv != 0 &&
-                                   (ticks - tp->t_starttime) < tcp_msl) {
-                                       tcp_callout_reset(tp, tp->tt_2msl,
-                                           tp->t_rxtcur * TCPTV_TWTRUNC,
-                                           tcp_timer_2msl);
-                               } else {
-                                       tcp_callout_reset(tp, tp->tt_2msl,
+                               tcp_callout_reset(tp, tp->tt_2msl,
                                            2 * tcp_msl, tcp_timer_2msl);
-                               }
                                soisdisconnected(so);
                        }
                        break;
@@ -2633,18 +2535,8 @@ dodata:                                                  /* XXX */
                case TCPS_FIN_WAIT_2:
                        tp->t_state = TCPS_TIME_WAIT;
                        tcp_canceltimers(tp);
-                       /* Shorten TIME_WAIT [RFC-1644, p.28] */
-                       if (tp->cc_recv != 0 &&
-                           (ticks - tp->t_starttime) < tcp_msl) {
-                               tcp_callout_reset(tp, tp->tt_2msl,
-                                   tp->t_rxtcur * TCPTV_TWTRUNC,
-                                   tcp_timer_2msl);
-                               /* For transaction client, force ACK now. */
-                               tp->t_flags |= TF_ACKNOW;
-                       } else {
-                               tcp_callout_reset(tp, tp->tt_2msl, 2 * tcp_msl,
+                       tcp_callout_reset(tp, tp->tt_2msl, 2 * tcp_msl,
                                    tcp_timer_2msl);
-                       }
                        soisdisconnected(so);
                        break;
 
@@ -2814,31 +2706,6 @@ tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, boolean_t is_syn)
                        if (to->to_tsecr != 0 && TSTMP_GT(to->to_tsecr, ticks))
                                to->to_tsecr = 0;
                        break;
-               case TCPOPT_CC:
-                       if (optlen != TCPOLEN_CC)
-                               continue;
-                       to->to_flags |= TOF_CC;
-                       bcopy(cp + 2, &to->to_cc, sizeof to->to_cc);
-                       to->to_cc = ntohl(to->to_cc);
-                       break;
-               case TCPOPT_CCNEW:
-                       if (optlen != TCPOLEN_CC)
-                               continue;
-                       if (!is_syn)
-                               continue;
-                       to->to_flags |= TOF_CCNEW;
-                       bcopy(cp + 2, &to->to_cc, sizeof to->to_cc);
-                       to->to_cc = ntohl(to->to_cc);
-                       break;
-               case TCPOPT_CCECHO:
-                       if (optlen != TCPOLEN_CC)
-                               continue;
-                       if (!is_syn)
-                               continue;
-                       to->to_flags |= TOF_CCECHO;
-                       bcopy(cp + 2, &to->to_ccecho, sizeof to->to_ccecho);
-                       to->to_ccecho = ntohl(to->to_ccecho);
-                       break;
                case TCPOPT_SACK_PERMITTED:
                        if (optlen != TCPOLEN_SACK_PERMITTED)
                                continue;
@@ -2859,6 +2726,19 @@ tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, boolean_t is_syn)
                                r->rblk_end = ntohl(r->rblk_end);
                        }
                        break;
+#ifdef TCP_SIGNATURE
+               /*
+                * XXX In order to reply to a host which has set the
+                * TCP_SIGNATURE option in its initial SYN, we have to
+                * record the fact that the option was observed here
+                * for the syncache code to perform the correct response.
+                */
+               case TCPOPT_SIGNATURE:
+                       if (optlen != TCPOLEN_SIGNATURE)
+                               continue;
+                       to->to_flags |= (TOF_SIGNATURE | TOF_SIGLEN);
+                       break;
+#endif /* TCP_SIGNATURE */
                default:
                        continue;
                }
@@ -3000,10 +2880,6 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt)
  *
  * NOTE that this routine is only called when we process an incoming
  * segment, for outgoing segments only tcp_mssopt is called.
- *
- * In case of T/TCP, we call this routine during implicit connection
- * setup as well (offer = -1), to initialize maxseg from the cached
- * MSS of our peer.
  */
 void
 tcp_mss(struct tcpcb *tp, int offer)
@@ -3014,8 +2890,6 @@ tcp_mss(struct tcpcb *tp, int offer)
        u_long bufsize;
        struct inpcb *inp = tp->t_inpcb;
        struct socket *so;
-       struct rmxp_tao *taop;
-       int origoffer = offer;
 #ifdef INET6
        boolean_t isipv6 = ((inp->inp_vflag & INP_IPV6) ? TRUE : FALSE);
        size_t min_protoh = isipv6 ?
@@ -3038,34 +2912,41 @@ tcp_mss(struct tcpcb *tp, int offer)
        ifp = rt->rt_ifp;
        so = inp->inp_socket;
 
-       taop = rmx_taop(rt->rt_rmx);
-       /*
-        * Offer == -1 means that we didn't receive SYN yet,
-        * use cached value in that case;
-        */
-       if (offer == -1)
-               offer = taop->tao_mssopt;
        /*
         * Offer == 0 means that there was no MSS on the SYN segment,
-        * in this case we use tcp_mssdflt.
+        * in this case we use either the interface mtu or tcp_mssdflt.
+        *
+        * An offer which is too large will be cut down later.
         */
        if (offer == 0) {
-               offer = (isipv6 ? tcp_v6mssdflt : tcp_mssdflt);
-       } else {
-               /*
-                * Prevent DoS attack with too small MSS. Round up
-                * to at least minmss.
-                */
-               offer = max(offer, tcp_minmss);
-               /*
-                * Sanity check: make sure that maxopd will be large
-                * enough to allow some data on segments even is the
-                * all the option space is used (40bytes).  Otherwise
-                * funny things may happen in tcp_output.
-                */
-               offer = max(offer, 64);
+               if (isipv6) {
+                       if (in6_localaddr(&inp->in6p_faddr)) {
+                               offer = ND_IFINFO(rt->rt_ifp)->linkmtu -
+                                       min_protoh;
+                       } else {
+                               offer = tcp_v6mssdflt;
+                       }
+               } else {
+                       if (in_localaddr(inp->inp_faddr))
+                               offer = ifp->if_mtu - min_protoh;
+                       else
+                               offer = tcp_mssdflt;
+               }
        }
-       taop->tao_mssopt = offer;
+
+       /*
+        * Prevent DoS attack with too small MSS. Round up
+        * to at least minmss.
+        *
+        * Sanity check: make sure that maxopd will be large
+        * enough to allow some data on segments even if
+        * all the option space is used (40 bytes).  Otherwise
+        * funny things may happen in tcp_output.
+        */
+       offer = max(offer, tcp_minmss);
+       offer = max(offer, 64);
+
+       rt->rt_rmx.rmx_mssopt = offer;
 
        /*
         * While we're here, check if there's an initial rtt
@@ -3095,24 +2976,22 @@ tcp_mss(struct tcpcb *tp, int offer)
                              ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
                              tp->t_rttmin, TCPTV_REXMTMAX);
        }
+
        /*
         * if there's an mtu associated with the route, use it
-        * else, use the link mtu.
+        * else, use the link mtu.  Take the smaller of mss or offer
+        * as our final mss.
         */
-       if (rt->rt_rmx.rmx_mtu)
+       if (rt->rt_rmx.rmx_mtu) {
                mss = rt->rt_rmx.rmx_mtu - min_protoh;
-       else {
-               if (isipv6) {
+       } else {
+               if (isipv6)
                        mss = ND_IFINFO(rt->rt_ifp)->linkmtu - min_protoh;
-                       if (!in6_localaddr(&inp->in6p_faddr))
-                               mss = min(mss, tcp_v6mssdflt);
-               } else {
+               else
                        mss = ifp->if_mtu - min_protoh;
-                       if (!in_localaddr(inp->inp_faddr))
-                               mss = min(mss, tcp_mssdflt);
-               }
        }
        mss = min(mss, offer);
+
        /*
         * maxopd stores the maximum length of data AND options
         * in a segment; maxseg is the amount of data in a normal
@@ -3122,19 +3001,9 @@ tcp_mss(struct tcpcb *tp, int offer)
         */
        tp->t_maxopd = mss;
 
-       /*
-        * In case of T/TCP, origoffer==-1 indicates, that no segments
-        * were received yet.  In this case we just guess, otherwise
-        * we do the same as before T/TCP.
-        */
        if ((tp->t_flags & (TF_REQ_TSTMP | TF_NOOPT)) == TF_REQ_TSTMP &&
-           (origoffer == -1 ||
-            (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
+           ((tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
                mss -= TCPOLEN_TSTAMP_APPA;
-       if ((tp->t_flags & (TF_REQ_CC | TF_NOOPT)) == TF_REQ_CC &&
-           (origoffer == -1 ||
-            (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
-               mss -= TCPOLEN_CC_APPA;
 
 #if    (MCLBYTES & (MCLBYTES - 1)) == 0
                if (mss > MCLBYTES)