ti.4 \
tl.4 \
trm.4 \
- ttcp.4 \
tty.4 \
tun.4 \
twa.4 \
use the
.Xr connect 2
call to initiate connections.
-.Tn TCP
-also supports a more datagram-like mode, called Transaction
-.Tn TCP ,
-which is described in
-.Xr ttcp 4 .
.Pp
Passive sockets may
.Dq underspecify
.Pq tcp.rfc1323
Implement the window scaling and timestamp options of RFC 1323
(default true).
-.It Dv TCPCTL_DO_RFC1644
-.Pq tcp.rfc1644
-Implement Transaction
-.Tn TCP ,
-as described in RFC 1644.
.It Dv TCPCTL_MSSDFLT
.Pq tcp.mssdflt
The default value used for the maximum segment size
+++ /dev/null
-.\" Copyright 1994, 1995 Massachusetts Institute of Technology
-.\"
-.\" Permission to use, copy, modify, and distribute this software and
-.\" its documentation for any purpose and without fee is hereby
-.\" granted, provided that both the above copyright notice and this
-.\" permission notice appear in all copies, that both the above
-.\" copyright notice and this permission notice appear in all
-.\" supporting documentation, and that the name of M.I.T. not be used
-.\" in advertising or publicity pertaining to distribution of the
-.\" software without specific, written prior permission. M.I.T. makes
-.\" no representations about the suitability of this software for any
-.\" purpose. It is provided "as is" without express or implied
-.\" warranty.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
-.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
-.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
-.\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" $FreeBSD: src/share/man/man4/ttcp.4,v 1.8.2.6 2001/12/17 11:30:12 ru Exp $
-.\" $DragonFly: src/share/man/man4/ttcp.4,v 1.3 2007/07/14 21:48:15 swildner Exp $
-.\"
-.Dd January 18, 1995
-.Dt TTCP 4
-.Os
-.Sh NAME
-.Nm ttcp
-.Nd Transmission Control Protocol Extensions for Transactions
-.Sh SYNOPSIS
-.In sys/types.h
-.In sys/socket.h
-.In netinet/in.h
-.In netinet/tcp.h
-.Ft int
-.Fn setsockopt sock IPPROTO_TCP TCP_NOPUSH &One "sizeof One"
-.Ft ssize_t
-.Fn sendto sock msg len MSG_EOF &sin "sizeof sin"
-.Ft ssize_t
-.Fn sendto sock msg len MSG_EOF 0 0
-.Sh DESCRIPTION
-.Tn T/TCP
-refers to a set of extensions to the
-.Tn TCP
-protocol (see
-.Xr tcp 4 )
-which permit hosts to reliably exchange a small amount of data in a
-two-packet exchange, thus eliminating the extra round-trip delays
-inherent in a standard
-.Tn TCP
-connection. The socket interface includes modifications to support
-.Tn T/TCP ,
-detailed here for the specific case, and in the
-.Xr socket 2
-and
-.Xr send 2
-manual pages for the protocol-independent support.
-.Tn T/TCP
-is defined in RFC 1644.
-.Pp
-The
-.Tn T/TCP
-extensions work by including certain options in all segments of a
-particular connection, which enable the implementation to avoid the
-three-way handshake for all but the first connection between a pair of
-hosts. These same options also make it possible to more reliably
-recognize old, duplicate packets, which in turn reduces the amount of
-time the
-.Tn TCP
-protocol must maintain state after a connection closes. The
-.Va net.inet.tcp.rfc1644
-MIB variable can be used to disable
-.Tn T/TCP
-negotiation at run time; however, the protocol has been designed to
-ensure that attempts by non-T/TCP
-systems to communicate with T/TCP-enhanced
-ones automatically degenerate into standard
-.Tn TCP .
-.Sh TRANSACTION MODEL
-The expected model of a
-.Dq transaction
-as used by
-.Tn T/TCP
-is a fairly simple one:
-.Bl -enum
-.It
-A client program generates a request to be sent to the server, which
-is small enough to fit in a single
-.Tn TCP
-segment, and sends a SYN PUSH FIN segment with options and data to the
-server.
-.It
-The server program accepts the request in the same manner as for
-regular
-.Tn TCP
-connections, interprets it, and generates a reply which may be small
-enough to fit in a single segment. If it is, the reply is sent in a
-single SYN PUSH FIN ACK segment with (different) options and data back
-to the client. If not, then the connection degenerates into (almost)
-the usual case for
-.Tn TCP .
-The server then closes its socket.
-.It
-The client reads the reply and closes its socket.
-.El
-.Sh CLIENT SUPPORT
-Support on the client side is provided by extending the semantics of
-the
-.Xr sendto 2
-and
-.Xr sendmsg 2
-system calls to understand the notion of
-.Dq implied connect
-and
-.Dq send and shutdown .
-To send the request in a transaction, the
-.Xr sendto 2
-system call is typically used, as in the following example:
-.Bd -literal -offset indent
-char request[REQ_LEN];
-struct sockaddr_in sin;
-int sock, req_len;
-
-sock = socket(PF_INET, SOCK_STREAM, 0);
-
-/* prepare request[] and sin */
-
-err = sendto(sock, request, req_len, MSG_EOF,
- (struct sockaddr *)&sin, sin.sin_len);
-
-/* do something if error */
-
-req_len = read(sock, request, sizeof request);
-close(sock);
-
-/* do something with the reply */
-
-.Ed
-.Pp
-Note that, after the
-call to
-.Fn sendto ,
-the socket is now in the same state as if the
-.Xr connect 2
-and
-.Xr shutdown 2
-system calls had been used. That is to say, the only reasonable
-operations to perform on this socket are
-.Xr read 2
-and
-.Xr close 2 .
-(Because the client's
-.Tn TCP
-sender is already shut down, it is not possible to
-.Xr connect 2
-this socket to another destination.)
-.Sh SERVER SUPPORT
-There are two different options available for servers using
-.Tn T/TCP :
-.Bl -enum
-.It
-Set the
-.Dv TCP_NOPUSH
-socket option, and use normal
-.Xr write 2
-calls when formulating the response.
-.It
-Use
-.Xr sendto 2
-with the
-.Dv MSG_EOF
-flag, as in the client, but with the destination unspecified.
-.El
-.Pp
-The first option is generally the appropriate choice when converting
-existing servers to use
-.Tn T/TCP
-extensions; simply add a call to
-.Fn setsockopt sock IPPROTO_TCP TCP_NOPUSH &One "sizeof One"
-(where
-.Va One
-is an integer variable with a non-zero value). The server socket must
-be closed before any data is sent (unless the socket buffers fill up).
-.Pp
-The second option is preferable for new servers, and is sometimes easy
-enough to retrofit into older servers. In this case, where the reply
-phase would ordinarily have included a call to
-.Fn write ,
-one substitutes:
-.Pp
-.Dl "sendto(sock, buf, len, MSG_EOF, NULL, 0)"
-.Pp
-In this case, the reply is sent immediately, but as in the client
-case, the socket is no longer useful for anything and should be
-immediately closed.
-.Sh MIB VARIABLES
-The
-.Tn T/TCP
-extensions require the
-.Va net.inet.tcp.rfc1644
-MIB variable to be true in order for the appropriate
-.Tn TCP
-options to be sent. See
-.Xr tcp 4
-for more information.
-.Sh SEE ALSO
-.Xr send 2 ,
-.Xr setsockopt 2 ,
-.Xr inet 4 ,
-.Xr tcp 4
-.Rs
-.%A R. Braden
-.%T "T/TCP \- TCP Extensions for Transactions"
-.%O RFC 1644
-.Re
-.Sh HISTORY
-Support for
-.Tn T/TCP
-first appeared in
-.Fx 2.1 ,
-based on code written by Bob Braden and Liming Wei at the
-University of Southern California, Information Sciences Institute, and
-ported by Andras Olah at the University of Twente.
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
u_long rmx_hopcount; /* max hops expected */
- u_long rmx_filler[4]; /* will be used for T/TCP later */
+ u_short rmx_mssopt; /* peer's cached MSS */
+ u_long rmx_filler[3]; /* for future expansion */
};
/*
typedef u_int32_t tcp_seq;
typedef int32_t tcp_seq_diff_t;
-typedef u_int32_t tcp_cc; /* connection count per rfc1644 */
#define tcp6_seq tcp_seq /* for KAME src sync over BSD*'s */
#define tcp6hdr tcphdr /* for KAME src sync over BSD*'s */
#define TCPOPT_CC 11 /* CC options: RFC-1644 */
#define TCPOPT_CCNEW 12
#define TCPOPT_CCECHO 13
-#define TCPOLEN_CC 6
-#define TCPOLEN_CC_APPA (TCPOLEN_CC+2)
-#define TCPOPT_CC_HDR(ccopt) \
- (TCPOPT_2NOPs | (ccopt) << 8 | TCPOLEN_CC)
/*
* Default maximum segment size for TCP.
#define TCP6_MSS 1024
#define TCP_MAXWIN 65535 /* max value for (unscaled) window */
-#define TTCP_CLIENT_SND_WND 4096 /* dflt send window for T/TCP client */
#define TCP_MIN_WINSHIFT 5 /* requested minimum (x32) */
#define TCP_MAX_WINSHIFT 14 /* maximum window shift */
MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
-tcp_cc tcp_ccgen;
static int log_in_vain = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
&log_in_vain, 0, "Log all incoming TCP connections");
u_long tiwin;
int recvwin;
struct tcpopt to; /* options in this segment */
- struct rmxp_tao *taop; /* pointer to our TAO cache entry */
- struct rmxp_tao tao_noncached; /* in case there's no cached entry */
struct sockaddr_in *next_hop = NULL;
int rstreason; /* For badport_bandlim accounting purposes */
int cpu;
* send SYN,ACK packet.
*/
return;
- /*
- * Segment passed TAO tests.
- */
inp = so->so_pcb;
tp = intotcpcb(inp);
tp->snd_wnd = tiwin;
tp->ts_recent = to.to_tsval;
tp->ts_recent_age = ticks;
}
- if (to.to_flags & (TOF_CC | TOF_CCNEW))
- tp->t_flags |= TF_RCVD_CC;
if (to.to_flags & TOF_MSS)
tcp_mss(tp, to.to_mss);
/*
!(tp->t_flags & (TF_NEEDSYN | TF_NEEDFIN)) &&
(!(to.to_flags & TOF_TS) ||
TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
- /*
- * Using the CC option is compulsory if once started:
- * the segment is OK if no T/TCP was negotiated or
- * if the segment has a CC option equal to CCrecv
- */
- ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||
- ((to.to_flags & TOF_CC) && to.to_cc == tp->cc_recv)) &&
th->th_seq == tp->rcv_nxt &&
tp->snd_nxt == tp->snd_max) {
* continue processing rest of data/controls, beginning with URG
*/
case TCPS_SYN_SENT:
- if ((taop = tcp_gettaocache(&inp->inp_inc)) == NULL) {
- taop = &tao_noncached;
- bzero(taop, sizeof *taop);
- }
-
if ((thflags & TH_ACK) &&
(SEQ_LEQ(th->th_ack, tp->iss) ||
SEQ_GT(th->th_ack, tp->snd_max))) {
- /*
- * If we have a cached CCsent for the remote host,
- * hence we haven't just crashed and restarted,
- * do not send a RST. This may be a retransmission
- * from the other side after our earlier ACK was lost.
- * Our new SYN, when it arrives, will serve as the
- * needed ACK.
- */
- if (taop->tao_ccsent != 0)
- goto drop;
- else {
- rstreason = BANDLIM_UNLIMITED;
- goto dropwithreset;
- }
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
}
if (thflags & TH_RST) {
if (thflags & TH_ACK)
if (!(thflags & TH_SYN))
goto drop;
tp->snd_wnd = th->th_win; /* initial send window */
- tp->cc_recv = to.to_cc; /* foreign CC */
tp->irs = th->th_seq;
tcp_rcvseqinit(tp);
if (thflags & TH_ACK) {
- /*
- * Our SYN was acked. If segment contains CC.ECHO
- * option, check it to make sure this segment really
- * matches our SYN. If not, just drop it as old
- * duplicate, but send an RST if we're still playing
- * by the old rules. If no CC.ECHO option, make sure
- * we don't get fooled into using T/TCP.
- */
- if (to.to_flags & TOF_CCECHO) {
- if (tp->cc_send != to.to_ccecho) {
- if (taop->tao_ccsent != 0)
- goto drop;
- else {
- rstreason = BANDLIM_UNLIMITED;
- goto dropwithreset;
- }
- }
- } else
- tp->t_flags &= ~TF_RCVD_CC;
+ /* Our SYN was acked. */
tcpstat.tcps_connects++;
soisconnected(so);
/* Do window scaling on this connection? */
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
}
- /* Segment is acceptable, update cache if undefined. */
- if (taop->tao_ccsent == 0)
- taop->tao_ccsent = to.to_ccecho;
-
tp->rcv_adv += tp->rcv_wnd;
tp->snd_una++; /* SYN is acked */
tcp_callout_stop(tp, tp->tt_rexmt);
} else {
/*
* Received initial SYN in SYN-SENT[*] state =>
- * simultaneous open. If segment contains CC option
- * and there is a cached CC, apply TAO test.
- * If it succeeds, connection is * half-synchronized.
- * Otherwise, do 3-way handshake:
+ * simultaneous open.
+ * Do 3-way handshake:
* SYN-SENT -> SYN-RECEIVED
* SYN-SENT* -> SYN-RECEIVED*
- * If there was no CC option, clear cached CC value.
*/
tp->t_flags |= TF_ACKNOW;
tcp_callout_stop(tp, tp->tt_rexmt);
- if (to.to_flags & TOF_CC) {
- if (taop->tao_cc != 0 &&
- CC_GT(to.to_cc, taop->tao_cc)) {
- /*
- * update cache and make transition:
- * SYN-SENT -> ESTABLISHED*
- * SYN-SENT* -> FIN-WAIT-1*
- */
- taop->tao_cc = to.to_cc;
- tp->t_starttime = ticks;
- if (tp->t_flags & TF_NEEDFIN) {
- tp->t_state = TCPS_FIN_WAIT_1;
- tp->t_flags &= ~TF_NEEDFIN;
- } else {
- tp->t_state = TCPS_ESTABLISHED;
- tcp_callout_reset(tp,
- tp->tt_keep, tcp_keepidle,
- tcp_timer_keep);
- }
- tp->t_flags |= TF_NEEDSYN;
- } else
- tp->t_state = TCPS_SYN_RECEIVED;
- } else {
- /* CC.NEW or no option => invalidate cache */
- taop->tao_cc = 0;
- tp->t_state = TCPS_SYN_RECEIVED;
- }
+ tp->t_state = TCPS_SYN_RECEIVED;
}
trimthenstep6:
/*
* If the state is LAST_ACK or CLOSING or TIME_WAIT:
- * if segment contains a SYN and CC [not CC.NEW] option:
- * if state == TIME_WAIT and connection duration > MSL,
- * drop packet and send RST;
- *
- * if SEG.CC > CCrecv then is new SYN, and can implicitly
- * ack the FIN (and data) in retransmission queue.
- * Complete close and delete TCPCB. Then reprocess
- * segment, hoping to find new TCPCB in LISTEN state;
- *
- * else must be old SYN; drop it.
- * else do normal processing.
+ * do normal processing (we no longer bother with T/TCP).
*/
case TCPS_LAST_ACK:
case TCPS_CLOSING:
case TCPS_TIME_WAIT:
- if ((thflags & TH_SYN) &&
- (to.to_flags & TOF_CC) && tp->cc_recv != 0) {
- if (tp->t_state == TCPS_TIME_WAIT &&
- (ticks - tp->t_starttime) > tcp_msl) {
- rstreason = BANDLIM_UNLIMITED;
- goto dropwithreset;
- }
- if (CC_GT(to.to_cc, tp->cc_recv)) {
- tp = tcp_close(tp);
- goto findpcb;
- }
- else
- goto drop;
- }
break; /* continue normal processing */
}
}
/*
- * T/TCP mechanism
- * If T/TCP was negotiated and the segment doesn't have CC,
- * or if its CC is wrong then drop the segment.
- * RST segments do not have to comply with this.
- */
- if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) &&
- (!(to.to_flags & TOF_CC) || tp->cc_recv != to.to_cc))
- goto dropafterack;
-
- /*
* In the SYN-RECEIVED state, validate that the packet belongs to
* this connection before trimming the data to fit the receive
* window. Check the sequence number versus IRS since we know
tp->rcv_scale = tp->request_r_scale;
}
/*
- * Upon successful completion of 3-way handshake,
- * update cache.CC if it was undefined, pass any queued
- * data to the user, and advance state appropriately.
- */
- if ((taop = tcp_gettaocache(&inp->inp_inc)) != NULL &&
- taop->tao_cc == 0)
- taop->tao_cc = tp->cc_recv;
-
- /*
* Make transitions:
* SYN-RECEIVED -> ESTABLISHED
* SYN-RECEIVED* -> FIN-WAIT-1
if (ourfinisacked) {
tp->t_state = TCPS_TIME_WAIT;
tcp_canceltimers(tp);
- /* Shorten TIME_WAIT [RFC-1644, p.28] */
- if (tp->cc_recv != 0 &&
- (ticks - tp->t_starttime) < tcp_msl) {
- tcp_callout_reset(tp, tp->tt_2msl,
- tp->t_rxtcur * TCPTV_TWTRUNC,
- tcp_timer_2msl);
- } else {
- tcp_callout_reset(tp, tp->tt_2msl,
+ tcp_callout_reset(tp, tp->tt_2msl,
2 * tcp_msl, tcp_timer_2msl);
- }
soisdisconnected(so);
}
break;
case TCPS_FIN_WAIT_2:
tp->t_state = TCPS_TIME_WAIT;
tcp_canceltimers(tp);
- /* Shorten TIME_WAIT [RFC-1644, p.28] */
- if (tp->cc_recv != 0 &&
- (ticks - tp->t_starttime) < tcp_msl) {
- tcp_callout_reset(tp, tp->tt_2msl,
- tp->t_rxtcur * TCPTV_TWTRUNC,
- tcp_timer_2msl);
- /* For transaction client, force ACK now. */
- tp->t_flags |= TF_ACKNOW;
- } else {
- tcp_callout_reset(tp, tp->tt_2msl, 2 * tcp_msl,
+ tcp_callout_reset(tp, tp->tt_2msl, 2 * tcp_msl,
tcp_timer_2msl);
- }
soisdisconnected(so);
break;
if (to->to_tsecr != 0 && TSTMP_GT(to->to_tsecr, ticks))
to->to_tsecr = 0;
break;
- case TCPOPT_CC:
- if (optlen != TCPOLEN_CC)
- continue;
- to->to_flags |= TOF_CC;
- bcopy(cp + 2, &to->to_cc, sizeof to->to_cc);
- to->to_cc = ntohl(to->to_cc);
- break;
- case TCPOPT_CCNEW:
- if (optlen != TCPOLEN_CC)
- continue;
- if (!is_syn)
- continue;
- to->to_flags |= TOF_CCNEW;
- bcopy(cp + 2, &to->to_cc, sizeof to->to_cc);
- to->to_cc = ntohl(to->to_cc);
- break;
- case TCPOPT_CCECHO:
- if (optlen != TCPOLEN_CC)
- continue;
- if (!is_syn)
- continue;
- to->to_flags |= TOF_CCECHO;
- bcopy(cp + 2, &to->to_ccecho, sizeof to->to_ccecho);
- to->to_ccecho = ntohl(to->to_ccecho);
- break;
case TCPOPT_SACK_PERMITTED:
if (optlen != TCPOLEN_SACK_PERMITTED)
continue;
*
* NOTE that this routine is only called when we process an incoming
* segment, for outgoing segments only tcp_mssopt is called.
- *
- * In case of T/TCP, we call this routine during implicit connection
- * setup as well (offer = -1), to initialize maxseg from the cached
- * MSS of our peer.
*/
void
tcp_mss(struct tcpcb *tp, int offer)
u_long bufsize;
struct inpcb *inp = tp->t_inpcb;
struct socket *so;
- struct rmxp_tao *taop;
- int origoffer = offer;
#ifdef INET6
boolean_t isipv6 = ((inp->inp_vflag & INP_IPV6) ? TRUE : FALSE);
size_t min_protoh = isipv6 ?
ifp = rt->rt_ifp;
so = inp->inp_socket;
- taop = rmx_taop(rt->rt_rmx);
-
- /*
- * Offer == -1 means that we didn't receive SYN yet,
- * use cached value in that case;
- */
- if (offer == -1)
- offer = taop->tao_mssopt;
-
/*
* Offer == 0 means that there was no MSS on the SYN segment,
* in this case we use either the interface mtu or tcp_mssdflt.
offer = max(offer, tcp_minmss);
offer = max(offer, 64);
- taop->tao_mssopt = offer;
+ rt->rt_rmx.rmx_mssopt = offer;
/*
* While we're here, check if there's an initial rtt
*/
tp->t_maxopd = mss;
- /*
- * In case of T/TCP, origoffer==-1 indicates, that no segments
- * were received yet. In this case we just guess, otherwise
- * we do the same as before T/TCP.
- */
if ((tp->t_flags & (TF_REQ_TSTMP | TF_NOOPT)) == TF_REQ_TSTMP &&
- (origoffer == -1 ||
- (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
+ ((tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
mss -= TCPOLEN_TSTAMP_APPA;
- if ((tp->t_flags & (TF_REQ_CC | TF_NOOPT)) == TF_REQ_CC &&
- (origoffer == -1 ||
- (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
- mss -= TCPOLEN_CC_APPA;
#if (MCLBYTES & (MCLBYTES - 1)) == 0
if (mss > MCLBYTES)
#else
const boolean_t isipv6 = FALSE;
#endif
- struct rmxp_tao *taop;
/*
* Determine length of data that should be transmitted,
/*
* Lop off SYN bit if it has already been sent. However, if this
- * is SYN-SENT state and if segment contains data and if we don't
- * know that foreign host supports TAO, suppress sending segment.
+ * is SYN-SENT state and if segment contains data, suppress sending
+ * segment (sending the segment would be an option if we still
+ * did TAO and the remote host supported it).
*/
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
flags &= ~TH_SYN;
off--, len++;
- if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
- ((taop = tcp_gettaocache(&inp->inp_inc)) == NULL ||
- taop->tao_ccsent == 0))
+ if (len > 0 && tp->t_state == TCPS_SYN_SENT)
return 0;
}
/*
- * Be careful not to send data and/or FIN on SYN segments
- * in cases when no CC option will be sent.
+ * Be careful not to send data and/or FIN on SYN segments.
* This measure is needed to prevent interoperability problems
* with not fully conformant TCP implementations.
*/
- if ((flags & TH_SYN) &&
- ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) ||
- ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) {
+ if (flags & TH_SYN) {
len = 0;
flags &= ~TH_FIN;
}
tp->rfbuf_ts = ticks;
/*
- * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
- * options are allowed (!TF_NOOPT) and it's not a RST.
- */
- if ((tp->t_flags & (TF_REQ_CC | TF_NOOPT)) == TF_REQ_CC &&
- !(flags & TH_RST)) {
- switch (flags & (TH_SYN | TH_ACK)) {
- /*
- * This is a normal ACK, send CC if we received CC before
- * from our peer.
- */
- case TH_ACK:
- if (!(tp->t_flags & TF_RCVD_CC))
- break;
- /*FALLTHROUGH*/
-
- /*
- * We can only get here in T/TCP's SYN_SENT* state, when
- * we're a sending a non-SYN segment without waiting for
- * the ACK of our SYN. A check above assures that we only
- * do this if our peer understands T/TCP.
- */
- case 0:
- opt[optlen++] = TCPOPT_NOP;
- opt[optlen++] = TCPOPT_NOP;
- opt[optlen++] = TCPOPT_CC;
- opt[optlen++] = TCPOLEN_CC;
- *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
- optlen += 4;
- break;
-
- /*
- * This is our initial SYN, check whether we have to use
- * CC or CC.new.
- */
- case TH_SYN:
- opt[optlen++] = TCPOPT_NOP;
- opt[optlen++] = TCPOPT_NOP;
- opt[optlen++] = tp->t_flags & TF_SENDCCNEW ?
- TCPOPT_CCNEW : TCPOPT_CC;
- opt[optlen++] = TCPOLEN_CC;
- *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
- optlen += 4;
- break;
-
- /*
- * This is a SYN,ACK; send CC and CC.echo if we received
- * CC from our peer.
- */
- case (TH_SYN | TH_ACK):
- if (tp->t_flags & TF_RCVD_CC) {
- opt[optlen++] = TCPOPT_NOP;
- opt[optlen++] = TCPOPT_NOP;
- opt[optlen++] = TCPOPT_CC;
- opt[optlen++] = TCPOLEN_CC;
- *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
- optlen += 4;
- opt[optlen++] = TCPOPT_NOP;
- opt[optlen++] = TCPOPT_NOP;
- opt[optlen++] = TCPOPT_CCECHO;
- opt[optlen++] = TCPOLEN_CC;
- *(u_int32_t *)&opt[optlen] = htonl(tp->cc_recv);
- optlen += 4;
- }
- break;
- }
- }
-
- /*
* If this is a SACK connection and we have a block to report,
* fill in the SACK blocks in the TCP options.
*/
#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
/*
- * TCP connection counts are 32 bit integers operated
- * on with modular arithmetic. These macros can be
- * used to compare such integers.
- */
-#define CC_LT(a,b) ((int)((a)-(b)) < 0)
-#define CC_LEQ(a,b) ((int)((a)-(b)) <= 0)
-#define CC_GT(a,b) ((int)((a)-(b)) > 0)
-#define CC_GEQ(a,b) ((int)((a)-(b)) >= 0)
-
-/* Macro to increment a CC: skip 0 which has a special meaning */
-#define CC_INC(c) (++(c) == 0 ? ++(c) : (c))
-
-/*
* Macros to initialize tcp sequence numbers for
* send and receive from initial send and receive
* sequence numbers.
#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * hz)
/* timestamp wrap-around time */
-#ifdef _KERNEL
-extern tcp_cc tcp_ccgen; /* global connection count */
-#endif /* _KERNEL */
#endif /* _NETINET_TCP_SEQ_H_ */
SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
&tcp_do_rfc1323, 0, "Enable rfc1323 (high performance TCP) extensions");
-int tcp_do_rfc1644 = 0;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW,
- &tcp_do_rfc1644, 0, "Enable rfc1644 (TTCP) extensions");
-
static int tcp_tcbhashsize = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD,
&tcp_tcbhashsize, 0, "Size of TCP control block hashtable");
static struct malloc_pipe tcptemp_mpipe;
static void tcp_willblock(int);
-static void tcp_cleartaocache (void);
static void tcp_notify (struct inpcb *, int);
struct tcp_stats tcpstats_percpu[MAXCPU];
mpipe_init(&tcptemp_mpipe, M_TCPTEMP, sizeof(struct tcptemp),
25, -1, 0, NULL);
- tcp_ccgen = 1;
- tcp_cleartaocache();
-
tcp_delacktime = TCPTV_DELACK;
tcp_keepinit = TCPTV_KEEP_INIT;
tcp_keepidle = TCPTV_KEEP_IDLE;
if (tcp_do_rfc1323)
tp->t_flags = (TF_REQ_SCALE | TF_REQ_TSTMP);
- if (tcp_do_rfc1644)
- tp->t_flags |= TF_REQ_CC;
tp->t_inpcb = inp; /* XXX */
tp->t_state = TCPS_CLOSED;
/*
else
rt = tcp_rtlookup(&inp->inp_inc);
if (rt != NULL) {
- struct rmxp_tao *taop = rmx_taop(rt->rt_rmx);
-
if (rt->rt_rmx.rmx_mtu != 0 && rt->rt_rmx.rmx_mtu < mtu)
mtu = rt->rt_rmx.rmx_mtu;
* will get recorded and the new parameters should get
* recomputed. For Further Study.
*/
- if (taop->tao_mssopt != 0 && taop->tao_mssopt < maxopd)
- maxopd = taop->tao_mssopt;
+ if (rt->rt_rmx.rmx_mssopt && rt->rt_rmx.rmx_mssopt < maxopd)
+ maxopd = rt->rt_rmx.rmx_mssopt;
} else
maxopd = mtu -
(isipv6 ?
(TF_REQ_TSTMP | TF_RCVD_TSTMP))
mss -= TCPOLEN_TSTAMP_APPA;
- if ((tp->t_flags & (TF_REQ_CC | TF_RCVD_CC | TF_NOOPT)) ==
- (TF_REQ_CC | TF_RCVD_CC))
- mss -= TCPOLEN_CC_APPA;
-
/* round down to multiple of MCLBYTES */
#if (MCLBYTES & (MCLBYTES - 1)) == 0 /* test if MCLBYTES power of 2 */
if (mss > MCLBYTES)
#endif
/*
- * Return a pointer to the cached information about the remote host.
- * The cached information is stored in the protocol specific part of
- * the route metrics.
- */
-struct rmxp_tao *
-tcp_gettaocache(struct in_conninfo *inc)
-{
- struct rtentry *rt;
-
-#ifdef INET6
- if (inc->inc_isipv6)
- rt = tcp_rtlookup6(inc);
- else
-#endif
- rt = tcp_rtlookup(inc);
-
- /* Make sure this is a host route and is up. */
- if (rt == NULL ||
- (rt->rt_flags & (RTF_UP | RTF_HOST)) != (RTF_UP | RTF_HOST))
- return (NULL);
-
- return (rmx_taop(rt->rt_rmx));
-}
-
-/*
- * Clear all the TAO cache entries, called from tcp_init.
- *
- * XXX
- * This routine is just an empty one, because we assume that the routing
- * routing tables are initialized at the same time when TCP, so there is
- * nothing in the cache left over.
- */
-static void
-tcp_cleartaocache(void)
-{
-}
-
-/*
* TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
*
* This code attempts to calculate the bandwidth-delay product as a
tp->ts_recent = sc->sc_tsrecent;
tp->ts_recent_age = ticks;
}
- if (sc->sc_flags & SCF_CC) {
- /*
- * Initialization of the tcpcb for transaction;
- * set SND.WND = SEG.WND,
- * initialize CCsend and CCrecv.
- */
- tp->t_flags |= TF_REQ_CC | TF_RCVD_CC;
- tp->cc_send = sc->sc_cc_send;
- tp->cc_recv = sc->sc_cc_recv;
- }
if (sc->sc_flags & SCF_SACK_PERMITTED)
tp->t_flags |= TF_SACK_PERMITTED;
struct syncache *sc = NULL;
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
- struct rmxp_tao *taop;
int win;
syncache_percpu = &tcp_syncache_percpu[mycpu->gd_cpuid];
sc->sc_flags |= SCF_WINSCALE;
}
}
- if (tcp_do_rfc1644) {
- /*
- * A CC or CC.new option received in a SYN makes
- * it ok to send CC in subsequent segments.
- */
- if (to->to_flags & (TOF_CC | TOF_CCNEW)) {
- sc->sc_cc_recv = to->to_cc;
- sc->sc_cc_send = CC_INC(tcp_ccgen);
- sc->sc_flags |= SCF_CC;
- }
- }
if (tcp_do_sack && (to->to_flags & TOF_SACK_PERMITTED))
sc->sc_flags |= SCF_SACK_PERMITTED;
if (tp->t_flags & TF_NOOPT)
sc->sc_flags = SCF_NOOPT;
- /*
- * XXX
- * We have the option here of not doing TAO (even if the segment
- * qualifies) and instead fall back to a normal 3WHS via the syncache.
- * This allows us to apply synflood protection to TAO-qualifying SYNs
- * also. However, there should be a hueristic to determine when to
- * do this, and is not present at the moment.
- */
-
- /*
- * Perform TAO test on incoming CC (SEG.CC) option, if any.
- * - compare SEG.CC against cached CC from the same host, if any.
- * - if SEG.CC > chached value, SYN must be new and is accepted
- * immediately: save new CC in the cache, mark the socket
- * connected, enter ESTABLISHED state, turn on flag to
- * send a SYN in the next segment.
- * A virtual advertised window is set in rcv_adv to
- * initialize SWS prevention. Then enter normal segment
- * processing: drop SYN, process data and FIN.
- * - otherwise do a normal 3-way handshake.
- */
- taop = tcp_gettaocache(&sc->sc_inc);
- if (to->to_flags & TOF_CC) {
- if ((tp->t_flags & TF_NOPUSH) &&
- sc->sc_flags & SCF_CC &&
- taop != NULL && taop->tao_cc != 0 &&
- CC_GT(to->to_cc, taop->tao_cc)) {
- sc->sc_rxtslot = 0;
- so = syncache_socket(sc, *sop, m);
- if (so != NULL) {
- taop->tao_cc = to->to_cc;
- *sop = so;
- }
- syncache_free(sc);
- return (so != NULL);
- }
- } else {
- /*
- * No CC option, but maybe CC.NEW: invalidate cached value.
- */
- if (taop != NULL)
- taop->tao_cc = 0;
- }
- /*
- * TAO test failed or there was no CC option,
- * do a standard 3-way handshake.
- */
if (syncache_respond(sc, m) == 0) {
syncache_insert(sc, sch);
tcpstat.tcps_sndacks++;
optlen = TCPOLEN_MAXSEG +
((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) +
((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0) +
- ((sc->sc_flags & SCF_CC) ? TCPOLEN_CC_APPA * 2 : 0) +
((sc->sc_flags & SCF_SACK_PERMITTED) ?
TCPOLEN_SACK_PERMITTED_ALIGNED : 0);
}
optp += TCPOLEN_TSTAMP_APPA;
}
- /*
- * Send CC and CC.echo if we received CC from our peer.
- */
- if (sc->sc_flags & SCF_CC) {
- u_int32_t *lp = (u_int32_t *)(optp);
-
- *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC));
- *lp++ = htonl(sc->sc_cc_send);
- *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CCECHO));
- *lp = htonl(sc->sc_cc_recv);
- optp += TCPOLEN_CC_APPA * 2;
- }
-
if (sc->sc_flags & SCF_SACK_PERMITTED) {
*((u_int32_t *)optp) = htonl(TCPOPT_SACK_PERMITTED_ALIGNED);
optp += TCPOLEN_SACK_PERMITTED_ALIGNED;
TCPT_RANGESET(tp->t_rxtcur, rexmt,
tp->t_rttmin, TCPTV_REXMTMAX);
/*
- * Disable rfc1323 and rfc1644 if we havn't got any response to
+ * Disable rfc1323 if we haven't got any response to
* our third SYN to work-around some broken terminal servers
* (most of which have hopefully been retired) that have bad VJ
* header compression code which trashes TCP segments containing
* unknown-to-them TCP options.
*/
if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
- tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC);
+ tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
/*
* If losing, let the lower level know and try for
* a better route. Also, if we backed off this far,
struct inpcb *inp = tp->t_inpcb, *oinp;
struct socket *so = inp->inp_socket;
struct route *ro = &inp->inp_route;
- struct tcpcb *otp;
- struct rmxp_tao *taop;
- struct rmxp_tao tao_noncached;
oinp = in_pcblookup_hash(&tcbinfo[mycpu->gd_cpuid],
sin->sin_addr, sin->sin_port,
inp->inp_laddr : if_sin->sin_addr,
inp->inp_lport, 0, NULL);
if (oinp != NULL) {
- if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
- otp->t_state == TCPS_TIME_WAIT &&
- (ticks - otp->t_starttime) < tcp_msl &&
- (otp->t_flags & TF_RCVD_CC)) {
- tcp_close(otp);
- } else {
- m_freem(m);
- return (EADDRINUSE);
- }
+ m_freem(m);
+ return (EADDRINUSE);
}
if (inp->inp_laddr.s_addr == INADDR_ANY)
inp->inp_laddr = if_sin->sin_addr;
}
/*
- * Generate a CC value for this connection and
- * check whether CC or CCnew should be used.
- */
- if ((taop = tcp_gettaocache(&tp->t_inpcb->inp_inc)) == NULL) {
- taop = &tao_noncached;
- bzero(taop, sizeof *taop);
- }
-
- tp->cc_send = CC_INC(tcp_ccgen);
- if (taop->tao_ccsent != 0 &&
- CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
- taop->tao_ccsent = tp->cc_send;
- } else {
- taop->tao_ccsent = 0;
- tp->t_flags |= TF_SENDCCNEW;
- }
-
- /*
* Close the send side of the connection after
* the data is sent if flagged.
*/
* Common subroutine to open a TCP connection to remote host specified
* by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
* port number if needed. Call in_pcbladdr to do the routing and to choose
- * a local host address (interface). If there is an existing incarnation
- * of the same connection in TIME-WAIT state and if the remote host was
- * sending CC options and if the connection duration was < MSL, then
- * truncate the previous TIME-WAIT state and proceed.
+ * a local host address (interface).
* Initialize connection parameters and enter SYN-SENT state.
*/
static int
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
struct inpcb *oinp;
- struct tcpcb *otp;
- struct rmxp_tao *taop;
- struct rmxp_tao tao_noncached;
/*
* Cannot simply call in_pcbconnect, because there might be an
addr6 : &inp->in6p_laddr,
inp->inp_lport, 0, NULL);
if (oinp) {
- if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
- otp->t_state == TCPS_TIME_WAIT &&
- (ticks - otp->t_starttime) < tcp_msl &&
- (otp->t_flags & TF_RCVD_CC)) {
- otp = tcp_close(otp);
- } else {
- m_freem(m);
- return (EADDRINUSE);
- }
+ m_freem(m);
+ return (EADDRINUSE);
}
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
inp->in6p_laddr = *addr6;
}
/*
- * Generate a CC value for this connection and
- * check whether CC or CCnew should be used.
- */
- if ((taop = tcp_gettaocache(&tp->t_inpcb->inp_inc)) == NULL) {
- taop = &tao_noncached;
- bzero(taop, sizeof *taop);
- }
-
- tp->cc_send = CC_INC(tcp_ccgen);
- if (taop->tao_ccsent != 0 &&
- CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
- taop->tao_ccsent = tp->cc_send;
- } else {
- taop->tao_ccsent = 0;
- tp->t_flags |= TF_SENDCCNEW;
- }
-
- /*
* Close the send side of the connection after
* the data is sent if flagged.
*/
* Kernel variables for tcp.
*/
extern int tcp_do_rfc1323;
-extern int tcp_do_rfc1644;
extern int tcp_do_rfc3390;
extern int tcp_do_sack;
extern int tcp_do_smartsack;
#define TF_NEEDSYN 0x00000400 /* send SYN (implicit state) */
#define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */
#define TF_NOPUSH 0x00001000 /* don't push */
-#define TF_REQ_CC 0x00002000 /* have/will request CC */
-#define TF_RCVD_CC 0x00004000 /* a CC was received in SYN */
-#define TF_SENDCCNEW 0x00008000 /* send CCnew instead of CC in SYN */
+/* 0x00002000 - 0x00008000 were used for T/TCP */
#define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */
#define TF_LASTIDLE 0x00040000 /* connection was previously idle */
u_long ts_recent_age; /* when last updated */
tcp_seq last_ack_sent;
-/* RFC 1644 variables */
- tcp_cc cc_send; /* send connection count */
- tcp_cc cc_recv; /* receive connection count */
-
/* experimental */
u_long snd_cwnd_prev; /* cwnd prior to retransmit */
u_long snd_wacked_prev; /* prior bytes acked in send window */
struct tcpopt {
u_long to_flags; /* which options are present */
#define TOF_TS 0x0001 /* timestamp */
-#define TOF_CC 0x0002 /* CC and CCnew are exclusive */
-#define TOF_CCNEW 0x0004
-#define TOF_CCECHO 0x0008
#define TOF_MSS 0x0010
#define TOF_SCALE 0x0020
#define TOF_SACK_PERMITTED 0x0040
#define TOF_SACK 0x0080
u_int32_t to_tsval;
u_int32_t to_tsecr;
- tcp_cc to_cc; /* holds CC or CCnew */
- tcp_cc to_ccecho;
u_int16_t to_mss;
u_int8_t to_requested_s_scale;
u_int8_t to_nsackblocks;
#define sc_route sc_inc.inc_route
#define sc_route6 sc_inc.inc6_route
u_int32_t sc_tsrecent;
- tcp_cc sc_cc_send; /* holds CC or CCnew */
- tcp_cc sc_cc_recv;
tcp_seq sc_irs; /* seq from peer */
tcp_seq sc_iss; /* our ISS */
u_long sc_rxttime; /* retransmit time */
#define SCF_NOOPT 0x01 /* no TCP options */
#define SCF_WINSCALE 0x02 /* negotiated window scaling */
#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */
-#define SCF_CC 0x08 /* negotiated CC */
#define SCF_UNREACH 0x10 /* icmp unreachable received */
#define SCF_SACK_PERMITTED 0x20 /* saw SACK permitted option */
TAILQ_ENTRY(syncache) sc_hash;
u_int sch_length;
};
-/*
- * The TAO cache entry which is stored in the protocol family specific
- * portion of the route metrics.
- */
-struct rmxp_tao {
- tcp_cc tao_cc; /* latest CC in valid SYN */
- tcp_cc tao_ccsent; /* latest CC sent to peer */
- u_short tao_mssopt; /* peer's cached MSS */
-
-#ifdef notyet
- u_short tao_flags; /* cache status flags */
-#define TAOF_DONT 0x0001 /* peer doesn't understand rfc1644 */
-#define TAOF_OK 0x0002 /* peer does understand rfc1644 */
-#define TAOF_UNDEF 0 /* we don't know yet */
-#endif
-};
-
-#define rmx_taop(rt) ((struct rmxp_tao *)(rt).rmx_filler)
-
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
#define sototcpcb(so) (intotcpcb(sotoinpcb(so)))
* Names for TCP sysctl objects
*/
#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */
-#define TCPCTL_DO_RFC1644 2 /* use RFC-1644 extensions */
+/* 2 was TCPCTL_DO_RFC1644 */
#define TCPCTL_MSSDFLT 3 /* MSS default */
#define TCPCTL_STATS 4 /* statistics (read-only) */
#define TCPCTL_RTTDFLT 5 /* default RTT estimate */
#define TCPCTL_NAMES { \
{ 0, 0 }, \
{ "rfc1323", CTLTYPE_INT }, \
- { "rfc1644", CTLTYPE_INT }, \
+ { "reserved", CTLTYPE_INT}, /* was rfc1644 */ \
{ "mssdflt", CTLTYPE_INT }, \
{ "stats", CTLTYPE_STRUCT }, \
{ "rttdflt", CTLTYPE_INT }, \
tcp_drop (struct tcpcb *, int);
void tcp_drain (void);
void tcp_fasttimo (void);
-struct rmxp_tao *
- tcp_gettaocache (struct in_conninfo *);
void tcp_init (void);
void tcp_thread_init (void);
void tcp_input (struct mbuf *, ...);