2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)spp_usrreq.c 8.1 (Berkeley) 6/10/93
34 * $FreeBSD: src/sys/netns/spp_usrreq.c,v 1.11 1999/08/28 00:49:53 peter Exp $
35 * $DragonFly: src/sys/netproto/ns/spp_usrreq.c,v 1.22 2007/04/22 01:13:16 dillon Exp $
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/malloc.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/errno.h>
47 #include <sys/thread2.h>
50 #include <net/route.h>
51 #include <netinet/tcp_fsm.h>
60 #include "spp_timer.h"
62 #include "spp_debug.h"
64 extern u_char nsctlerrmap[]; /* from ns_input.c */
65 extern int idpcksum; /* from ns_input.c */
67 static MALLOC_DEFINE(M_IDP, "ns_idp", "NS Packet Management");
68 static MALLOC_DEFINE(M_SPIDP_Q, "ns_spidp_q", "NS Packet Management");
69 static MALLOC_DEFINE(M_SPPCB, "ns_sppcb", "NS PCB Management");
71 struct spp_istat spp_istat;
73 int spp_backoff[SPP_MAXRXTSHIFT+1] =
74 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
77 * SP protocol implementation.
82 spp_iss = 1; /* WRONG !! should fish it out of TODR */
84 struct spidp spp_savesi;
86 extern int sppconsdebug;
88 int spp_use_delack = 0;
89 u_short spp_newchecks[50];
93 spp_input(struct mbuf *m, ...)
104 nsp = __va_arg(ap, struct nspcb *);
107 sppstat.spps_rcvtotal++;
109 panic("No nspcb in spp_input");
114 if (cb == 0) goto bad;
116 if (m->m_len < sizeof(struct spidp)) {
117 if ((m = m_pullup(m, sizeof(*si))) == 0) {
118 sppstat.spps_rcvshort++;
122 si = mtod(m, struct spidp *);
123 si->si_seq = ntohs(si->si_seq);
124 si->si_ack = ntohs(si->si_ack);
125 si->si_alo = ntohs(si->si_alo);
127 so = nsp->nsp_socket;
128 if (so->so_options & SO_DEBUG || traceallspps) {
129 ostate = cb->s_state;
132 if (so->so_options & SO_ACCEPTCONN) {
133 struct sppcb *ocb = cb;
135 so = sonewconn(so, 0);
140 * This is ugly, but ....
142 * Mark socket as temporary until we're
143 * committed to keeping it. The code at
144 * ``drop'' and ``dropwithreset'' check the
145 * flag dropsocket to see if the temporary
146 * socket created here should be discarded.
147 * We mark the socket as discardable until
148 * we're committed to it below in TCPS_LISTEN.
151 nsp = (struct nspcb *)so->so_pcb;
152 nsp->nsp_laddr = si->si_dna;
154 cb->s_mtu = ocb->s_mtu; /* preserve sockopts */
155 cb->s_flags = ocb->s_flags; /* preserve sockopts */
156 cb->s_flags2 = ocb->s_flags2; /* preserve sockopts */
157 cb->s_state = TCPS_LISTEN;
161 * Packet received on connection.
162 * reset idle time and keep-alive timer;
165 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
167 switch (cb->s_state) {
171 struct sockaddr_ns *sns;
172 struct ns_addr laddr;
175 * If somebody here was carrying on a conversation
176 * and went away, and his pen pal thinks he can
177 * still talk, we get the misdirected packet.
179 if (spp_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
183 am = m_get(MB_DONTWAIT, MT_SONAME);
186 am->m_len = sizeof (struct sockaddr_ns);
187 sns = mtod(am, struct sockaddr_ns *);
188 sns->sns_len = sizeof(*sns);
189 sns->sns_family = AF_NS;
190 sns->sns_addr = si->si_sna;
191 laddr = nsp->nsp_laddr;
192 if (ns_nullhost(laddr))
193 nsp->nsp_laddr = si->si_dna;
194 if (ns_pcbconnect(nsp, mtod(am, struct sockaddr *))) {
195 nsp->nsp_laddr = laddr;
202 dropsocket = 0; /* committed to socket */
203 cb->s_did = si->si_sid;
204 cb->s_rack = si->si_ack;
205 cb->s_ralo = si->si_alo;
206 #define THREEWAYSHAKE
208 cb->s_state = TCPS_SYN_RECEIVED;
209 cb->s_force = 1 + SPPT_KEEP;
210 sppstat.spps_accepts++;
211 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
215 * This state means that we have heard a response
216 * to our acceptance of their connection
217 * It is probably logically unnecessary in this
220 case TCPS_SYN_RECEIVED: {
221 if (si->si_did!=cb->s_sid) {
226 nsp->nsp_fport = si->si_sport;
227 cb->s_timer[SPPT_REXMT] = 0;
228 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
230 cb->s_state = TCPS_ESTABLISHED;
231 sppstat.spps_accepts++;
236 * This state means that we have gotten a response
237 * to our attempt to establish a connection.
238 * We fill in the data from the other side,
239 * telling us which port to respond to, instead of the well-
240 * known one we might have sent to in the first place.
241 * We also require that this is a response to our
245 if (si->si_did!=cb->s_sid) {
249 sppstat.spps_connects++;
250 cb->s_did = si->si_sid;
251 cb->s_rack = si->si_ack;
252 cb->s_ralo = si->si_alo;
253 cb->s_dport = nsp->nsp_fport = si->si_sport;
254 cb->s_timer[SPPT_REXMT] = 0;
255 cb->s_flags |= SF_ACKNOW;
257 cb->s_state = TCPS_ESTABLISHED;
258 /* Use roundtrip time of connection request for initial rtt */
260 cb->s_srtt = cb->s_rtt << 3;
261 cb->s_rttvar = cb->s_rtt << 1;
262 SPPT_RANGESET(cb->s_rxtcur,
263 ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
264 SPPTV_MIN, SPPTV_REXMTMAX);
268 if (so->so_options & SO_DEBUG || traceallspps)
269 spp_trace(SA_INPUT, (u_char)ostate, cb, &spp_savesi, 0);
271 m->m_len -= sizeof (struct idp);
272 m->m_pkthdr.len -= sizeof (struct idp);
273 m->m_data += sizeof (struct idp);
275 if (spp_reass(cb, si, m)) {
278 if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
279 spp_output(cb, NULL);
280 cb->s_flags &= ~(SF_WIN|SF_RXT);
286 si->si_seq = ntohs(si->si_seq);
287 si->si_ack = ntohs(si->si_ack);
288 si->si_alo = ntohs(si->si_alo);
289 ns_error(m, NS_ERR_NOSOCK, 0);
290 if (cb->s_nspcb->nsp_socket->so_options & SO_DEBUG || traceallspps)
291 spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
296 if (cb == 0 || cb->s_nspcb->nsp_socket->so_options & SO_DEBUG ||
298 spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
302 int spprexmtthresh = 3;
305 * This is structurally similar to the tcp reassembly routine
306 * but its function is somewhat different: It merely queues
307 * packets up, and suppresses duplicates.
310 spp_reass(struct sppcb *cb, struct spidp *si, struct mbuf *si_m)
315 struct socket *so = cb->s_nspcb->nsp_socket;
316 char packetp = cb->s_flags & SF_HI;
323 * Update our news from them.
325 if (si->si_cc & SP_SA)
326 cb->s_flags |= (spp_use_delack ? SF_DELACK : SF_ACKNOW);
327 if (SSEQ_GT(si->si_alo, cb->s_ralo))
328 cb->s_flags |= SF_WIN;
329 if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
330 if ((si->si_cc & SP_SP) && cb->s_rack != (cb->s_smax + 1)) {
331 sppstat.spps_rcvdupack++;
333 * If this is a completely duplicate ack
334 * and other conditions hold, we assume
335 * a packet has been dropped and retransmit
336 * it exactly as in tcp_input().
338 if (si->si_ack != cb->s_rack ||
339 si->si_alo != cb->s_ralo)
341 else if (++cb->s_dupacks == spprexmtthresh) {
342 u_short onxt = cb->s_snxt;
343 int cwnd = cb->s_cwnd;
345 cb->s_snxt = si->si_ack;
347 cb->s_force = 1 + SPPT_REXMT;
348 spp_output(cb, NULL);
349 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
351 if (cwnd >= 4 * CUNIT)
352 cb->s_cwnd = cwnd / 2;
353 if (SSEQ_GT(onxt, cb->s_snxt))
363 * If our correspondent acknowledges data we haven't sent
364 * TCP would drop the packet after acking. We'll be a little
367 if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
368 sppstat.spps_rcvacktoomuch++;
369 si->si_ack = cb->s_smax + 1;
371 sppstat.spps_rcvackpack++;
373 * If transmit timer is running and timed sequence
374 * number was acked, update smoothed round trip time.
375 * See discussion of algorithm in tcp_input.c
377 if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
378 sppstat.spps_rttupdated++;
379 if (cb->s_srtt != 0) {
381 delta = cb->s_rtt - (cb->s_srtt >> 3);
382 if ((cb->s_srtt += delta) <= 0)
386 delta -= (cb->s_rttvar >> 2);
387 if ((cb->s_rttvar += delta) <= 0)
391 * No rtt measurement yet
393 cb->s_srtt = cb->s_rtt << 3;
394 cb->s_rttvar = cb->s_rtt << 1;
398 SPPT_RANGESET(cb->s_rxtcur,
399 ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
400 SPPTV_MIN, SPPTV_REXMTMAX);
403 * If all outstanding data is acked, stop retransmit
404 * timer and remember to restart (more output or persist).
405 * If there is more data to be acked, restart retransmit
406 * timer, using current (possibly backed-off) value;
408 if (si->si_ack == cb->s_smax + 1) {
409 cb->s_timer[SPPT_REXMT] = 0;
410 cb->s_flags |= SF_RXT;
411 } else if (cb->s_timer[SPPT_PERSIST] == 0)
412 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
414 * When new data is acked, open the congestion window.
415 * If the window gives us less than ssthresh packets
416 * in flight, open exponentially (maxseg at a time).
417 * Otherwise open linearly (maxseg^2 / cwnd at a time).
420 if (cb->s_cwnd > cb->s_ssthresh)
421 incr = max(incr * incr / cb->s_cwnd, 1);
422 cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
424 * Trim Acked data from output queue.
426 while ((m = so->so_snd.ssb_mb) != NULL) {
427 if (SSEQ_LT((mtod(m, struct spidp *))->si_seq, si->si_ack))
428 sbdroprecord(&so->so_snd.sb);
433 cb->s_rack = si->si_ack;
435 if (SSEQ_LT(cb->s_snxt, cb->s_rack))
436 cb->s_snxt = cb->s_rack;
437 if (SSEQ_LT(cb->s_swl1, si->si_seq) || (cb->s_swl1 == si->si_seq &&
438 (SSEQ_LT(cb->s_swl2, si->si_ack) ||
439 (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo))))) {
440 /* keep track of pure window updates */
441 if ((si->si_cc & SP_SP) && cb->s_swl2 == si->si_ack
442 && SSEQ_LT(cb->s_ralo, si->si_alo)) {
443 sppstat.spps_rcvwinupd++;
444 sppstat.spps_rcvdupack--;
446 cb->s_ralo = si->si_alo;
447 cb->s_swl1 = si->si_seq;
448 cb->s_swl2 = si->si_ack;
449 cb->s_swnd = (1 + si->si_alo - si->si_ack);
450 if (cb->s_swnd > cb->s_smxw)
451 cb->s_smxw = cb->s_swnd;
452 cb->s_flags |= SF_WIN;
455 * If this packet number is higher than that which
456 * we have allocated refuse it, unless urgent
458 if (SSEQ_GT(si->si_seq, cb->s_alo)) {
459 if (si->si_cc & SP_SP) {
460 sppstat.spps_rcvwinprobe++;
463 sppstat.spps_rcvpackafterwin++;
464 if (si->si_cc & SP_OB) {
465 if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
466 ns_error(si_m, NS_ERR_FULLUP, 0);
468 } /* else queue this packet; */
470 /*register struct socket *so = cb->s_nspcb->nsp_socket;
471 if (so->so_state && SS_NOFDREF) {
472 ns_error(si_m, NS_ERR_NOSOCK, 0);
477 ns_error(si_m, NS_ERR_FULLUP, 0);
482 * If this is a system packet, we don't need to
483 * queue it up, and won't update acknowledge #
485 if (si->si_cc & SP_SP) {
489 * We have already seen this packet, so drop.
491 if (SSEQ_LT(si->si_seq, cb->s_ack)) {
493 sppstat.spps_rcvduppack++;
494 if (si->si_seq == cb->s_ack - 1)
499 * Loop through all packets queued up to insert in
500 * appropriate sequence.
502 for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
503 if (si->si_seq == SI(q)->si_seq) {
504 sppstat.spps_rcvduppack++;
507 if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
508 sppstat.spps_rcvoopack++;
512 nq = kmalloc(sizeof(struct spidp_q), M_SPIDP_Q, M_INTNOWAIT);
517 insque(nq, q->si_prev);
520 * If this packet is urgent, inform process
522 if (si->si_cc & SP_OB) {
523 cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
525 cb->s_oobflags |= SF_IOOB;
528 #define SPINC sizeof(struct sphdr)
530 * Loop through all packets queued up to update acknowledge
531 * number, and present all acknowledged data to user;
532 * If in packet interface mode, show packet headers.
534 for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
535 if (SI(q)->si_seq == cb->s_ack) {
538 if (SI(q)->si_cc & SP_OB) {
539 cb->s_oobflags &= ~SF_IOOB;
540 if (so->so_rcv.ssb_cc)
541 so->so_oobmark = so->so_rcv.ssb_cc;
543 so->so_state |= SS_RCVATMARK;
548 kfree(nq, M_SPIDP_Q);
550 sppstat.spps_rcvpack++;
552 if (cb->s_flags2 & SF_NEWCALL) {
553 struct sphdr *sp = mtod(m, struct sphdr *);
554 u_char dt = sp->sp_dt;
556 if (dt != cb->s_rhdr.sp_dt) {
558 m_getclr(MB_DONTWAIT, MT_CONTROL);
563 cb->s_rhdr.sp_dt = dt;
564 mm->m_len = 5; /*XXX*/
567 *(u_char *)(&s[2]) = dt;
568 sbappend(&so->so_rcv.sb, mm);
571 if (sp->sp_cc & SP_OB) {
572 m_chtype(m, MT_OOBDATA);
575 so->so_state &= ~SS_RCVATMARK;
580 m->m_pkthdr.len -= SPINC;
582 if ((sp->sp_cc & SP_EM) || packetp) {
583 sbappendrecord(&so->so_rcv.sb, m);
586 sbappend(&so->so_rcv.sb, m);
590 sbappendrecord(&so->so_rcv.sb, m);
592 cb->s_rhdr = *mtod(m, struct sphdr *);
595 m->m_pkthdr.len -= SPINC;
596 sbappend(&so->so_rcv.sb, m);
601 if (wakeup) sorwakeup(so);
606 spp_ctlinput(int cmd, caddr_t arg)
609 struct ns_errp *errp = 0;
611 struct sockaddr_ns *sns;
614 if (cmd < 0 || cmd > PRC_NCMDS)
616 type = NS_ERR_UNREACH_HOST;
625 case PRC_HOSTUNREACH:
626 sns = (struct sockaddr_ns *)arg;
627 if (sns->sns_family != AF_NS)
633 errp = (struct ns_errp *)arg;
634 na = &errp->ns_err_idp.idp_dna;
635 type = errp->ns_err_num;
636 type = ntohs((u_short)type);
640 case NS_ERR_UNREACH_HOST:
641 ns_pcbnotify(na, (int)nsctlerrmap[cmd], spp_abort, (long) 0);
646 nsp = ns_pcblookup(na, errp->ns_err_idp.idp_sna.x_port,
650 spp_drop((struct sppcb *)nsp->nsp_pcb,
651 (int)nsctlerrmap[cmd]);
653 idp_drop(nsp, (int)nsctlerrmap[cmd]);
658 ns_pcbnotify(na, 0, spp_quench, (long) 0);
662 * When a source quench is received, close congestion window
663 * to one packet. We will gradually open it again as we proceed.
666 spp_quench(struct nspcb *nsp)
668 struct sppcb *cb = nstosppcb(nsp);
676 spp_fixmtu(struct nspcb *nsp)
678 struct sppcb *cb = (struct sppcb *)(nsp->nsp_pcb);
682 struct signalsockbuf *ssb;
684 struct mbuf *firstbad, *m0;
688 * The notification that we have sent
689 * too much is bad news -- we will
690 * have to go through queued up so far
691 * splitting ones which are too big and
692 * reassigning sequence numbers and checksums.
693 * we should then retransmit all packets from
694 * one above the offending packet to the last one
695 * we had sent (or our allocation)
696 * then the offending one so that any queued
697 * data at our destination will be discarded.
699 ep = (struct ns_errp *)nsp->nsp_notify_param;
700 ssb = &nsp->nsp_socket->so_snd;
701 cb->s_mtu = ep->ns_err_param;
702 badseq = ep->ns_err_idp.si_seq;
703 for (m = ssb->ssb_mb; m; m = m->m_nextpkt) {
704 si = mtod(m, struct spidp *);
705 if (si->si_seq == badseq)
711 /* calculate length */
712 for (m0 = m, len = 0; m ; m = m->m_next)
714 if (len > cb->s_mtu) {
723 spp_output(struct sppcb *cb, struct mbuf *m0)
725 struct socket *so = cb->s_nspcb->nsp_socket;
726 struct mbuf *m = NULL;
727 struct spidp *si = NULL;
728 struct signalsockbuf *ssb = &so->so_snd;
729 int len = 0, win, rcv_win;
730 short span, off, recordp = 0;
732 int error = 0, sendalot;
742 * Make sure that packet isn't too big.
744 for (m = m0; m ; m = m->m_next) {
747 if (m->m_flags & M_EOR)
750 datalen = (cb->s_flags & SF_HO) ?
751 len - sizeof (struct sphdr) : len;
753 if (cb->s_flags & SF_PI) {
757 int oldEM = cb->s_cc & SP_EM;
762 * Here we are only being called
763 * from usrreq(), so it is OK to
766 m = m_copym(m0, 0, mtu, MB_WAIT);
767 if (cb->s_flags & SF_NEWCALL) {
771 mm->m_flags &= ~M_EOR;
775 error = spp_output(cb, m);
788 * Force length even, by adding a "garbage byte" if
793 if (M_TRAILINGSPACE(m) >= 1)
796 struct mbuf *m1 = m_get(MB_DONTWAIT, MT_DATA);
803 *(mtod(m1, u_char *)) = 0;
807 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
813 * Fill in mbuf with extended SP header
814 * and addresses and length put into network format.
816 MH_ALIGN(m, sizeof (struct spidp));
817 m->m_len = sizeof (struct spidp);
819 si = mtod(m, struct spidp *);
820 si->si_i = *cb->s_idp;
821 si->si_s = cb->s_shdr;
822 if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
824 if (m0->m_len < sizeof (*sh)) {
825 if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
832 sh = mtod(m0, struct sphdr *);
833 si->si_dt = sh->sp_dt;
834 si->si_cc |= sh->sp_cc & SP_EM;
835 m0->m_len -= sizeof (*sh);
836 m0->m_data += sizeof (*sh);
840 if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
844 if (cb->s_oobflags & SF_SOOB) {
847 * make sure OB packets convey exactly 1 byte.
848 * If the packet is 1 byte or larger, we
849 * have already guaranted there to be at least
850 * one garbage byte for the checksum, and
851 * extra bytes shouldn't hurt!
853 if (len > sizeof(*si)) {
855 len = (1 + sizeof(*si));
858 si->si_len = htons((u_short)len);
859 m->m_pkthdr.len = ((len - 1) | 1) + 1;
861 * queue stuff up for output
863 sbappendrecord(&ssb->sb, m);
867 idle = (cb->s_smax == (cb->s_rack - 1));
871 off = cb->s_snxt - cb->s_rack;
872 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT));
875 * If in persist timeout with window of 0, send a probe.
876 * Otherwise, if window is small but nonzero
877 * and timer expired, send what we can and go into
880 if (cb->s_force == 1 + SPPT_PERSIST) {
882 cb->s_timer[SPPT_PERSIST] = 0;
886 span = cb->s_seq - cb->s_rack;
887 len = min(span, win) - off;
891 * Window shrank after we went into it.
892 * If window shrank to 0, cancel pending
893 * retransmission and pull s_snxt back
894 * to (closed) window. We will enter persist
895 * state below. If the window didn't close completely,
896 * just wait for an ACK.
900 cb->s_timer[SPPT_REXMT] = 0;
901 cb->s_snxt = cb->s_rack;
906 rcv_win = ssb_space(&so->so_rcv);
909 * Send if we owe peer an ACK.
911 if (cb->s_oobflags & SF_SOOB) {
913 * must transmit this out of band packet
915 cb->s_oobflags &= ~ SF_SOOB;
917 sppstat.spps_sndurg++;
920 if (cb->s_flags & SF_ACKNOW)
922 if (cb->s_state < TCPS_ESTABLISHED)
925 * Silly window can't happen in spp.
926 * Code from tcp deleted.
931 * Compare available window to amount of window
932 * known to peer (as advertised window less
933 * next expected input.) If the difference is at least two
934 * packets or at least 35% of the maximum possible window,
935 * then want to send a window update to peer.
938 u_short delta = 1 + cb->s_alo - cb->s_ack;
939 int adv = rcv_win - (delta * cb->s_mtu);
941 if ((so->so_rcv.ssb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
942 (100 * adv / so->so_rcv.ssb_hiwat >= 35)) {
943 sppstat.spps_sndwinup++;
944 cb->s_flags |= SF_ACKNOW;
950 * Many comments from tcp_output.c are appropriate here
952 * If send window is too small, there is data to transmit, and no
953 * retransmit or persist is pending, then go to persist state.
954 * If nothing happens soon, send when timer expires:
955 * if window is nonzero, transmit what we can,
956 * otherwise send a probe.
958 if (so->so_snd.ssb_cc && cb->s_timer[SPPT_REXMT] == 0 &&
959 cb->s_timer[SPPT_PERSIST] == 0) {
964 * No reason to send a packet, just return.
971 * Find requested packet.
975 cb->s_want = cb->s_snxt;
976 for (m = ssb->ssb_mb; m; m = m->m_nextpkt) {
977 si = mtod(m, struct spidp *);
978 if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
983 if (si->si_seq == cb->s_snxt)
986 sppstat.spps_sndvoid++, si = 0;
994 alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
995 if (SSEQ_LT(alo, cb->s_alo))
1000 * must make a copy of this packet for
1001 * idp_output to monkey with
1003 m = m_copy(m, 0, (int)M_COPYALL);
1007 si = mtod(m, struct spidp *);
1008 if (SSEQ_LT(si->si_seq, cb->s_smax))
1009 sppstat.spps_sndrexmitpack++;
1011 sppstat.spps_sndpack++;
1012 } else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
1014 * Must send an acknowledgement or a probe
1017 sppstat.spps_sndprobe++;
1018 if (cb->s_flags & SF_ACKNOW)
1019 sppstat.spps_sndacks++;
1020 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
1024 * Fill in mbuf with extended SP header
1025 * and addresses and length put into network format.
1027 MH_ALIGN(m, sizeof (struct spidp));
1028 m->m_len = sizeof (*si);
1029 m->m_pkthdr.len = sizeof (*si);
1030 si = mtod(m, struct spidp *);
1031 si->si_i = *cb->s_idp;
1032 si->si_s = cb->s_shdr;
1033 si->si_seq = cb->s_smax + 1;
1034 si->si_len = htons(sizeof (*si));
1038 if (so->so_options & SO_DEBUG || traceallspps)
1039 spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1043 * Stuff checksum and output datagram.
1045 if ((si->si_cc & SP_SP) == 0) {
1046 if (cb->s_force != (1 + SPPT_PERSIST) ||
1047 cb->s_timer[SPPT_PERSIST] == 0) {
1049 * If this is a new packet and we are not currently
1050 * timing anything, time this one.
1052 if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1053 cb->s_smax = si->si_seq;
1054 if (cb->s_rtt == 0) {
1055 sppstat.spps_segstimed++;
1056 cb->s_rtseq = si->si_seq;
1061 * Set rexmt timer if not currently set,
1062 * Initial value for retransmit timer is smoothed
1063 * round-trip time + 2 * round-trip time variance.
1064 * Initialize shift counter which is used for backoff
1065 * of retransmit time.
1067 if (cb->s_timer[SPPT_REXMT] == 0 &&
1068 cb->s_snxt != cb->s_rack) {
1069 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1070 if (cb->s_timer[SPPT_PERSIST]) {
1071 cb->s_timer[SPPT_PERSIST] = 0;
1075 } else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1076 cb->s_smax = si->si_seq;
1078 } else if (cb->s_state < TCPS_ESTABLISHED) {
1080 cb->s_rtt = 1; /* Time initial handshake */
1081 if (cb->s_timer[SPPT_REXMT] == 0)
1082 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1086 * Do not request acks when we ack their data packets or
1087 * when we do a gratuitous window update.
1089 if (((si->si_cc & SP_SP) == 0) || cb->s_force)
1091 si->si_seq = htons(si->si_seq);
1092 si->si_alo = htons(alo);
1093 si->si_ack = htons(cb->s_ack);
1097 len = ntohs(si->si_len);
1100 si->si_sum = ns_cksum(m, len);
1102 si->si_sum = 0xffff;
1105 if (so->so_options & SO_DEBUG || traceallspps)
1106 spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1108 if (so->so_options & SO_DONTROUTE)
1109 error = ns_output(m, NULL, NS_ROUTETOIF);
1111 error = ns_output(m, &cb->s_nspcb->nsp_route, 0);
1116 sppstat.spps_sndtotal++;
1118 * Data sent (as far as we can tell).
1119 * If this advertises a larger window than any other segment,
1120 * then remember the size of the advertized window.
1121 * Any pending ACK has now been sent.
1124 cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1125 if (SSEQ_GT(alo, cb->s_alo))
1133 int spp_do_persist_panics = 0;
1136 spp_setpersist(struct sppcb *cb)
1138 int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1140 if (cb->s_timer[SPPT_REXMT] && spp_do_persist_panics)
1141 panic("spp_output REXMT");
1143 * Start/restart persistence timer.
1145 SPPT_RANGESET(cb->s_timer[SPPT_PERSIST],
1146 t*spp_backoff[cb->s_rxtshift],
1147 SPPTV_PERSMIN, SPPTV_PERSMAX);
1148 if (cb->s_rxtshift < SPP_MAXRXTSHIFT)
1153 spp_ctloutput(int req, struct socket *so, int level,
1154 int name, struct mbuf **value)
1157 struct nspcb *nsp = sotonspcb(so);
1159 int mask, error = 0;
1161 if (level != NSPROTO_SPP) {
1162 /* This will have to be changed when we do more general
1163 stacking of protocols */
1164 return (idp_ctloutput(req, so, level, name, value));
1170 cb = nstosppcb(nsp);
1177 m = m_get(MB_DONTWAIT, MT_DATA);
1182 case SO_HEADERS_ON_INPUT:
1186 case SO_HEADERS_ON_OUTPUT:
1189 m->m_len = sizeof(short);
1190 *mtod(m, short *) = cb->s_flags & mask;
1194 m->m_len = sizeof(u_short);
1195 *mtod(m, short *) = cb->s_mtu;
1198 case SO_LAST_HEADER:
1199 m->m_len = sizeof(struct sphdr);
1200 *mtod(m, struct sphdr *) = cb->s_rhdr;
1203 case SO_DEFAULT_HEADERS:
1204 m->m_len = sizeof(struct spidp);
1205 *mtod(m, struct sphdr *) = cb->s_shdr;
1215 if (value == 0 || *value == 0) {
1222 case SO_HEADERS_ON_INPUT:
1226 case SO_HEADERS_ON_OUTPUT:
1229 if (cb->s_flags & SF_PI) {
1230 ok = mtod(*value, int *);
1232 cb->s_flags |= mask;
1234 cb->s_flags &= ~mask;
1235 } else error = EINVAL;
1239 cb->s_mtu = *(mtod(*value, u_short *));
1244 ok = mtod(*value, int *);
1246 cb->s_flags2 |= SF_NEWCALL;
1249 cb->s_flags2 &= ~SF_NEWCALL;
1255 case SO_DEFAULT_HEADERS:
1258 = mtod(*value, struct sphdr *);
1259 cb->s_dt = sp->sp_dt;
1260 cb->s_cc = sp->sp_cc & SP_EM;
1275 * SPP_USRREQ PROCEDURES
1279 spp_usr_abort(struct socket *so)
1281 struct nspcb *nsp = sotonspcb(so);
1286 cb = nstosppcb(nsp);
1287 spp_drop(cb, ECONNABORTED);
1296 spp_accept(struct socket *so, struct sockaddr **nam)
1298 struct nspcb *nsp = sotonspcb(so);
1300 struct sockaddr_ns sns;
1304 cb = nstosppcb(nsp);
1305 bzero(&sns, sizeof(sns));
1306 sns.sns_family = AF_NS;
1307 sns.sns_addr = nsp->nsp_faddr;
1308 *nam = dup_sockaddr((struct sockaddr *)&sns);
1317 spp_attach(struct socket *so, int proto, struct pru_attach_info *ai)
1319 struct nspcb *nsp = sotonspcb(so);
1321 struct signalsockbuf *ssb;
1326 if ((error = ns_pcballoc(so, &nspcb)) != 0)
1328 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
1329 if ((error = soreserve(so, 3072, 3072, ai->sb_rlimit)) != 0)
1332 nsp = sotonspcb(so);
1335 cb = kmalloc(sizeof(struct sppcb), M_SPPCB, M_WAITOK|M_ZERO);
1336 cb->s_idp = kmalloc(sizeof(struct idp), M_IDP, M_WAITOK|M_ZERO);
1337 cb->s_state = TCPS_LISTEN;
1340 cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1342 cb->s_mtu = 576 - sizeof (struct spidp);
1343 cb->s_cwnd = ssb_space(ssb) * CUNIT / cb->s_mtu;
1344 cb->s_ssthresh = cb->s_cwnd;
1345 cb->s_cwmx = ssb_space(ssb) * CUNIT / (2 * sizeof (struct spidp));
1348 * Above is recomputed when connecting to account
1349 * for changed buffering or mtu's
1351 cb->s_rtt = SPPTV_SRTTBASE;
1352 cb->s_rttvar = SPPTV_SRTTDFLT << 2;
1353 SPPT_RANGESET(cb->s_rxtcur,
1354 ((SPPTV_SRTTBASE >> 2) + (SPPTV_SRTTDFLT << 2)) >> 1,
1355 SPPTV_MIN, SPPTV_REXMTMAX);
1356 nsp->nsp_pcb = (caddr_t)cb;
1361 spp_attach_sp(struct socket *so, int proto, struct pru_attach_info *ai)
1366 if ((error = spp_attach(so, proto, ai)) == 0) {
1367 nsp = sotonspcb(so);
1368 ((struct sppcb *)nsp->nsp_pcb)->s_flags |=
1369 (SF_HI | SF_HO | SF_PI);
1375 spp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1377 struct nspcb *nsp = sotonspcb(so);
1381 error = ns_pcbbind(nsp, nam);
1388 * Initiate connection to peer.
1389 * Enter SYN_SENT state, and mark socket as connecting.
1390 * Start keep-alive timer, setup prototype header,
1391 * Send initial system packet requesting connection.
1394 spp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1396 struct nspcb *nsp = sotonspcb(so);
1401 cb = nstosppcb(nsp);
1402 if (nsp->nsp_lport == 0) {
1403 if ((error = ns_pcbbind(nsp, NULL)) != 0)
1406 if ((error = ns_pcbconnect(nsp, nam)) != 0)
1409 sppstat.spps_connattempt++;
1410 cb->s_state = TCPS_SYN_SENT;
1413 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
1414 cb->s_force = 1 + SPPTV_KEEP;
1416 * Other party is required to respond to
1417 * the port I send from, but he is not
1418 * required to answer from where I am sending to,
1419 * so allow wildcarding.
1420 * original port I am sending to is still saved in
1424 error = spp_output(cb, NULL);
1432 spp_detach(struct socket *so)
1434 struct nspcb *nsp = sotonspcb(so);
1439 cb = nstosppcb(nsp);
1440 if (cb->s_state > TCPS_LISTEN)
1448 * We may decide later to implement connection closing
1449 * handshaking at the spp level optionally.
1450 * here is the hook to do it:
1453 spp_usr_disconnect(struct socket *so)
1455 struct nspcb *nsp = sotonspcb(so);
1460 cb = nstosppcb(nsp);
1470 spp_listen(struct socket *so, struct thread *td)
1472 struct nspcb *nsp = sotonspcb(so);
1477 cb = nstosppcb(nsp);
1479 if (nsp->nsp_lport == 0)
1480 error = ns_pcbbind(nsp, NULL);
1482 cb->s_state = TCPS_LISTEN;
1490 spp_peeraddr(struct socket *so, struct sockaddr **nam)
1492 struct nspcb *nsp = sotonspcb(so);
1496 ns_setpeeraddr(nsp, nam);
1505 spp_rcvd(struct socket *so, int flags)
1507 struct nspcb *nsp = sotonspcb(so);
1512 cb = nstosppcb(nsp);
1513 cb->s_flags |= SF_RVD;
1514 spp_output(cb, (struct mbuf *) 0);
1515 cb->s_flags &= ~SF_RVD;
1524 spp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1526 struct nspcb *nsp = sotonspcb(so);
1531 cb = nstosppcb(nsp);
1532 if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1533 (so->so_state & SS_RCVATMARK)) {
1535 *mtod(m, caddr_t) = cb->s_iobc;
1547 spp_send(struct socket *so, int flags, struct mbuf *m,
1548 struct sockaddr *addr, struct mbuf *control,
1551 struct nspcb *nsp = sotonspcb(so);
1556 cb = nstosppcb(nsp);
1558 if (flags & PRUS_OOB) {
1559 if (ssb_space(&so->so_snd) < -512) {
1562 cb->s_oobflags |= SF_SOOB;
1567 u_short *p = mtod(control, u_short *);
1569 /* XXXX, for testing */
1570 if ((p[0] == 5) && p[1] == 1) {
1571 cb->s_shdr.sp_dt = *(u_char *)(&p[2]);
1577 error = spp_output(cb, m);
1591 spp_shutdown(struct socket *so)
1593 struct nspcb *nsp = sotonspcb(so);
1598 cb = nstosppcb(nsp);
1600 if ((cb = spp_usrclosed(cb)) != NULL)
1601 error = spp_output(cb, NULL);
1611 spp_sockaddr(struct socket *so, struct sockaddr **nam)
1613 struct nspcb *nsp = sotonspcb(so);
1617 ns_setsockaddr(nsp, nam);
1625 struct pr_usrreqs spp_usrreqs = {
1626 .pru_abort = spp_usr_abort,
1627 .pru_accept = spp_accept,
1628 .pru_attach = spp_attach,
1629 .pru_bind = spp_bind,
1630 .pru_connect = spp_connect,
1631 .pru_connect2 = pru_connect2_notsupp,
1632 .pru_control = ns_control,
1633 .pru_detach = spp_detach,
1634 .pru_disconnect = spp_usr_disconnect,
1635 .pru_listen = spp_listen,
1636 .pru_peeraddr = spp_peeraddr,
1637 .pru_rcvd = spp_rcvd,
1638 .pru_rcvoob = spp_rcvoob,
1639 .pru_send = spp_send,
1640 .pru_sense = pru_sense_null,
1641 .pru_shutdown = spp_shutdown,
1642 .pru_sockaddr = spp_sockaddr,
1643 .pru_sosend = sosend,
1644 .pru_soreceive = soreceive,
1645 .pru_sopoll = sopoll
1648 struct pr_usrreqs spp_usrreqs_sp = {
1649 .pru_abort = spp_usr_abort,
1650 .pru_accept = spp_accept,
1651 .pru_attach = spp_attach_sp,
1652 .pru_bind = spp_bind,
1653 .pru_connect = spp_connect,
1654 .pru_connect2 = pru_connect2_notsupp,
1655 .pru_control = ns_control,
1656 .pru_detach = spp_detach,
1657 .pru_disconnect = spp_usr_disconnect,
1658 .pru_listen = spp_listen,
1659 .pru_peeraddr = spp_peeraddr,
1660 .pru_rcvd = spp_rcvd,
1661 .pru_rcvoob = spp_rcvoob,
1662 .pru_send = spp_send,
1663 .pru_sense = pru_sense_null,
1664 .pru_shutdown = spp_shutdown,
1665 .pru_sockaddr = spp_sockaddr,
1666 .pru_sosend = sosend,
1667 .pru_soreceive = soreceive,
1668 .pru_sopoll = sopoll
1672 * Create template to be used to send spp packets on a connection.
1673 * Called after host entry created, fills
1674 * in a skeletal spp header (choosing connection id),
1675 * minimizing the amount of work necessary when the connection is used.
1678 spp_template(struct sppcb *cb)
1680 struct nspcb *nsp = cb->s_nspcb;
1681 struct idp *idp = cb->s_idp;
1682 struct signalsockbuf *ssb = &(nsp->nsp_socket->so_snd);
/* Skeleton IDP header: packet type plus the local and foreign addresses held in the nspcb. */
1684 idp->idp_pt = NSPROTO_SPP;
1685 idp->idp_sna = nsp->nsp_laddr;
1686 idp->idp_dna = nsp->nsp_faddr;
/* Connection id is the current global spp_iss (network byte order); spp_iss is then advanced by half of SPP_ISSINCR. */
1687 cb->s_sid = htons(spp_iss);
1688 spp_iss += SPP_ISSINCR/2;
/* Initial congestion window: free send-buffer space scaled by CUNIT, divided by s_mtu. The maximum (s_cwmx) below uses 2 * sizeof(struct spidp) as the divisor and is never allowed to fall below s_cwnd. */
1690 cb->s_cwnd = (ssb_space(ssb) * CUNIT) / cb->s_mtu;
1691 cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1693 cb->s_cwmx = (ssb_space(ssb) * CUNIT) / (2 * sizeof(struct spidp));
1694 cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1695 /* But allow for lots of little packets as well */
1699 * Close a SPIP control block:
1700 * discard spp control block itself
1701 * discard ns protocol control block
1702 * wake up any sleepers
1705 spp_close(struct sppcb *cb)
1709 struct nspcb *nsp = cb->s_nspcb;
1710 struct socket *so = nsp->nsp_socket;
/* Walk the queue anchored at cb->s_q; the loop stops when q comes back round to the anchor. Each visited entry is released with kfree(..., M_SPIDP_Q). */
1713 q = cb->s_q.si_next;
1714 while (q != &(cb->s_q)) {
1720 kfree(oq, M_SPIDP_Q);
/* Release the header buffer hanging off the control block (the one spp_template() fills in). */
1722 kfree(cb->s_idp, M_IDP);
/* NOTE(review): presumably the "wake up any sleepers" step listed above -- confirm against soisdisconnected(). */
1725 soisdisconnected(so);
/* Account for the close in the protocol statistics. */
1727 sppstat.spps_closed++;
1731 * Someday we may do level 3 handshaking
1732 * to close a connection or send a xerox style error.
1733 * For now, just close.
1736 spp_usrclosed(struct sppcb *cb)
/* No protocol-level handshake yet: hand cb to spp_close() and pass its result back to the caller. */
1738 return (spp_close(cb));
/*
 * Disconnect a connection.  Currently identical in effect to
 * spp_usrclosed(): the control block goes straight to spp_close()
 * and that call's result is returned.
 */
1742 spp_disconnect(struct sppcb *cb)
1744 return (spp_close(cb));
1748 * Drop connection, reporting
1749 * the specified error.
1752 spp_drop(struct sppcb *cb, int error)
1754 struct socket *so = cb->s_nspcb->nsp_socket;
1757 * someday, in the xerox world
1758 * we will generate error protocol packets
1759 * announcing that the socket has gone away.
/* Connection state satisfies TCPS_HAVERCVDSYN: count it in spps_drops and force the state to TCPS_CLOSED. */
1761 if (TCPS_HAVERCVDSYN(cb->s_state)) {
1762 sppstat.spps_drops++;
1763 cb->s_state = TCPS_CLOSED;
/* NOTE(review): spps_conndrops looks like the alternative branch of the test above, but the line joining the two is not visible here -- confirm. */
1766 sppstat.spps_conndrops++;
/* Store the caller's error on the socket, then tear the connection down; the result of spp_close() is returned. */
1767 so->so_error = error;
1768 return (spp_close(cb));
/*
 * Abort the SPP connection attached to an nspcb: the pcb's nsp_pcb
 * pointer is cast to struct sppcb * and passed to spp_close().
 * The value returned by spp_close() is discarded.
 */
1772 spp_abort(struct nspcb *nsp)
1774 spp_close((struct sppcb *)nsp->nsp_pcb);
1778 * Fast timeout routine for processing delayed acks
1787 nsp = nspcb.nsp_next;
1789 for (; nsp != &nspcb; nsp = nsp->nsp_next) {
1790 if ((cb = (struct sppcb *)nsp->nsp_pcb) &&
1791 (cb->s_flags & SF_DELACK)) {
1792 cb->s_flags &= ~SF_DELACK;
1793 cb->s_flags |= SF_ACKNOW;
1794 sppstat.spps_delack++;
1795 spp_output(cb, (struct mbuf *) 0);
1803 * spp protocol timeout routine called every 500 ms.
1804 * Updates the timers in all active pcb's and
1805 * causes finite state machine actions if timers expire.
1810 struct nspcb *ip, *ipnxt;
1815 * Search through tcb's and update active timers.
1818 ip = nspcb.nsp_next;
1823 while (ip != &nspcb) {
1825 ipnxt = ip->nsp_next;
1828 for (i = 0; i < SPPT_NTIMERS; i++) {
1829 if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1831 if (ipnxt->nsp_prev != ip)
1841 spp_iss += SPP_ISSINCR/PR_SLOWHZ; /* increment iss */
1845 * SPP timer processing.
1848 spp_timers(struct sppcb *cb, int timer)
1853 cb->s_force = 1 + timer;
1857 * 2 MSL timeout in shutdown went off. TCP deletes connection
1861 kprintf("spp: SPPT_2MSL went off for no reason\n");
1862 cb->s_timer[timer] = 0;
1866 * Retransmission timer went off. Message has not
1867 * been acked within retransmit interval. Back off
1868 * to a longer retransmit interval and retransmit one packet.
1871 if (++cb->s_rxtshift > SPP_MAXRXTSHIFT) {
1872 cb->s_rxtshift = SPP_MAXRXTSHIFT;
1873 sppstat.spps_timeoutdrop++;
1874 cb = spp_drop(cb, ETIMEDOUT);
1877 sppstat.spps_rexmttimeo++;
1878 rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1879 rexmt *= spp_backoff[cb->s_rxtshift];
1880 SPPT_RANGESET(cb->s_rxtcur, rexmt, SPPTV_MIN, SPPTV_REXMTMAX);
1881 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1883 * If we have backed off fairly far, our srtt
1884 * estimate is probably bogus. Clobber it
1885 * so we'll take the next rtt measurement as our srtt;
1886 * move the current srtt into rttvar to keep the current
1887 * retransmit times until then.
1889 if (cb->s_rxtshift > SPP_MAXRXTSHIFT / 4 ) {
1890 cb->s_rttvar += (cb->s_srtt >> 2);
1893 cb->s_snxt = cb->s_rack;
1895 * If timing a packet, stop the timer.
1899 * See very long discussion in tcp_timer.c about congestion
1900 * window and ssthresh
1902 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1906 cb->s_ssthresh = win * CUNIT;
1907 spp_output(cb, (struct mbuf *) 0);
1911 * Persistence timer into zero window.
1912 * Force a probe to be sent.
1915 sppstat.spps_persisttimeo++;
1917 spp_output(cb, (struct mbuf *) 0);
1921 * Keep-alive timer went off; send something
1922 * or drop connection if idle for too long.
1925 sppstat.spps_keeptimeo++;
1926 if (cb->s_state < TCPS_ESTABLISHED)
1928 if (cb->s_nspcb->nsp_socket->so_options & SO_KEEPALIVE) {
1929 if (cb->s_idle >= SPPTV_MAXIDLE)
1931 sppstat.spps_keepprobe++;
1932 spp_output(cb, (struct mbuf *) 0);
1935 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
1938 sppstat.spps_keepdrops++;
1939 cb = spp_drop(cb, ETIMEDOUT);
/*
 * Sizes of the SPP and NS control-block structures, published as plain
 * int globals.
 * NOTE(review): nothing in view reads these; presumably they exist for
 * debuggers or external tools -- confirm before removing.
 */
1945 int SppcbSize = sizeof (struct sppcb);
1946 int NspcbSize = sizeof (struct nspcb);