2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)spp_usrreq.c 8.1 (Berkeley) 6/10/93
34 * $FreeBSD: src/sys/netns/spp_usrreq.c,v 1.11 1999/08/28 00:49:53 peter Exp $
35 * $DragonFly: src/sys/netproto/ns/spp_usrreq.c,v 1.22 2007/04/22 01:13:16 dillon Exp $
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/malloc.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/socketvar2.h>
47 #include <sys/errno.h>
48 #include <sys/thread2.h>
51 #include <net/route.h>
52 #include <netinet/tcp_fsm.h>
61 #include "spp_timer.h"
63 #include "spp_debug.h"
/*
 * File-scope state for the SPP implementation.
 * NOTE(review): this extract is missing interior lines (the embedded original
 * line numbers are discontinuous), so some declarations are truncated — e.g.
 * the declaration line for spp_iss is absent; only its initializer survives.
 */
65 extern u_char nsctlerrmap[]; /* from ns_input.c */
66 extern int idpcksum; /* from ns_input.c */
68 static MALLOC_DEFINE(M_IDP, "ns_idp", "NS Packet Management");
69 static MALLOC_DEFINE(M_SPIDP_Q, "ns_spidp_q", "NS Packet Management");
70 static MALLOC_DEFINE(M_SPPCB, "ns_sppcb", "NS PCB Management");
72 struct spp_istat spp_istat;
/* Retransmit backoff multipliers, indexed by s_rxtshift (capped at 64x). */
74 int spp_backoff[SPP_MAXRXTSHIFT+1] =
75 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
78 * SP protocol implementation.
83 spp_iss = 1; /* WRONG !! should fish it out of TODR */
/* Snapshot of the last interesting header, used by the SO_DEBUG tracing. */
85 struct spidp spp_savesi;
87 extern int sppconsdebug;
89 int spp_use_delack = 0;
90 u_short spp_newchecks[50];
/*
 * spp_input -- per-packet input processing for an SPP connection.
 * Byte-swaps the seq/ack/alo fields, spawns a new socket for packets
 * arriving on a listening socket, runs the connection-establishment
 * state machine (LISTEN -> SYN_RECEIVED -> ESTABLISHED, mirroring the
 * TCP states), then hands data packets to spp_reass() and triggers
 * spp_output() when an ACK or window update is owed.
 * NOTE(review): this extract is missing many interior lines (variable
 * declarations, braces, several case labels) -- do not treat it as
 * compilable; consult the full spp_usrreq.c before editing.
 */
94 spp_input(struct mbuf *m, ...)
105 nsp = __va_arg(ap, struct nspcb *);
108 sppstat.spps_rcvtotal++;
110 panic("No nspcb in spp_input");
115 if (cb == 0) goto bad;
/* Need a contiguous spidp header before touching its fields. */
117 if (m->m_len < sizeof(struct spidp)) {
118 if ((m = m_pullup(m, sizeof(*si))) == 0) {
119 sppstat.spps_rcvshort++;
/* Convert sequence numbers to host order in place. */
123 si = mtod(m, struct spidp *);
124 si->si_seq = ntohs(si->si_seq);
125 si->si_ack = ntohs(si->si_ack);
126 si->si_alo = ntohs(si->si_alo);
128 so = nsp->nsp_socket;
129 if (so->so_options & SO_DEBUG || traceallspps) {
130 ostate = cb->s_state;
/* Connection request on a listening socket: clone it via sonewconn(). */
133 if (so->so_options & SO_ACCEPTCONN) {
134 struct sppcb *ocb = cb;
136 so = sonewconn(so, 0);
141 * This is ugly, but ....
143 * Mark socket as temporary until we're
144 * committed to keeping it. The code at
145 * ``drop'' and ``dropwithreset'' check the
146 * flag dropsocket to see if the temporary
147 * socket created here should be discarded.
148 * We mark the socket as discardable until
149 * we're committed to it below in TCPS_LISTEN.
152 nsp = (struct nspcb *)so->so_pcb;
153 nsp->nsp_laddr = si->si_dna;
155 cb->s_mtu = ocb->s_mtu; /* preserve sockopts */
156 cb->s_flags = ocb->s_flags; /* preserve sockopts */
157 cb->s_flags2 = ocb->s_flags2; /* preserve sockopts */
158 cb->s_state = TCPS_LISTEN;
162 * Packet received on connection.
163 * reset idle time and keep-alive timer;
166 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
168 switch (cb->s_state) {
172 struct sockaddr_ns *sns;
173 struct ns_addr laddr;
176 * If somebody here was carying on a conversation
177 * and went away, and his pen pal thinks he can
178 * still talk, we get the misdirected packet.
180 if (spp_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
/* Build a sockaddr_ns from the source address and connect the pcb. */
184 am = m_get(MB_DONTWAIT, MT_SONAME);
187 am->m_len = sizeof (struct sockaddr_ns);
188 sns = mtod(am, struct sockaddr_ns *);
189 sns->sns_len = sizeof(*sns);
190 sns->sns_family = AF_NS;
191 sns->sns_addr = si->si_sna;
192 laddr = nsp->nsp_laddr;
193 if (ns_nullhost(laddr))
194 nsp->nsp_laddr = si->si_dna;
195 if (ns_pcbconnect(nsp, mtod(am, struct sockaddr *))) {
196 nsp->nsp_laddr = laddr;
203 dropsocket = 0; /* committed to socket */
204 cb->s_did = si->si_sid;
205 cb->s_rack = si->si_ack;
206 cb->s_ralo = si->si_alo;
207 #define THREEWAYSHAKE
209 cb->s_state = TCPS_SYN_RECEIVED;
210 cb->s_force = 1 + SPPT_KEEP;
211 sppstat.spps_accepts++;
212 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
216 * This state means that we have heard a response
217 * to our acceptance of their connection
218 * It is probably logically unnecessary in this
221 case TCPS_SYN_RECEIVED: {
222 if (si->si_did!=cb->s_sid) {
227 nsp->nsp_fport = si->si_sport;
228 cb->s_timer[SPPT_REXMT] = 0;
229 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
231 cb->s_state = TCPS_ESTABLISHED;
232 sppstat.spps_accepts++;
237 * This state means that we have gotten a response
238 * to our attempt to establish a connection.
239 * We fill in the data from the other side,
240 * telling us which port to respond to, instead of the well-
241 * known one we might have sent to in the first place.
242 * We also require that this is a response to our
246 if (si->si_did!=cb->s_sid) {
250 sppstat.spps_connects++;
251 cb->s_did = si->si_sid;
252 cb->s_rack = si->si_ack;
253 cb->s_ralo = si->si_alo;
254 cb->s_dport = nsp->nsp_fport = si->si_sport;
255 cb->s_timer[SPPT_REXMT] = 0;
256 cb->s_flags |= SF_ACKNOW;
258 cb->s_state = TCPS_ESTABLISHED;
259 /* Use roundtrip time of connection request for initial rtt */
261 cb->s_srtt = cb->s_rtt << 3;
262 cb->s_rttvar = cb->s_rtt << 1;
263 SPPT_RANGESET(cb->s_rxtcur,
264 ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
265 SPPTV_MIN, SPPTV_REXMTMAX);
269 if (so->so_options & SO_DEBUG || traceallspps)
270 spp_trace(SA_INPUT, (u_char)ostate, cb, &spp_savesi, 0);
/* Strip the IDP header before handing the packet to spp_reass(). */
272 m->m_len -= sizeof (struct idp);
273 m->m_pkthdr.len -= sizeof (struct idp);
274 m->m_data += sizeof (struct idp);
276 if (spp_reass(cb, si, m)) {
/* Send an ACK / window update if one is owed, then clear the flags. */
279 if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
280 spp_output(cb, NULL);
281 cb->s_flags &= ~(SF_WIN|SF_RXT);
/* Error path: restore network byte order and report "no socket". */
287 si->si_seq = ntohs(si->si_seq);
288 si->si_ack = ntohs(si->si_ack);
289 si->si_alo = ntohs(si->si_alo);
290 ns_error(m, NS_ERR_NOSOCK, 0);
291 if (cb->s_nspcb->nsp_socket->so_options & SO_DEBUG || traceallspps)
292 spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
297 if (cb == 0 || cb->s_nspcb->nsp_socket->so_options & SO_DEBUG ||
299 spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
/* Duplicate-ACK count that triggers fast retransmit (cf. tcp_input). */
303 int spprexmtthresh = 3;
/*
 * spp_reass -- ACK processing and in-order reassembly for SPP.
 * Handles duplicate-ACK fast retransmit, RTT smoothing, retransmit/persist
 * timer maintenance, congestion-window opening, trimming acked records
 * from the send buffer, send-window updates, and finally queues/presents
 * in-sequence data to the receiving socket.
 * NOTE(review): this extract is missing interior lines (declarations for
 * m/q/nq/incr/wakeup, several braces and else-arms are absent) -- consult
 * the full source before editing.
 */
306 * This is structurally similar to the tcp reassembly routine
307 * but its function is somewhat different: It merely queues
308 * packets up, and suppresses duplicates.
311 spp_reass(struct sppcb *cb, struct spidp *si, struct mbuf *si_m)
316 struct socket *so = cb->s_nspcb->nsp_socket;
317 char packetp = cb->s_flags & SF_HI;
324 * Update our news from them.
326 if (si->si_cc & SP_SA)
327 cb->s_flags |= (spp_use_delack ? SF_DELACK : SF_ACKNOW);
328 if (SSEQ_GT(si->si_alo, cb->s_ralo))
329 cb->s_flags |= SF_WIN;
330 if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
331 if ((si->si_cc & SP_SP) && cb->s_rack != (cb->s_smax + 1)) {
332 sppstat.spps_rcvdupack++;
334 * If this is a completely duplicate ack
335 * and other conditions hold, we assume
336 * a packet has been dropped and retransmit
337 * it exactly as in tcp_input().
339 if (si->si_ack != cb->s_rack ||
340 si->si_alo != cb->s_ralo)
342 else if (++cb->s_dupacks == spprexmtthresh) {
343 u_short onxt = cb->s_snxt;
344 int cwnd = cb->s_cwnd;
/* Fast retransmit: force out the presumed-lost packet now. */
346 cb->s_snxt = si->si_ack;
348 cb->s_force = 1 + SPPT_REXMT;
349 spp_output(cb, NULL);
350 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
/* Halve the congestion window (floor of 2 CUNITs). */
352 if (cwnd >= 4 * CUNIT)
353 cb->s_cwnd = cwnd / 2;
354 if (SSEQ_GT(onxt, cb->s_snxt))
364 * If our correspondent acknowledges data we haven't sent
365 * TCP would drop the packet after acking. We'll be a little
368 if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
369 sppstat.spps_rcvacktoomuch++;
370 si->si_ack = cb->s_smax + 1;
372 sppstat.spps_rcvackpack++;
374 * If transmit timer is running and timed sequence
375 * number was acked, update smoothed round trip time.
376 * See discussion of algorithm in tcp_input.c
378 if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
379 sppstat.spps_rttupdated++;
380 if (cb->s_srtt != 0) {
/* srtt kept scaled by 8, rttvar by 4 (Van Jacobson fixed point). */
382 delta = cb->s_rtt - (cb->s_srtt >> 3);
383 if ((cb->s_srtt += delta) <= 0)
387 delta -= (cb->s_rttvar >> 2);
388 if ((cb->s_rttvar += delta) <= 0)
392 * No rtt measurement yet
394 cb->s_srtt = cb->s_rtt << 3;
395 cb->s_rttvar = cb->s_rtt << 1;
399 SPPT_RANGESET(cb->s_rxtcur,
400 ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
401 SPPTV_MIN, SPPTV_REXMTMAX);
404 * If all outstanding data is acked, stop retransmit
405 * timer and remember to restart (more output or persist).
406 * If there is more data to be acked, restart retransmit
407 * timer, using current (possibly backed-off) value;
409 if (si->si_ack == cb->s_smax + 1) {
410 cb->s_timer[SPPT_REXMT] = 0;
411 cb->s_flags |= SF_RXT;
412 } else if (cb->s_timer[SPPT_PERSIST] == 0)
413 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
415 * When new data is acked, open the congestion window.
416 * If the window gives us less than ssthresh packets
417 * in flight, open exponentially (maxseg at a time).
418 * Otherwise open linearly (maxseg^2 / cwnd at a time).
421 if (cb->s_cwnd > cb->s_ssthresh)
422 incr = max(incr * incr / cb->s_cwnd, 1);
423 cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
425 * Trim Acked data from output queue.
427 while ((m = so->so_snd.ssb_mb) != NULL) {
428 if (SSEQ_LT((mtod(m, struct spidp *))->si_seq, si->si_ack))
429 sbdroprecord(&so->so_snd.sb);
434 cb->s_rack = si->si_ack;
436 if (SSEQ_LT(cb->s_snxt, cb->s_rack))
437 cb->s_snxt = cb->s_rack;
/* Send-window update test, same shape as TCP's swl1/swl2 check. */
438 if (SSEQ_LT(cb->s_swl1, si->si_seq) || (cb->s_swl1 == si->si_seq &&
439 (SSEQ_LT(cb->s_swl2, si->si_ack) ||
440 (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo))))) {
441 /* keep track of pure window updates */
442 if ((si->si_cc & SP_SP) && cb->s_swl2 == si->si_ack
443 && SSEQ_LT(cb->s_ralo, si->si_alo)) {
444 sppstat.spps_rcvwinupd++;
445 sppstat.spps_rcvdupack--;
447 cb->s_ralo = si->si_alo;
448 cb->s_swl1 = si->si_seq;
449 cb->s_swl2 = si->si_ack;
450 cb->s_swnd = (1 + si->si_alo - si->si_ack);
451 if (cb->s_swnd > cb->s_smxw)
452 cb->s_smxw = cb->s_swnd;
453 cb->s_flags |= SF_WIN;
456 * If this packet number is higher than that which
457 * we have allocated refuse it, unless urgent
459 if (SSEQ_GT(si->si_seq, cb->s_alo)) {
460 if (si->si_cc & SP_SP) {
461 sppstat.spps_rcvwinprobe++;
464 sppstat.spps_rcvpackafterwin++;
465 if (si->si_cc & SP_OB) {
466 if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
467 ns_error(si_m, NS_ERR_FULLUP, 0);
469 } /* else queue this packet; */
471 /*register struct socket *so = cb->s_nspcb->nsp_socket;
472 if (so->so_state && SS_NOFDREF) {
473 ns_error(si_m, NS_ERR_NOSOCK, 0);
478 ns_error(si_m, NS_ERR_FULLUP, 0);
483 * If this is a system packet, we don't need to
484 * queue it up, and won't update acknowledge #
486 if (si->si_cc & SP_SP) {
490 * We have already seen this packet, so drop.
492 if (SSEQ_LT(si->si_seq, cb->s_ack)) {
494 sppstat.spps_rcvduppack++;
495 if (si->si_seq == cb->s_ack - 1)
500 * Loop through all packets queued up to insert in
501 * appropriate sequence.
503 for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
504 if (si->si_seq == SI(q)->si_seq) {
505 sppstat.spps_rcvduppack++;
508 if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
509 sppstat.spps_rcvoopack++;
/* M_INTNOWAIT: may be called from interrupt context -- must not sleep. */
513 nq = kmalloc(sizeof(struct spidp_q), M_SPIDP_Q, M_INTNOWAIT);
518 insque(nq, q->si_prev);
521 * If this packet is urgent, inform process
523 if (si->si_cc & SP_OB) {
524 cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
526 cb->s_oobflags |= SF_IOOB;
529 #define SPINC sizeof(struct sphdr)
531 * Loop through all packets queued up to update acknowledge
532 * number, and present all acknowledged data to user;
533 * If in packet interface mode, show packet headers.
535 for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
536 if (SI(q)->si_seq == cb->s_ack) {
539 if (SI(q)->si_cc & SP_OB) {
540 cb->s_oobflags &= ~SF_IOOB;
541 if (so->so_rcv.ssb_cc)
542 so->so_oobmark = so->so_rcv.ssb_cc;
544 sosetstate(so, SS_RCVATMARK);
549 kfree(nq, M_SPIDP_Q);
551 sppstat.spps_rcvpack++;
/* SF_NEWCALL mode: surface datastream-type changes as control mbufs. */
553 if (cb->s_flags2 & SF_NEWCALL) {
554 struct sphdr *sp = mtod(m, struct sphdr *);
555 u_char dt = sp->sp_dt;
557 if (dt != cb->s_rhdr.sp_dt) {
559 m_getclr(MB_DONTWAIT, MT_CONTROL);
564 cb->s_rhdr.sp_dt = dt;
565 mm->m_len = 5; /*XXX*/
568 *(u_char *)(&s[2]) = dt;
569 sbappend(&so->so_rcv.sb, mm);
572 if (sp->sp_cc & SP_OB) {
573 m_chtype(m, MT_OOBDATA);
576 soclrstate(so, SS_RCVATMARK);
581 m->m_pkthdr.len -= SPINC;
583 if ((sp->sp_cc & SP_EM) || packetp) {
584 sbappendrecord(&so->so_rcv.sb, m);
587 sbappend(&so->so_rcv.sb, m);
591 sbappendrecord(&so->so_rcv.sb, m);
593 cb->s_rhdr = *mtod(m, struct sphdr *);
596 m->m_pkthdr.len -= SPINC;
597 sbappend(&so->so_rcv.sb, m);
602 if (wakeup) sorwakeup(so);
/*
 * spp_ctlinput -- control input (ICMP-like error notification) for SPP.
 * Decodes the PRC_* command, extracts the affected destination address,
 * and notifies or drops the matching pcb(s) via ns_pcbnotify/spp_drop.
 * NOTE(review): extract is missing interior lines (switch braces, other
 * case labels, the declarations of na/nsp/type).
 */
607 spp_ctlinput(int cmd, caddr_t arg)
610 struct ns_errp *errp = 0;
612 struct sockaddr_ns *sns;
615 if (cmd < 0 || cmd > PRC_NCMDS)
617 type = NS_ERR_UNREACH_HOST;
626 case PRC_HOSTUNREACH:
627 sns = (struct sockaddr_ns *)arg;
628 if (sns->sns_family != AF_NS)
/* Error carried inside an NS error packet: pull out code and dest. */
634 errp = (struct ns_errp *)arg;
635 na = &errp->ns_err_idp.idp_dna;
636 type = errp->ns_err_num;
637 type = ntohs((u_short)type);
641 case NS_ERR_UNREACH_HOST:
642 ns_pcbnotify(na, (int)nsctlerrmap[cmd], spp_abort, (long) 0);
647 nsp = ns_pcblookup(na, errp->ns_err_idp.idp_sna.x_port,
651 spp_drop((struct sppcb *)nsp->nsp_pcb,
652 (int)nsctlerrmap[cmd]);
654 idp_drop(nsp, (int)nsctlerrmap[cmd]);
/* Source quench: per-pcb congestion-window shrink via spp_quench. */
659 ns_pcbnotify(na, 0, spp_quench, (long) 0);
663 * When a source quench is received, close congestion window
664 * to one packet. We will gradually open it again as we proceed.
667 spp_quench(struct nspcb *nsp)
/* NOTE(review): body truncated in this extract -- the cwnd reset is absent. */
669 struct sppcb *cb = nstosppcb(nsp);
/*
 * spp_fixmtu -- react to an NS "packet too big" error by adopting the
 * reported MTU and locating the offending record in the send buffer so
 * oversized queued packets can be split and retransmitted.
 * NOTE(review): extract is missing interior lines (the splitting logic
 * after the length check is absent).
 */
677 spp_fixmtu(struct nspcb *nsp)
679 struct sppcb *cb = (struct sppcb *)(nsp->nsp_pcb);
683 struct signalsockbuf *ssb;
685 struct mbuf *firstbad, *m0;
689 * The notification that we have sent
690 * too much is bad news -- we will
691 * have to go through queued up so far
692 * splitting ones which are too big and
693 * reassigning sequence numbers and checksums.
694 * we should then retransmit all packets from
695 * one above the offending packet to the last one
696 * we had sent (or our allocation)
697 * then the offending one so that the any queued
698 * data at our destination will be discarded.
700 ep = (struct ns_errp *)nsp->nsp_notify_param;
701 ssb = &nsp->nsp_socket->so_snd;
702 cb->s_mtu = ep->ns_err_param;
703 badseq = ep->ns_err_idp.si_seq;
/* Walk record chain for the record whose sequence matches the error. */
704 for (m = ssb->ssb_mb; m; m = m->m_nextpkt) {
705 si = mtod(m, struct spidp *);
706 if (si->si_seq == badseq)
712 /* calculate length */
713 for (m0 = m, len = 0; m ; m = m->m_next)
715 if (len > cb->s_mtu) {
/*
 * spp_output -- SPP analogue of tcp_output.
 * First phase (m0 != NULL): size-check and packetize user data, prepend
 * the spidp header, pad to even length for the checksum, and queue the
 * record on the send buffer. Second phase: decide whether to transmit
 * (window/persist/ACK/probe logic), pick the packet at s_snxt, stamp
 * seq/ack/alo in network order, checksum, and hand it to ns_output().
 * NOTE(review): extract is missing many interior lines (declarations of
 * datalen/mtu/mm/sh/alo/idle, labels such as the send/again targets,
 * braces, and else-arms) -- consult the full source before editing.
 */
724 spp_output(struct sppcb *cb, struct mbuf *m0)
726 struct socket *so = cb->s_nspcb->nsp_socket;
727 struct mbuf *m = NULL;
728 struct spidp *si = NULL;
729 struct signalsockbuf *ssb = &so->so_snd;
730 int len = 0, win, rcv_win;
731 short span, off, recordp = 0;
733 int error = 0, sendalot;
743 * Make sure that packet isn't too big.
745 for (m = m0; m ; m = m->m_next) {
748 if (m->m_flags & M_EOR)
751 datalen = (cb->s_flags & SF_HO) ?
752 len - sizeof (struct sphdr) : len;
754 if (cb->s_flags & SF_PI) {
758 int oldEM = cb->s_cc & SP_EM;
763 * Here we are only being called
764 * from usrreq(), so it is OK to
/* Oversized record: copy an mtu-sized chunk and recurse to send it. */
767 m = m_copym(m0, 0, mtu, MB_WAIT);
768 if (cb->s_flags & SF_NEWCALL) {
772 mm->m_flags &= ~M_EOR;
776 error = spp_output(cb, m);
789 * Force length even, by adding a "garbage byte" if
794 if (M_TRAILINGSPACE(m) >= 1)
797 struct mbuf *m1 = m_get(MB_DONTWAIT, MT_DATA);
804 *(mtod(m1, u_char *)) = 0;
808 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
814 * Fill in mbuf with extended SP header
815 * and addresses and length put into network format.
817 MH_ALIGN(m, sizeof (struct spidp));
818 m->m_len = sizeof (struct spidp);
820 si = mtod(m, struct spidp *);
821 si->si_i = *cb->s_idp;
822 si->si_s = cb->s_shdr;
/* Packet-interface + headers-on-output: user supplied the sphdr. */
823 if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
825 if (m0->m_len < sizeof (*sh)) {
826 if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
833 sh = mtod(m0, struct sphdr *);
834 si->si_dt = sh->sp_dt;
835 si->si_cc |= sh->sp_cc & SP_EM;
836 m0->m_len -= sizeof (*sh);
837 m0->m_data += sizeof (*sh);
841 if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
845 if (cb->s_oobflags & SF_SOOB) {
848 * make sure OB packets convey exactly 1 byte.
849 * If the packet is 1 byte or larger, we
850 * have already guaranted there to be at least
851 * one garbage byte for the checksum, and
852 * extra bytes shouldn't hurt!
854 if (len > sizeof(*si)) {
856 len = (1 + sizeof(*si));
859 si->si_len = htons((u_short)len);
860 m->m_pkthdr.len = ((len - 1) | 1) + 1;
862 * queue stuff up for output
864 sbappendrecord(&ssb->sb, m);
868 idle = (cb->s_smax == (cb->s_rack - 1));
872 off = cb->s_snxt - cb->s_rack;
/* Effective window = min(peer's window, congestion window in packets). */
873 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT));
876 * If in persist timeout with window of 0, send a probe.
877 * Otherwise, if window is small but nonzero
878 * and timer expired, send what we can and go into
881 if (cb->s_force == 1 + SPPT_PERSIST) {
883 cb->s_timer[SPPT_PERSIST] = 0;
887 span = cb->s_seq - cb->s_rack;
888 len = min(span, win) - off;
892 * Window shrank after we went into it.
893 * If window shrank to 0, cancel pending
894 * restransmission and pull s_snxt back
895 * to (closed) window. We will enter persist
896 * state below. If the widndow didn't close completely,
897 * just wait for an ACK.
901 cb->s_timer[SPPT_REXMT] = 0;
902 cb->s_snxt = cb->s_rack;
907 rcv_win = ssb_space(&so->so_rcv);
910 * Send if we owe peer an ACK.
912 if (cb->s_oobflags & SF_SOOB) {
914 * must transmit this out of band packet
916 cb->s_oobflags &= ~ SF_SOOB;
918 sppstat.spps_sndurg++;
921 if (cb->s_flags & SF_ACKNOW)
923 if (cb->s_state < TCPS_ESTABLISHED)
926 * Silly window can't happen in spp.
927 * Code from tcp deleted.
932 * Compare available window to amount of window
933 * known to peer (as advertised window less
934 * next expected input.) If the difference is at least two
935 * packets or at least 35% of the mximum possible window,
936 * then want to send a window update to peer.
939 u_short delta = 1 + cb->s_alo - cb->s_ack;
940 int adv = rcv_win - (delta * cb->s_mtu);
942 if ((so->so_rcv.ssb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
943 (100 * adv / so->so_rcv.ssb_hiwat >= 35)) {
944 sppstat.spps_sndwinup++;
945 cb->s_flags |= SF_ACKNOW;
951 * Many comments from tcp_output.c are appropriate here
953 * If send window is too small, there is data to transmit, and no
954 * retransmit or persist is pending, then go to persist state.
955 * If nothing happens soon, send when timer expires:
956 * if window is nonzero, transmit what we can,
957 * otherwise send a probe.
959 if (so->so_snd.ssb_cc && cb->s_timer[SPPT_REXMT] == 0 &&
960 cb->s_timer[SPPT_PERSIST] == 0) {
965 * No reason to send a packet, just return.
972 * Find requested packet.
976 cb->s_want = cb->s_snxt;
977 for (m = ssb->ssb_mb; m; m = m->m_nextpkt) {
978 si = mtod(m, struct spidp *);
979 if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
984 if (si->si_seq == cb->s_snxt)
987 sppstat.spps_sndvoid++, si = 0;
/* Compute new allocation (receive window edge) to advertise. */
995 alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
996 if (SSEQ_LT(alo, cb->s_alo))
1001 * must make a copy of this packet for
1002 * idp_output to monkey with
1004 m = m_copy(m, 0, (int)M_COPYALL);
1008 si = mtod(m, struct spidp *);
1009 if (SSEQ_LT(si->si_seq, cb->s_smax))
1010 sppstat.spps_sndrexmitpack++;
1012 sppstat.spps_sndpack++;
1013 } else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
1015 * Must send an acknowledgement or a probe
1018 sppstat.spps_sndprobe++;
1019 if (cb->s_flags & SF_ACKNOW)
1020 sppstat.spps_sndacks++;
1021 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
1025 * Fill in mbuf with extended SP header
1026 * and addresses and length put into network format.
1028 MH_ALIGN(m, sizeof (struct spidp));
1029 m->m_len = sizeof (*si);
1030 m->m_pkthdr.len = sizeof (*si);
1031 si = mtod(m, struct spidp *);
1032 si->si_i = *cb->s_idp;
1033 si->si_s = cb->s_shdr;
1034 si->si_seq = cb->s_smax + 1;
1035 si->si_len = htons(sizeof (*si));
1039 if (so->so_options & SO_DEBUG || traceallspps)
1040 spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1044 * Stuff checksum and output datagram.
1046 if ((si->si_cc & SP_SP) == 0) {
1047 if (cb->s_force != (1 + SPPT_PERSIST) ||
1048 cb->s_timer[SPPT_PERSIST] == 0) {
1050 * If this is a new packet and we are not currently
1051 * timing anything, time this one.
1053 if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1054 cb->s_smax = si->si_seq;
1055 if (cb->s_rtt == 0) {
1056 sppstat.spps_segstimed++;
1057 cb->s_rtseq = si->si_seq;
1062 * Set rexmt timer if not currently set,
1063 * Initial value for retransmit timer is smoothed
1064 * round-trip time + 2 * round-trip time variance.
1065 * Initialize shift counter which is used for backoff
1066 * of retransmit time.
1068 if (cb->s_timer[SPPT_REXMT] == 0 &&
1069 cb->s_snxt != cb->s_rack) {
1070 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1071 if (cb->s_timer[SPPT_PERSIST]) {
1072 cb->s_timer[SPPT_PERSIST] = 0;
1076 } else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1077 cb->s_smax = si->si_seq;
1079 } else if (cb->s_state < TCPS_ESTABLISHED) {
1081 cb->s_rtt = 1; /* Time initial handshake */
1082 if (cb->s_timer[SPPT_REXMT] == 0)
1083 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1087 * Do not request acks when we ack their data packets or
1088 * when we do a gratuitous window update.
1090 if (((si->si_cc & SP_SP) == 0) || cb->s_force)
/* Convert header fields to network order just before transmit. */
1092 si->si_seq = htons(si->si_seq);
1093 si->si_alo = htons(alo);
1094 si->si_ack = htons(cb->s_ack);
1098 len = ntohs(si->si_len);
1101 si->si_sum = ns_cksum(m, len);
1103 si->si_sum = 0xffff;
1106 if (so->so_options & SO_DEBUG || traceallspps)
1107 spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1109 if (so->so_options & SO_DONTROUTE)
1110 error = ns_output(m, NULL, NS_ROUTETOIF);
1112 error = ns_output(m, &cb->s_nspcb->nsp_route, 0);
1117 sppstat.spps_sndtotal++;
1119 * Data sent (as far as we can tell).
1120 * If this advertises a larger window than any other segment,
1121 * then remember the size of the advertized window.
1122 * Any pending ACK has now been sent.
1125 cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1126 if (SSEQ_GT(alo, cb->s_alo))
/* Debug knob: panic if persist timer is set while REXMT is pending. */
1134 int spp_do_persist_panics = 0;
/*
 * spp_setpersist -- arm the persist (window-probe) timer from the
 * smoothed RTT estimate, backed off exponentially by s_rxtshift.
 */
1137 spp_setpersist(struct sppcb *cb)
1139 int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1141 if (cb->s_timer[SPPT_REXMT] && spp_do_persist_panics)
1142 panic("spp_output REXMT");
1144 * Start/restart persistance timer.
1146 SPPT_RANGESET(cb->s_timer[SPPT_PERSIST],
1147 t*spp_backoff[cb->s_rxtshift],
1148 SPPTV_PERSMIN, SPPTV_PERSMAX);
1149 if (cb->s_rxtshift < SPP_MAXRXTSHIFT)
/*
 * spp_ctloutput -- get/set SPP-level socket options (header modes, MTU,
 * default/last headers, NEWCALL mode); non-SPP levels are delegated to
 * idp_ctloutput().
 * NOTE(review): extract is missing interior lines (the PRCO_GETOPT /
 * PRCO_SETOPT switch skeleton, several case labels and braces).
 */
1154 spp_ctloutput(int req, struct socket *so, int level,
1155 int name, struct mbuf **value)
1158 struct nspcb *nsp = sotonspcb(so);
1160 int mask, error = 0;
1162 if (level != NSPROTO_SPP) {
1163 /* This will have to be changed when we do more general
1164 stacking of protocols */
1165 return (idp_ctloutput(req, so, level, name, value));
1171 cb = nstosppcb(nsp);
/* Get side: allocate an mbuf to return the option value in. */
1178 m = m_get(MB_DONTWAIT, MT_DATA);
1183 case SO_HEADERS_ON_INPUT:
1187 case SO_HEADERS_ON_OUTPUT:
1190 m->m_len = sizeof(short);
1191 *mtod(m, short *) = cb->s_flags & mask;
1195 m->m_len = sizeof(u_short);
1196 *mtod(m, short *) = cb->s_mtu;
1199 case SO_LAST_HEADER:
1200 m->m_len = sizeof(struct sphdr);
1201 *mtod(m, struct sphdr *) = cb->s_rhdr;
1204 case SO_DEFAULT_HEADERS:
1205 m->m_len = sizeof(struct spidp);
1206 *mtod(m, struct sphdr *) = cb->s_shdr;
/* Set side: caller must supply a value mbuf. */
1216 if (value == 0 || *value == 0) {
1223 case SO_HEADERS_ON_INPUT:
1227 case SO_HEADERS_ON_OUTPUT:
1230 if (cb->s_flags & SF_PI) {
1231 ok = mtod(*value, int *);
1233 cb->s_flags |= mask;
1235 cb->s_flags &= ~mask;
1236 } else error = EINVAL;
1240 cb->s_mtu = *(mtod(*value, u_short *));
1245 ok = mtod(*value, int *);
1247 cb->s_flags2 |= SF_NEWCALL;
1250 cb->s_flags2 &= ~SF_NEWCALL;
1256 case SO_DEFAULT_HEADERS:
1259 = mtod(*value, struct sphdr *);
1260 cb->s_dt = sp->sp_dt;
1261 cb->s_cc = sp->sp_cc & SP_EM;
1276 * SPP_USRREQ PROCEDURES
1280 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
1281 * will sofree() it when we return.
/* pru_abort handler: drop the connection with ECONNABORTED. */
1284 spp_usr_abort(struct socket *so)
1286 struct nspcb *nsp = sotonspcb(so);
1291 cb = nstosppcb(nsp);
1292 spp_drop(cb, ECONNABORTED);
/*
 * pru_accept handler: return the peer (foreign) NS address of the
 * accepted connection via a freshly duplicated sockaddr.
 */
1302 spp_accept(struct socket *so, struct sockaddr **nam)
1304 struct nspcb *nsp = sotonspcb(so);
1306 struct sockaddr_ns sns;
1310 cb = nstosppcb(nsp);
1311 bzero(&sns, sizeof(sns));
1312 sns.sns_family = AF_NS;
1313 sns.sns_addr = nsp->nsp_faddr;
1314 *nam = dup_sockaddr((struct sockaddr *)&sns);
/*
 * pru_attach handler: allocate the NS pcb and the SPP control block,
 * reserve default 3 KB send/receive buffers, and seed the congestion
 * window, ssthresh and retransmit-timer defaults.
 * NOTE(review): extract is missing interior lines (error unwinding and
 * some field initialization are absent).
 */
1323 spp_attach(struct socket *so, int proto, struct pru_attach_info *ai)
1325 struct nspcb *nsp = sotonspcb(so);
1327 struct signalsockbuf *ssb;
1332 if ((error = ns_pcballoc(so, &nspcb)) != 0)
1334 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
1335 if ((error = soreserve(so, 3072, 3072, ai->sb_rlimit)) != 0)
1338 nsp = sotonspcb(so);
1341 cb = kmalloc(sizeof(struct sppcb), M_SPPCB, M_WAITOK|M_ZERO);
1342 cb->s_idp = kmalloc(sizeof(struct idp), M_IDP, M_WAITOK|M_ZERO);
1343 cb->s_state = TCPS_LISTEN;
/* Empty circular reassembly queue points at itself. */
1346 cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1348 cb->s_mtu = 576 - sizeof (struct spidp);
1349 cb->s_cwnd = ssb_space(ssb) * CUNIT / cb->s_mtu;
1350 cb->s_ssthresh = cb->s_cwnd;
1351 cb->s_cwmx = ssb_space(ssb) * CUNIT / (2 * sizeof (struct spidp));
1354 * Above is recomputed when connecting to account
1355 * for changed buffering or mtu's
1357 cb->s_rtt = SPPTV_SRTTBASE;
1358 cb->s_rttvar = SPPTV_SRTTDFLT << 2;
1359 SPPT_RANGESET(cb->s_rxtcur,
1360 ((SPPTV_SRTTBASE >> 2) + (SPPTV_SRTTDFLT << 2)) >> 1,
1361 SPPTV_MIN, SPPTV_REXMTMAX);
1362 nsp->nsp_pcb = (caddr_t)cb;
/*
 * pru_attach handler for the raw "SP" variant: normal attach, then
 * enable header-in/header-out/packet-interface modes.
 */
1367 spp_attach_sp(struct socket *so, int proto, struct pru_attach_info *ai)
1372 if ((error = spp_attach(so, proto, ai)) == 0) {
1373 nsp = sotonspcb(so);
1374 ((struct sppcb *)nsp->nsp_pcb)->s_flags |=
1375 (SF_HI | SF_HO | SF_PI);
/* pru_bind handler: bind the underlying NS pcb to the local address. */
1381 spp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1383 struct nspcb *nsp = sotonspcb(so);
1387 error = ns_pcbbind(nsp, nam);
1394 * Initiate connection to peer.
1395 * Enter SYN_SENT state, and mark socket as connecting.
1396 * Start keep-alive timer, setup prototype header,
1397 * Send initial system packet requesting connection.
/*
 * NOTE(review): extract is missing interior lines (implicit bind error
 * unwinding and the soisconnecting()/spp_template() calls implied by the
 * comment above are not visible here).
 */
1400 spp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1402 struct nspcb *nsp = sotonspcb(so);
1407 cb = nstosppcb(nsp);
/* Implicit bind if the caller never bound a local port. */
1408 if (nsp->nsp_lport == 0) {
1409 if ((error = ns_pcbbind(nsp, NULL)) != 0)
1412 if ((error = ns_pcbconnect(nsp, nam)) != 0)
1415 sppstat.spps_connattempt++;
1416 cb->s_state = TCPS_SYN_SENT;
1419 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
1420 cb->s_force = 1 + SPPTV_KEEP;
1422 * Other party is required to respond to
1423 * the port I send from, but he is not
1424 * required to answer from where I am sending to,
1425 * so allow wildcarding.
1426 * original port I am sending to is still saved in
1430 error = spp_output(cb, NULL);
/*
 * pru_detach handler: tear down the SPP control block.
 * NOTE(review): the teardown body after the state check is missing
 * from this extract.
 */
1438 spp_detach(struct socket *so)
1440 struct nspcb *nsp = sotonspcb(so);
1445 cb = nstosppcb(nsp);
1446 if (cb->s_state > TCPS_LISTEN)
1454 * We may decide later to implement connection closing
1455 * handshaking at the spp level optionally.
1456 * here is the hook to do it:
/* NOTE(review): the actual disconnect call is missing from this extract. */
1459 spp_usr_disconnect(struct socket *so)
1461 struct nspcb *nsp = sotonspcb(so);
1466 cb = nstosppcb(nsp);
/*
 * pru_listen handler: implicitly bind if necessary, then enter the
 * LISTEN state so incoming connection requests are accepted.
 */
1476 spp_listen(struct socket *so, struct thread *td)
1478 struct nspcb *nsp = sotonspcb(so);
1483 cb = nstosppcb(nsp);
1485 if (nsp->nsp_lport == 0)
1486 error = ns_pcbbind(nsp, NULL);
1488 cb->s_state = TCPS_LISTEN;
/* pru_peeraddr handler: report the connected peer's NS address. */
1496 spp_peeraddr(struct socket *so, struct sockaddr **nam)
1498 struct nspcb *nsp = sotonspcb(so);
1502 ns_setpeeraddr(nsp, nam);
/*
 * pru_rcvd handler: after the user consumed receive-buffer data, run
 * spp_output() under SF_RVD so a window update can be sent.
 */
1511 spp_rcvd(struct socket *so, int flags)
1513 struct nspcb *nsp = sotonspcb(so);
1518 cb = nstosppcb(nsp);
1519 cb->s_flags |= SF_RVD;
1520 spp_output(cb, NULL);
1521 cb->s_flags &= ~SF_RVD;
/*
 * pru_rcvoob handler: hand the single saved out-of-band byte (s_iobc)
 * to the caller's mbuf when urgent data is pending.
 */
1530 spp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1532 struct nspcb *nsp = sotonspcb(so);
1537 cb = nstosppcb(nsp);
1538 if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1539 (so->so_state & SS_RCVATMARK)) {
1541 *mtod(m, caddr_t) = cb->s_iobc;
/*
 * pru_send handler: optionally mark the record out-of-band (PRUS_OOB),
 * honor a test control message that sets the default datastream type,
 * then hand the data to spp_output().
 * NOTE(review): extract is missing interior lines (error paths and
 * control-mbuf cleanup are absent).
 */
1553 spp_send(struct socket *so, int flags, struct mbuf *m,
1554 struct sockaddr *addr, struct mbuf *control,
1557 struct nspcb *nsp = sotonspcb(so);
1562 cb = nstosppcb(nsp);
1564 if (flags & PRUS_OOB) {
1565 if (ssb_space(&so->so_snd) < -512) {
1568 cb->s_oobflags |= SF_SOOB;
1573 u_short *p = mtod(control, u_short *);
1575 /* XXXX, for testing */
1576 if ((p[0] == 5) && p[1] == 1) {
1577 cb->s_shdr.sp_dt = *(u_char *)(&p[2]);
1583 error = spp_output(cb, m);
/*
 * pru_shutdown handler: run the user-close path and flush any final
 * output if the control block survives.
 */
1597 spp_shutdown(struct socket *so)
1599 struct nspcb *nsp = sotonspcb(so);
1604 cb = nstosppcb(nsp);
1606 if ((cb = spp_usrclosed(cb)) != NULL)
1607 error = spp_output(cb, NULL);
/* pru_sockaddr handler: report the socket's local NS address. */
1617 spp_sockaddr(struct socket *so, struct sockaddr **nam)
1619 struct nspcb *nsp = sotonspcb(so);
1623 ns_setsockaddr(nsp, nam);
/* Protocol user-request vector for ordinary SPP sockets. */
1631 struct pr_usrreqs spp_usrreqs = {
1632 .pru_abort = spp_usr_abort,
1633 .pru_accept = spp_accept,
1634 .pru_attach = spp_attach,
1635 .pru_bind = spp_bind,
1636 .pru_connect = spp_connect,
1637 .pru_connect2 = pru_connect2_notsupp,
1638 .pru_control = ns_control,
1639 .pru_detach = spp_detach,
1640 .pru_disconnect = spp_usr_disconnect,
1641 .pru_listen = spp_listen,
1642 .pru_peeraddr = spp_peeraddr,
1643 .pru_rcvd = spp_rcvd,
1644 .pru_rcvoob = spp_rcvoob,
1645 .pru_send = spp_send,
1646 .pru_sense = pru_sense_null,
1647 .pru_shutdown = spp_shutdown,
1648 .pru_sockaddr = spp_sockaddr,
1649 .pru_sosend = sosend,
1650 .pru_soreceive = soreceive
/*
 * User-request vector for the raw "SP" variant; identical to
 * spp_usrreqs except attach goes through spp_attach_sp, which turns on
 * the header-passing socket modes.
 */
1653 struct pr_usrreqs spp_usrreqs_sp = {
1654 .pru_abort = spp_usr_abort,
1655 .pru_accept = spp_accept,
1656 .pru_attach = spp_attach_sp,
1657 .pru_bind = spp_bind,
1658 .pru_connect = spp_connect,
1659 .pru_connect2 = pru_connect2_notsupp,
1660 .pru_control = ns_control,
1661 .pru_detach = spp_detach,
1662 .pru_disconnect = spp_usr_disconnect,
1663 .pru_listen = spp_listen,
1664 .pru_peeraddr = spp_peeraddr,
1665 .pru_rcvd = spp_rcvd,
1666 .pru_rcvoob = spp_rcvoob,
1667 .pru_send = spp_send,
1668 .pru_sense = pru_sense_null,
1669 .pru_shutdown = spp_shutdown,
1670 .pru_sockaddr = spp_sockaddr,
1671 .pru_sosend = sosend,
1672 .pru_soreceive = soreceive
1676 * Create template to be used to send spp packets on a connection.
1677 * Called after host entry created, fills
1678 * in a skeletal spp header (choosing connection id),
1679 * minimizing the amount of work necessary when the connection is used.
1682 spp_template(struct sppcb *cb)
1684 struct nspcb *nsp = cb->s_nspcb;
1685 struct idp *idp = cb->s_idp;
1686 struct signalsockbuf *ssb = &(nsp->nsp_socket->so_snd);
1688 idp->idp_pt = NSPROTO_SPP;
1689 idp->idp_sna = nsp->nsp_laddr;
1690 idp->idp_dna = nsp->nsp_faddr;
/* Allocate a connection id from the global iss counter. */
1691 cb->s_sid = htons(spp_iss);
1692 spp_iss += SPP_ISSINCR/2;
/* Recompute window parameters now that buffering and mtu are final. */
1694 cb->s_cwnd = (ssb_space(ssb) * CUNIT) / cb->s_mtu;
1695 cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1697 cb->s_cwmx = (ssb_space(ssb) * CUNIT) / (2 * sizeof(struct spidp));
1698 cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1699 /* But allow for lots of little packets as well */
1703 * Close a SPIP control block:
1704 * discard spp control block itself
1705 * discard ns protocol control block
1706 * wake up any sleepers
1709 spp_close(struct sppcb *cb)
1713 struct nspcb *nsp = cb->s_nspcb;
1714 struct socket *so = nsp->nsp_socket;
/* Free every packet still sitting on the per-connection queue. */
1717 q = cb->s_q.si_next;
1718 while (q != &(cb->s_q)) {
1724 kfree(oq, M_SPIDP_Q);
/* Release the prototype header built by spp_template(). */
1726 kfree(cb->s_idp, M_IDP);
/* Detach from the ns pcb and wake any sleepers on the socket. */
1728 nsp->nsp_pcb = NULL;
1729 soisdisconnected(so);
1731 sppstat.spps_closed++;
1735 * Someday we may do level 3 handshaking
1736 * to close a connection or send a xerox style error.
1737 * For now, just close.
/* User side has shut down: no level-3 handshake yet, just close. */
1740 spp_usrclosed(struct sppcb *cb)
1742 return (spp_close(cb));
/* Disconnect request: currently equivalent to a full close. */
1746 spp_disconnect(struct sppcb *cb)
1748 return (spp_close(cb));
1752 * Drop connection, reporting
1753 * the specified error.
1756 spp_drop(struct sppcb *cb, int error)
1758 struct socket *so = cb->s_nspcb->nsp_socket;
1761 * someday, in the xerox world
1762 * we will generate error protocol packets
1763 * announcing that the socket has gone away.
/* Synchronized connections count as drops; embryonic ones as conndrops. */
1765 if (TCPS_HAVERCVDSYN(cb->s_state)) {
1766 sppstat.spps_drops++;
1767 cb->s_state = TCPS_CLOSED;
1770 sppstat.spps_conndrops++;
/* Report the reason to the user, then discard the connection. */
1771 so->so_error = error;
1772 return (spp_close(cb));
/* Abort the SPP connection attached to this ns pcb. */
1776 spp_abort(struct nspcb *nsp)
1778 spp_close((struct sppcb *)nsp->nsp_pcb);
1782 * Fast timeout routine for processing delayed acks
/*
 * Scan all ns pcbs; for any SPP connection with a delayed ack
 * pending, promote it to an immediate ack and send it now.
 */
1791 nsp = nspcb.nsp_next;
1793 for (; nsp != &nspcb; nsp = nsp->nsp_next) {
1794 if ((cb = (struct sppcb *)nsp->nsp_pcb) &&
1795 (cb->s_flags & SF_DELACK)) {
1796 cb->s_flags &= ~SF_DELACK;
1797 cb->s_flags |= SF_ACKNOW;
1798 sppstat.spps_delack++;
1799 spp_output(cb, NULL);
1807 * spp protocol timeout routine called every 500 ms.
1808 * Updates the timers in all active pcb's and
1809 * causes finite state machine actions if timers expire.
1814 struct nspcb *ip, *ipnxt;
1819 * Search through tcb's and update active timers.
1822 ip = nspcb.nsp_next;
1827 while (ip != &nspcb) {
/* Capture the successor first: a timer action may delete this pcb. */
1829 ipnxt = ip->nsp_next;
/* Count down each armed timer; act when one reaches zero. */
1832 for (i = 0; i < SPPT_NTIMERS; i++) {
1833 if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
/* List linkage changed under us => the pcb was deleted; stop here. */
1835 if (ipnxt->nsp_prev != ip)
1845 spp_iss += SPP_ISSINCR/PR_SLOWHZ; /* increment iss */
1849 * SPP timer processing.
/*
 * Per-connection timer actions: 'timer' selects which s_timer[] slot
 * expired and dispatches the matching state-machine action.  Returns
 * the control block, or the result of spp_drop() when the expiry
 * tears the connection down.
 */
1852 spp_timers(struct sppcb *cb, int timer)
/* Remember which timer forced output (biased by 1 so 0 means "none"). */
1857 cb->s_force = 1 + timer;
1861 * 2 MSL timeout in shutdown went off. TCP deletes connection
/* SPP keeps no 2MSL state; this timer firing is unexpected. */
1865 kprintf("spp: SPPT_2MSL went off for no reason\n");
1866 cb->s_timer[timer] = 0;
1870 * Retransmission timer went off. Message has not
1871 * been acked within retransmit interval. Back off
1872 * to a longer retransmit interval and retransmit one packet.
/* Too many retransmissions: give up and drop the connection. */
1875 if (++cb->s_rxtshift > SPP_MAXRXTSHIFT) {
1876 cb->s_rxtshift = SPP_MAXRXTSHIFT;
1877 sppstat.spps_timeoutdrop++;
1878 cb = spp_drop(cb, ETIMEDOUT);
1881 sppstat.spps_rexmttimeo++;
/* New timeout from the smoothed rtt estimate, scaled by the backoff table. */
1882 rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1883 rexmt *= spp_backoff[cb->s_rxtshift];
1884 SPPT_RANGESET(cb->s_rxtcur, rexmt, SPPTV_MIN, SPPTV_REXMTMAX);
1885 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1887 * If we have backed off fairly far, our srtt
1888 * estimate is probably bogus. Clobber it
1889 * so we'll take the next rtt measurement as our srtt;
1890 * move the current srtt into rttvar to keep the current
1891 * retransmit times until then.
1893 if (cb->s_rxtshift > SPP_MAXRXTSHIFT / 4 ) {
1894 cb->s_rttvar += (cb->s_srtt >> 2);
/* Restart transmission from the highest acknowledged sequence number. */
1897 cb->s_snxt = cb->s_rack;
1899 * If timing a packet, stop the timer.
1903 * See very long discussion in tcp_timer.c about congestion
1904 * window and ssthresh
/* Halve the effective window (in packets) for the new slow-start threshold. */
1906 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1910 cb->s_ssthresh = win * CUNIT;
1911 spp_output(cb, NULL);
1915 * Persistence timer into zero window.
1916 * Force a probe to be sent.
1919 sppstat.spps_persisttimeo++;
1921 spp_output(cb, NULL);
1925 * Keep-alive timer went off; send something
1926 * or drop connection if idle for too long.
1929 sppstat.spps_keeptimeo++;
/* Connection never completed: expiry means the open attempt failed. */
1930 if (cb->s_state < TCPS_ESTABLISHED)
/* With SO_KEEPALIVE set, probe the peer unless idle past the limit. */
1932 if (cb->s_nspcb->nsp_socket->so_options & SO_KEEPALIVE) {
1933 if (cb->s_idle >= SPPTV_MAXIDLE)
1935 sppstat.spps_keepprobe++;
1936 spp_output(cb, NULL);
1939 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
1942 sppstat.spps_keepdrops++;
1943 cb = spp_drop(cb, ETIMEDOUT);
/*
 * Exported control-block sizes -- presumably read by external tools
 * (debuggers / netstat-style programs); confirm consumers before
 * renaming or removing.
 */
1949 int SppcbSize = sizeof (struct sppcb);
1950 int NspcbSize = sizeof (struct nspcb);