2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)spp_usrreq.c 8.1 (Berkeley) 6/10/93
34 * $FreeBSD: src/sys/netns/spp_usrreq.c,v 1.11 1999/08/28 00:49:53 peter Exp $
35 * $DragonFly: src/sys/netproto/ns/spp_usrreq.c,v 1.14 2004/07/31 07:52:58 dillon Exp $
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/malloc.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/errno.h>
49 #include <net/route.h>
50 #include <netinet/tcp_fsm.h>
59 #include "spp_timer.h"
61 #include "spp_debug.h"
63 extern u_char nsctlerrmap[]; /* from ns_input.c */
64 extern int idpcksum; /* from ns_input.c */
66 static MALLOC_DEFINE(M_IDP, "ns_idp", "NS Packet Management");
67 static MALLOC_DEFINE(M_SPIDP_Q, "ns_spidp_q", "NS Packet Management");
68 static MALLOC_DEFINE(M_SPPCB, "ns_sppcb", "NS PCB Management");
70 struct spp_istat spp_istat;
72 int spp_backoff[SPP_MAXRXTSHIFT+1] =
73 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
76 * SP protocol implementation.
81 spp_iss = 1; /* WRONG !! should fish it out of TODR */
83 struct spidp spp_savesi;
85 extern int sppconsdebug;
87 int spp_use_delack = 0;
88 u_short spp_newchecks[50];
92 spp_input(struct mbuf *m, struct nspcb *nsp)
101 sppstat.spps_rcvtotal++;
103 panic("No nspcb in spp_input");
108 if (cb == 0) goto bad;
110 if (m->m_len < sizeof(struct spidp)) {
111 if ((m = m_pullup(m, sizeof(*si))) == 0) {
112 sppstat.spps_rcvshort++;
116 si = mtod(m, struct spidp *);
117 si->si_seq = ntohs(si->si_seq);
118 si->si_ack = ntohs(si->si_ack);
119 si->si_alo = ntohs(si->si_alo);
121 so = nsp->nsp_socket;
122 if (so->so_options & SO_DEBUG || traceallspps) {
123 ostate = cb->s_state;
126 if (so->so_options & SO_ACCEPTCONN) {
127 struct sppcb *ocb = cb;
129 so = sonewconn(so, 0);
134 * This is ugly, but ....
136 * Mark socket as temporary until we're
137 * committed to keeping it. The code at
138 * ``drop'' and ``dropwithreset'' check the
139 * flag dropsocket to see if the temporary
140 * socket created here should be discarded.
141 * We mark the socket as discardable until
142 * we're committed to it below in TCPS_LISTEN.
145 nsp = (struct nspcb *)so->so_pcb;
146 nsp->nsp_laddr = si->si_dna;
148 cb->s_mtu = ocb->s_mtu; /* preserve sockopts */
149 cb->s_flags = ocb->s_flags; /* preserve sockopts */
150 cb->s_flags2 = ocb->s_flags2; /* preserve sockopts */
151 cb->s_state = TCPS_LISTEN;
155 * Packet received on connection.
156 * reset idle time and keep-alive timer;
159 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
161 switch (cb->s_state) {
165 struct sockaddr_ns *sns;
166 struct ns_addr laddr;
169 * If somebody here was carrying on a conversation
170 * and went away, and his pen pal thinks he can
171 * still talk, we get the misdirected packet.
173 if (spp_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
177 am = m_get(MB_DONTWAIT, MT_SONAME);
180 am->m_len = sizeof (struct sockaddr_ns);
181 sns = mtod(am, struct sockaddr_ns *);
182 sns->sns_len = sizeof(*sns);
183 sns->sns_family = AF_NS;
184 sns->sns_addr = si->si_sna;
185 laddr = nsp->nsp_laddr;
186 if (ns_nullhost(laddr))
187 nsp->nsp_laddr = si->si_dna;
188 if (ns_pcbconnect(nsp, mtod(am, struct sockaddr *))) {
189 nsp->nsp_laddr = laddr;
196 dropsocket = 0; /* committed to socket */
197 cb->s_did = si->si_sid;
198 cb->s_rack = si->si_ack;
199 cb->s_ralo = si->si_alo;
200 #define THREEWAYSHAKE
202 cb->s_state = TCPS_SYN_RECEIVED;
203 cb->s_force = 1 + SPPT_KEEP;
204 sppstat.spps_accepts++;
205 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
209 * This state means that we have heard a response
210 * to our acceptance of their connection
211 * It is probably logically unnecessary in this
214 case TCPS_SYN_RECEIVED: {
215 if (si->si_did!=cb->s_sid) {
220 nsp->nsp_fport = si->si_sport;
221 cb->s_timer[SPPT_REXMT] = 0;
222 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
224 cb->s_state = TCPS_ESTABLISHED;
225 sppstat.spps_accepts++;
230 * This state means that we have gotten a response
231 * to our attempt to establish a connection.
232 * We fill in the data from the other side,
233 * telling us which port to respond to, instead of the well-
234 * known one we might have sent to in the first place.
235 * We also require that this is a response to our
239 if (si->si_did!=cb->s_sid) {
243 sppstat.spps_connects++;
244 cb->s_did = si->si_sid;
245 cb->s_rack = si->si_ack;
246 cb->s_ralo = si->si_alo;
247 cb->s_dport = nsp->nsp_fport = si->si_sport;
248 cb->s_timer[SPPT_REXMT] = 0;
249 cb->s_flags |= SF_ACKNOW;
251 cb->s_state = TCPS_ESTABLISHED;
252 /* Use roundtrip time of connection request for initial rtt */
254 cb->s_srtt = cb->s_rtt << 3;
255 cb->s_rttvar = cb->s_rtt << 1;
256 SPPT_RANGESET(cb->s_rxtcur,
257 ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
258 SPPTV_MIN, SPPTV_REXMTMAX);
262 if (so->so_options & SO_DEBUG || traceallspps)
263 spp_trace(SA_INPUT, (u_char)ostate, cb, &spp_savesi, 0);
265 m->m_len -= sizeof (struct idp);
266 m->m_pkthdr.len -= sizeof (struct idp);
267 m->m_data += sizeof (struct idp);
269 if (spp_reass(cb, si, m)) {
272 if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
273 (void) spp_output(cb, (struct mbuf *)0);
274 cb->s_flags &= ~(SF_WIN|SF_RXT);
280 si->si_seq = ntohs(si->si_seq);
281 si->si_ack = ntohs(si->si_ack);
282 si->si_alo = ntohs(si->si_alo);
283 ns_error(m, NS_ERR_NOSOCK, 0);
284 if (cb->s_nspcb->nsp_socket->so_options & SO_DEBUG || traceallspps)
285 spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
290 if (cb == 0 || cb->s_nspcb->nsp_socket->so_options & SO_DEBUG ||
292 spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
296 int spprexmtthresh = 3;
299 * This is structurally similar to the tcp reassembly routine
300 * but its function is somewhat different: It merely queues
301 * packets up, and suppresses duplicates.
304 spp_reass(struct sppcb *cb, struct spidp *si, struct mbuf *si_m)
309 struct socket *so = cb->s_nspcb->nsp_socket;
310 char packetp = cb->s_flags & SF_HI;
317 * Update our news from them.
319 if (si->si_cc & SP_SA)
320 cb->s_flags |= (spp_use_delack ? SF_DELACK : SF_ACKNOW);
321 if (SSEQ_GT(si->si_alo, cb->s_ralo))
322 cb->s_flags |= SF_WIN;
323 if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
324 if ((si->si_cc & SP_SP) && cb->s_rack != (cb->s_smax + 1)) {
325 sppstat.spps_rcvdupack++;
327 * If this is a completely duplicate ack
328 * and other conditions hold, we assume
329 * a packet has been dropped and retransmit
330 * it exactly as in tcp_input().
332 if (si->si_ack != cb->s_rack ||
333 si->si_alo != cb->s_ralo)
335 else if (++cb->s_dupacks == spprexmtthresh) {
336 u_short onxt = cb->s_snxt;
337 int cwnd = cb->s_cwnd;
339 cb->s_snxt = si->si_ack;
341 cb->s_force = 1 + SPPT_REXMT;
342 (void) spp_output(cb, (struct mbuf *)0);
343 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
345 if (cwnd >= 4 * CUNIT)
346 cb->s_cwnd = cwnd / 2;
347 if (SSEQ_GT(onxt, cb->s_snxt))
357 * If our correspondent acknowledges data we haven't sent
358 * TCP would drop the packet after acking. We'll be a little
361 if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
362 sppstat.spps_rcvacktoomuch++;
363 si->si_ack = cb->s_smax + 1;
365 sppstat.spps_rcvackpack++;
367 * If transmit timer is running and timed sequence
368 * number was acked, update smoothed round trip time.
369 * See discussion of algorithm in tcp_input.c
371 if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
372 sppstat.spps_rttupdated++;
373 if (cb->s_srtt != 0) {
375 delta = cb->s_rtt - (cb->s_srtt >> 3);
376 if ((cb->s_srtt += delta) <= 0)
380 delta -= (cb->s_rttvar >> 2);
381 if ((cb->s_rttvar += delta) <= 0)
385 * No rtt measurement yet
387 cb->s_srtt = cb->s_rtt << 3;
388 cb->s_rttvar = cb->s_rtt << 1;
392 SPPT_RANGESET(cb->s_rxtcur,
393 ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
394 SPPTV_MIN, SPPTV_REXMTMAX);
397 * If all outstanding data is acked, stop retransmit
398 * timer and remember to restart (more output or persist).
399 * If there is more data to be acked, restart retransmit
400 * timer, using current (possibly backed-off) value;
402 if (si->si_ack == cb->s_smax + 1) {
403 cb->s_timer[SPPT_REXMT] = 0;
404 cb->s_flags |= SF_RXT;
405 } else if (cb->s_timer[SPPT_PERSIST] == 0)
406 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
408 * When new data is acked, open the congestion window.
409 * If the window gives us less than ssthresh packets
410 * in flight, open exponentially (maxseg at a time).
411 * Otherwise open linearly (maxseg^2 / cwnd at a time).
414 if (cb->s_cwnd > cb->s_ssthresh)
415 incr = max(incr * incr / cb->s_cwnd, 1);
416 cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
418 * Trim Acked data from output queue.
420 while ((m = so->so_snd.sb_mb) != NULL) {
421 if (SSEQ_LT((mtod(m, struct spidp *))->si_seq, si->si_ack))
422 sbdroprecord(&so->so_snd);
427 cb->s_rack = si->si_ack;
429 if (SSEQ_LT(cb->s_snxt, cb->s_rack))
430 cb->s_snxt = cb->s_rack;
431 if (SSEQ_LT(cb->s_swl1, si->si_seq) || (cb->s_swl1 == si->si_seq &&
432 (SSEQ_LT(cb->s_swl2, si->si_ack) ||
433 (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo))))) {
434 /* keep track of pure window updates */
435 if ((si->si_cc & SP_SP) && cb->s_swl2 == si->si_ack
436 && SSEQ_LT(cb->s_ralo, si->si_alo)) {
437 sppstat.spps_rcvwinupd++;
438 sppstat.spps_rcvdupack--;
440 cb->s_ralo = si->si_alo;
441 cb->s_swl1 = si->si_seq;
442 cb->s_swl2 = si->si_ack;
443 cb->s_swnd = (1 + si->si_alo - si->si_ack);
444 if (cb->s_swnd > cb->s_smxw)
445 cb->s_smxw = cb->s_swnd;
446 cb->s_flags |= SF_WIN;
449 * If this packet number is higher than that which
450 * we have allocated refuse it, unless urgent
452 if (SSEQ_GT(si->si_seq, cb->s_alo)) {
453 if (si->si_cc & SP_SP) {
454 sppstat.spps_rcvwinprobe++;
457 sppstat.spps_rcvpackafterwin++;
458 if (si->si_cc & SP_OB) {
459 if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
460 ns_error(si_m, NS_ERR_FULLUP, 0);
462 } /* else queue this packet; */
464 /*register struct socket *so = cb->s_nspcb->nsp_socket;
465 if (so->so_state && SS_NOFDREF) {
466 ns_error(si_m, NS_ERR_NOSOCK, 0);
471 ns_error(si_m, NS_ERR_FULLUP, 0);
476 * If this is a system packet, we don't need to
477 * queue it up, and won't update acknowledge #
479 if (si->si_cc & SP_SP) {
483 * We have already seen this packet, so drop.
485 if (SSEQ_LT(si->si_seq, cb->s_ack)) {
487 sppstat.spps_rcvduppack++;
488 if (si->si_seq == cb->s_ack - 1)
493 * Loop through all packets queued up to insert in
494 * appropriate sequence.
496 for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
497 if (si->si_seq == SI(q)->si_seq) {
498 sppstat.spps_rcvduppack++;
501 if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
502 sppstat.spps_rcvoopack++;
506 nq = malloc(sizeof(struct spidp_q), M_SPIDP_Q, M_INTNOWAIT);
511 insque(nq, q->si_prev);
514 * If this packet is urgent, inform process
516 if (si->si_cc & SP_OB) {
517 cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
519 cb->s_oobflags |= SF_IOOB;
522 #define SPINC sizeof(struct sphdr)
524 * Loop through all packets queued up to update acknowledge
525 * number, and present all acknowledged data to user;
526 * If in packet interface mode, show packet headers.
528 for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
529 if (SI(q)->si_seq == cb->s_ack) {
532 if (SI(q)->si_cc & SP_OB) {
533 cb->s_oobflags &= ~SF_IOOB;
534 if (so->so_rcv.sb_cc)
535 so->so_oobmark = so->so_rcv.sb_cc;
537 so->so_state |= SS_RCVATMARK;
544 sppstat.spps_rcvpack++;
546 if (cb->s_flags2 & SF_NEWCALL) {
547 struct sphdr *sp = mtod(m, struct sphdr *);
548 u_char dt = sp->sp_dt;
550 if (dt != cb->s_rhdr.sp_dt) {
552 m_getclr(MB_DONTWAIT, MT_CONTROL);
557 cb->s_rhdr.sp_dt = dt;
558 mm->m_len = 5; /*XXX*/
561 *(u_char *)(&s[2]) = dt;
562 sbappend(&so->so_rcv, mm);
565 if (sp->sp_cc & SP_OB) {
566 m_chtype(m, MT_OOBDATA);
569 so->so_state &= ~SS_RCVATMARK;
574 m->m_pkthdr.len -= SPINC;
576 if ((sp->sp_cc & SP_EM) || packetp) {
577 sbappendrecord(&so->so_rcv, m);
580 sbappend(&so->so_rcv, m);
584 sbappendrecord(&so->so_rcv, m);
586 cb->s_rhdr = *mtod(m, struct sphdr *);
589 m->m_pkthdr.len -= SPINC;
590 sbappend(&so->so_rcv, m);
595 if (wakeup) sorwakeup(so);
/*
 * spp_ctlinput: control-input handler for SPP.
 *
 * cmd is a PRC_* command code; arg is an opaque pointer that the visible
 * code interprets either as a struct sockaddr_ns (PRC_HOSTUNREACH case) or
 * as a struct ns_errp error packet.  Depending on the error type the
 * affected PCBs are notified through ns_pcbnotify() or dropped with
 * spp_drop()/idp_drop(), using nsctlerrmap[cmd] as the error code.
 *
 * NOTE(review): this listing omits a number of lines of the function
 * (declarations, some case labels, exits); only the visible statements
 * are described here.
 */
600 spp_ctlinput(int cmd, caddr_t arg)
603 struct ns_errp *errp = 0;
605 struct sockaddr_ns *sns;
/*
 * NOTE(review): cmd is used below as an index into nsctlerrmap[].  This
 * test lets cmd == PRC_NCMDS through; if the table has PRC_NCMDS entries
 * that index is one past the end, and ">=" may be what is intended --
 * confirm against the table's definition in ns_input.c.
 */
608 if (cmd < 0 || cmd > PRC_NCMDS)
610 type = NS_ERR_UNREACH_HOST;
619 case PRC_HOSTUNREACH:
/* Here arg is treated as a sockaddr_ns; its family is checked against AF_NS. */
620 sns = (struct sockaddr_ns *)arg;
621 if (sns->sns_family != AF_NS)
/* Here arg is treated as an NS error packet; the destination address comes from the IDP header it carries. */
627 errp = (struct ns_errp *)arg;
628 na = &errp->ns_err_idp.idp_dna;
/* The error number is converted with ntohs() before it is used as the type. */
629 type = errp->ns_err_num;
630 type = ntohs((u_short)type);
634 case NS_ERR_UNREACH_HOST:
/* Notify the PCBs matching na, with spp_abort as the callback. */
635 ns_pcbnotify(na, (int)nsctlerrmap[cmd], spp_abort, (long) 0);
/* Look up a single PCB; it is then dropped via spp_drop() or idp_drop(). */
640 nsp = ns_pcblookup(na, errp->ns_err_idp.idp_sna.x_port,
644 (void) spp_drop((struct sppcb *)nsp->nsp_pcb,
645 (int)nsctlerrmap[cmd]);
647 (void) idp_drop(nsp, (int)nsctlerrmap[cmd]);
/* Notify with error 0 and spp_quench as the callback. */
652 ns_pcbnotify(na, 0, spp_quench, (long) 0);
656 * When a source quench is received, close congestion window
657 * to one packet. We will gradually open it again as we proceed.
660 spp_quench(struct nspcb *nsp)
662 struct sppcb *cb = nstosppcb(nsp);
670 spp_fixmtu(struct nspcb *nsp)
672 struct sppcb *cb = (struct sppcb *)(nsp->nsp_pcb);
678 struct mbuf *firstbad, *m0;
682 * The notification that we have sent
683 * too much is bad news -- we will
684 * have to go through queued up so far
685 * splitting ones which are too big and
686 * reassigning sequence numbers and checksums.
687 * we should then retransmit all packets from
688 * one above the offending packet to the last one
689 * we had sent (or our allocation)
690 * then the offending one so that any queued
691 * data at our destination will be discarded.
693 ep = (struct ns_errp *)nsp->nsp_notify_param;
694 sb = &nsp->nsp_socket->so_snd;
695 cb->s_mtu = ep->ns_err_param;
696 badseq = ep->ns_err_idp.si_seq;
697 for (m = sb->sb_mb; m; m = m->m_nextpkt) {
698 si = mtod(m, struct spidp *);
699 if (si->si_seq == badseq)
705 /* calculate length */
706 for (m0 = m, len = 0; m ; m = m->m_next)
708 if (len > cb->s_mtu) {
717 spp_output(struct sppcb *cb, struct mbuf *m0)
719 struct socket *so = cb->s_nspcb->nsp_socket;
720 struct mbuf *m = NULL;
721 struct spidp *si = NULL;
722 struct sockbuf *sb = &so->so_snd;
723 int len = 0, win, rcv_win;
724 short span, off, recordp = 0;
726 int error = 0, sendalot;
736 * Make sure that packet isn't too big.
738 for (m = m0; m ; m = m->m_next) {
741 if (m->m_flags & M_EOR)
744 datalen = (cb->s_flags & SF_HO) ?
745 len - sizeof (struct sphdr) : len;
747 if (cb->s_flags & SF_PI) {
751 int oldEM = cb->s_cc & SP_EM;
756 * Here we are only being called
757 * from usrreq(), so it is OK to
760 m = m_copym(m0, 0, mtu, MB_WAIT);
761 if (cb->s_flags & SF_NEWCALL) {
765 mm->m_flags &= ~M_EOR;
769 error = spp_output(cb, m);
782 * Force length even, by adding a "garbage byte" if
787 if (M_TRAILINGSPACE(m) >= 1)
790 struct mbuf *m1 = m_get(MB_DONTWAIT, MT_DATA);
797 *(mtod(m1, u_char *)) = 0;
801 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
807 * Fill in mbuf with extended SP header
808 * and addresses and length put into network format.
810 MH_ALIGN(m, sizeof (struct spidp));
811 m->m_len = sizeof (struct spidp);
813 si = mtod(m, struct spidp *);
814 si->si_i = *cb->s_idp;
815 si->si_s = cb->s_shdr;
816 if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
818 if (m0->m_len < sizeof (*sh)) {
819 if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
826 sh = mtod(m0, struct sphdr *);
827 si->si_dt = sh->sp_dt;
828 si->si_cc |= sh->sp_cc & SP_EM;
829 m0->m_len -= sizeof (*sh);
830 m0->m_data += sizeof (*sh);
834 if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
838 if (cb->s_oobflags & SF_SOOB) {
841 * make sure OB packets convey exactly 1 byte.
842 * If the packet is 1 byte or larger, we
843 * have already guaranteed there to be at least
844 * one garbage byte for the checksum, and
845 * extra bytes shouldn't hurt!
847 if (len > sizeof(*si)) {
849 len = (1 + sizeof(*si));
852 si->si_len = htons((u_short)len);
853 m->m_pkthdr.len = ((len - 1) | 1) + 1;
855 * queue stuff up for output
857 sbappendrecord(sb, m);
861 idle = (cb->s_smax == (cb->s_rack - 1));
865 off = cb->s_snxt - cb->s_rack;
866 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT));
869 * If in persist timeout with window of 0, send a probe.
870 * Otherwise, if window is small but nonzero
871 * and timer expired, send what we can and go into
874 if (cb->s_force == 1 + SPPT_PERSIST) {
876 cb->s_timer[SPPT_PERSIST] = 0;
880 span = cb->s_seq - cb->s_rack;
881 len = min(span, win) - off;
885 * Window shrank after we went into it.
886 * If window shrank to 0, cancel pending
887 * retransmission and pull s_snxt back
888 * to (closed) window. We will enter persist
889 * state below. If the window didn't close completely,
890 * just wait for an ACK.
894 cb->s_timer[SPPT_REXMT] = 0;
895 cb->s_snxt = cb->s_rack;
900 rcv_win = sbspace(&so->so_rcv);
903 * Send if we owe peer an ACK.
905 if (cb->s_oobflags & SF_SOOB) {
907 * must transmit this out of band packet
909 cb->s_oobflags &= ~ SF_SOOB;
911 sppstat.spps_sndurg++;
914 if (cb->s_flags & SF_ACKNOW)
916 if (cb->s_state < TCPS_ESTABLISHED)
919 * Silly window can't happen in spp.
920 * Code from tcp deleted.
925 * Compare available window to amount of window
926 * known to peer (as advertised window less
927 * next expected input.) If the difference is at least two
928 * packets or at least 35% of the maximum possible window,
929 * then want to send a window update to peer.
932 u_short delta = 1 + cb->s_alo - cb->s_ack;
933 int adv = rcv_win - (delta * cb->s_mtu);
935 if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
936 (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
937 sppstat.spps_sndwinup++;
938 cb->s_flags |= SF_ACKNOW;
944 * Many comments from tcp_output.c are appropriate here
946 * If send window is too small, there is data to transmit, and no
947 * retransmit or persist is pending, then go to persist state.
948 * If nothing happens soon, send when timer expires:
949 * if window is nonzero, transmit what we can,
950 * otherwise send a probe.
952 if (so->so_snd.sb_cc && cb->s_timer[SPPT_REXMT] == 0 &&
953 cb->s_timer[SPPT_PERSIST] == 0) {
958 * No reason to send a packet, just return.
965 * Find requested packet.
969 cb->s_want = cb->s_snxt;
970 for (m = sb->sb_mb; m; m = m->m_nextpkt) {
971 si = mtod(m, struct spidp *);
972 if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
977 if (si->si_seq == cb->s_snxt)
980 sppstat.spps_sndvoid++, si = 0;
988 alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
989 if (SSEQ_LT(alo, cb->s_alo))
994 * must make a copy of this packet for
995 * idp_output to monkey with
997 m = m_copy(m, 0, (int)M_COPYALL);
1001 si = mtod(m, struct spidp *);
1002 if (SSEQ_LT(si->si_seq, cb->s_smax))
1003 sppstat.spps_sndrexmitpack++;
1005 sppstat.spps_sndpack++;
1006 } else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
1008 * Must send an acknowledgement or a probe
1011 sppstat.spps_sndprobe++;
1012 if (cb->s_flags & SF_ACKNOW)
1013 sppstat.spps_sndacks++;
1014 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
1018 * Fill in mbuf with extended SP header
1019 * and addresses and length put into network format.
1021 MH_ALIGN(m, sizeof (struct spidp));
1022 m->m_len = sizeof (*si);
1023 m->m_pkthdr.len = sizeof (*si);
1024 si = mtod(m, struct spidp *);
1025 si->si_i = *cb->s_idp;
1026 si->si_s = cb->s_shdr;
1027 si->si_seq = cb->s_smax + 1;
1028 si->si_len = htons(sizeof (*si));
1032 if (so->so_options & SO_DEBUG || traceallspps)
1033 spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1037 * Stuff checksum and output datagram.
1039 if ((si->si_cc & SP_SP) == 0) {
1040 if (cb->s_force != (1 + SPPT_PERSIST) ||
1041 cb->s_timer[SPPT_PERSIST] == 0) {
1043 * If this is a new packet and we are not currently
1044 * timing anything, time this one.
1046 if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1047 cb->s_smax = si->si_seq;
1048 if (cb->s_rtt == 0) {
1049 sppstat.spps_segstimed++;
1050 cb->s_rtseq = si->si_seq;
1055 * Set rexmt timer if not currently set,
1056 * Initial value for retransmit timer is smoothed
1057 * round-trip time + 2 * round-trip time variance.
1058 * Initialize shift counter which is used for backoff
1059 * of retransmit time.
1061 if (cb->s_timer[SPPT_REXMT] == 0 &&
1062 cb->s_snxt != cb->s_rack) {
1063 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1064 if (cb->s_timer[SPPT_PERSIST]) {
1065 cb->s_timer[SPPT_PERSIST] = 0;
1069 } else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1070 cb->s_smax = si->si_seq;
1072 } else if (cb->s_state < TCPS_ESTABLISHED) {
1074 cb->s_rtt = 1; /* Time initial handshake */
1075 if (cb->s_timer[SPPT_REXMT] == 0)
1076 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1080 * Do not request acks when we ack their data packets or
1081 * when we do a gratuitous window update.
1083 if (((si->si_cc & SP_SP) == 0) || cb->s_force)
1085 si->si_seq = htons(si->si_seq);
1086 si->si_alo = htons(alo);
1087 si->si_ack = htons(cb->s_ack);
1091 len = ntohs(si->si_len);
1094 si->si_sum = ns_cksum(m, len);
1096 si->si_sum = 0xffff;
1099 if (so->so_options & SO_DEBUG || traceallspps)
1100 spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1102 if (so->so_options & SO_DONTROUTE)
1103 error = ns_output(m, (struct route *)0, NS_ROUTETOIF);
1105 error = ns_output(m, &cb->s_nspcb->nsp_route, 0);
1110 sppstat.spps_sndtotal++;
1112 * Data sent (as far as we can tell).
1113 * If this advertises a larger window than any other segment,
1114 * then remember the size of the advertized window.
1115 * Any pending ACK has now been sent.
1118 cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1119 if (SSEQ_GT(alo, cb->s_alo))
1127 int spp_do_persist_panics = 0;
1130 spp_setpersist(struct sppcb *cb)
1132 int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1134 if (cb->s_timer[SPPT_REXMT] && spp_do_persist_panics)
1135 panic("spp_output REXMT");
1137 * Start/restart persistence timer.
1139 SPPT_RANGESET(cb->s_timer[SPPT_PERSIST],
1140 t*spp_backoff[cb->s_rxtshift],
1141 SPPTV_PERSMIN, SPPTV_PERSMAX);
1142 if (cb->s_rxtshift < SPP_MAXRXTSHIFT)
/*
 * spp_ctloutput: get/set socket options for SPP.
 *
 * Requests whose level is not NSPROTO_SPP are passed unchanged to
 * idp_ctloutput().  For SPP-level requests the visible code either
 * builds a reply mbuf (allocated with m_get) holding the requested
 * value, or reads the new value out of *value and stores it in the
 * SPP control block obtained from the socket's nspcb.
 *
 * NOTE(review): this listing omits lines of the function (the switch
 * statements, several case labels, breaks and the return); only the
 * visible statements are described here.
 */
1147 spp_ctloutput(int req, struct socket *so, int level,
1148 int name, struct mbuf **value)
1151 struct nspcb *nsp = sotonspcb(so);
1153 int mask, error = 0;
1155 if (level != NSPROTO_SPP) {
1156 /* This will have to be changed when we do more general
1157 stacking of protocols */
1158 return (idp_ctloutput(req, so, level, name, value));
1164 cb = nstosppcb(nsp);
/* Reply mbuf for the "get" side; allocated without waiting. */
1171 m = m_get(MB_DONTWAIT, MT_DATA);
1176 case SO_HEADERS_ON_INPUT:
1180 case SO_HEADERS_ON_OUTPUT:
/* Reply is a short holding s_flags masked with the selected flag bit(s). */
1183 m->m_len = sizeof(short);
1184 *mtod(m, short *) = cb->s_flags & mask;
/* Reply is the current s_mtu (length given as u_short, stored through a short pointer). */
1188 m->m_len = sizeof(u_short);
1189 *mtod(m, short *) = cb->s_mtu;
1192 case SO_LAST_HEADER:
/* Reply is a copy of s_rhdr. */
1193 m->m_len = sizeof(struct sphdr);
1194 *mtod(m, struct sphdr *) = cb->s_rhdr;
1197 case SO_DEFAULT_HEADERS:
/*
 * NOTE(review): m_len is set to sizeof(struct spidp) but only a
 * struct sphdr is stored into the mbuf, whereas SO_LAST_HEADER above
 * uses sizeof(struct sphdr).  If struct spidp is the larger of the
 * two, the reply length covers bytes this code never writes --
 * confirm whether sizeof(struct sphdr) is intended.
 */
1198 m->m_len = sizeof(struct spidp);
1199 *mtod(m, struct sphdr *) = cb->s_shdr;
/* "Set" side: a missing value mbuf is checked for before the option is decoded. */
1209 if (value == 0 || *value == 0) {
1216 case SO_HEADERS_ON_INPUT:
1220 case SO_HEADERS_ON_OUTPUT:
/* The header flags may only be changed when SF_PI is set; otherwise EINVAL. */
1223 if (cb->s_flags & SF_PI) {
1224 ok = mtod(*value, int *);
1226 cb->s_flags |= mask;
1228 cb->s_flags &= ~mask;
1229 } else error = EINVAL;
/*
 * NOTE(review): the caller-supplied value is stored in s_mtu with no
 * range check visible in this listing; s_mtu is used as a divisor
 * elsewhere in this file (e.g. in spp_attach and spp_template), so a
 * value of 0 looks unsafe -- confirm whether a check exists.
 */
1233 cb->s_mtu = *(mtod(*value, u_short *));
/* SF_NEWCALL in s_flags2 is set or cleared from the int the caller supplied. */
1238 ok = mtod(*value, int *);
1240 cb->s_flags2 |= SF_NEWCALL;
1243 cb->s_flags2 &= ~SF_NEWCALL;
1249 case SO_DEFAULT_HEADERS:
/* Take the datastream type and only the SP_EM bit of the connection control field from the caller's sphdr. */
1252 = mtod(*value, struct sphdr *);
1253 cb->s_dt = sp->sp_dt;
1254 cb->s_cc = sp->sp_cc & SP_EM;
1269 * SPP_USRREQ PROCEDURES
1273 spp_usr_abort(struct socket *so)
1275 struct nspcb *nsp = sotonspcb(so);
1280 cb = nstosppcb(nsp);
1281 spp_drop(cb, ECONNABORTED);
1290 spp_accept(struct socket *so, struct sockaddr **nam)
1292 struct nspcb *nsp = sotonspcb(so);
1294 struct sockaddr_ns sns;
1298 cb = nstosppcb(nsp);
1299 bzero(&sns, sizeof(sns));
1300 sns.sns_family = AF_NS;
1301 sns.sns_addr = nsp->nsp_faddr;
1302 *nam = dup_sockaddr((struct sockaddr *)&sns);
1311 spp_attach(struct socket *so, int proto, struct pru_attach_info *ai)
1313 struct nspcb *nsp = sotonspcb(so);
1320 if ((error = ns_pcballoc(so, &nspcb)) != 0)
1322 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1323 if ((error = soreserve(so, 3072, 3072, ai->sb_rlimit)) != 0)
1326 nsp = sotonspcb(so);
1329 cb = malloc(sizeof(struct sppcb), M_SPPCB, M_WAITOK|M_ZERO);
1330 cb->s_idp = malloc(sizeof(struct idp), M_IDP, M_WAITOK|M_ZERO);
1331 cb->s_state = TCPS_LISTEN;
1334 cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1336 cb->s_mtu = 576 - sizeof (struct spidp);
1337 cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1338 cb->s_ssthresh = cb->s_cwnd;
1339 cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof (struct spidp));
1342 * Above is recomputed when connecting to account
1343 * for changed buffering or mtu's
1345 cb->s_rtt = SPPTV_SRTTBASE;
1346 cb->s_rttvar = SPPTV_SRTTDFLT << 2;
1347 SPPT_RANGESET(cb->s_rxtcur,
1348 ((SPPTV_SRTTBASE >> 2) + (SPPTV_SRTTDFLT << 2)) >> 1,
1349 SPPTV_MIN, SPPTV_REXMTMAX);
1350 nsp->nsp_pcb = (caddr_t)cb;
1355 spp_attach_sp(struct socket *so, int proto, struct pru_attach_info *ai)
1360 if ((error = spp_attach(so, proto, ai)) == 0) {
1361 nsp = sotonspcb(so);
1362 ((struct sppcb *)nsp->nsp_pcb)->s_flags |=
1363 (SF_HI | SF_HO | SF_PI);
1369 spp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1371 struct nspcb *nsp = sotonspcb(so);
1375 error = ns_pcbbind(nsp, nam);
1382 * Initiate connection to peer.
1383 * Enter SYN_SENT state, and mark socket as connecting.
1384 * Start keep-alive timer, setup prototype header,
1385 * Send initial system packet requesting connection.
/*
 * spp_connect: connect request handler for an SPP socket.
 *
 * Visible steps: bind a local port with ns_pcbbind() when nsp_lport is
 * 0, connect the PCB to nam with ns_pcbconnect(), count the attempt in
 * sppstat, move the control block to TCPS_SYN_SENT, arm the keep-alive
 * timer, and call spp_output() with no data, keeping its result in
 * error.
 *
 * NOTE(review): this listing omits lines of the function (local
 * declarations, error exits and other statements); only the visible
 * statements are described here.
 */
1388 spp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1390 struct nspcb *nsp = sotonspcb(so);
1395 cb = nstosppcb(nsp);
/* No local port yet: bind with a NULL address before connecting. */
1396 if (nsp->nsp_lport == 0) {
1397 if ((error = ns_pcbbind(nsp, NULL)) != 0)
1400 if ((error = ns_pcbconnect(nsp, nam)) != 0)
1403 sppstat.spps_connattempt++;
1404 cb->s_state = TCPS_SYN_SENT;
1407 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
/*
 * NOTE(review): elsewhere in this file s_force is set to 1 + a timer
 * index (1 + SPPT_KEEP, 1 + SPPT_REXMT, 1 + timer) and is compared
 * against 1 + SPPT_PERSIST, whereas SPPTV_KEEP is the value loaded
 * into s_timer[] on the line above.  1 + SPPT_KEEP may be what is
 * intended here -- confirm.
 */
1408 cb->s_force = 1 + SPPTV_KEEP;
1410 * Other party is required to respond to
1411 * the port I send from, but he is not
1412 * required to answer from where I am sending to,
1413 * so allow wildcarding.
1414 * original port I am sending to is still saved in
/* Send with no data mbuf; the result becomes this request's error value. */
1418 error = spp_output(cb, NULL);
1426 spp_detach(struct socket *so)
1428 struct nspcb *nsp = sotonspcb(so);
1433 cb = nstosppcb(nsp);
1434 if (cb->s_state > TCPS_LISTEN)
1442 * We may decide later to implement connection closing
1443 * handshaking at the spp level optionally.
1444 * here is the hook to do it:
1447 spp_usr_disconnect(struct socket *so)
1449 struct nspcb *nsp = sotonspcb(so);
1454 cb = nstosppcb(nsp);
1464 spp_listen(struct socket *so, struct thread *td)
1466 struct nspcb *nsp = sotonspcb(so);
1471 cb = nstosppcb(nsp);
1473 if (nsp->nsp_lport == 0)
1474 error = ns_pcbbind(nsp, NULL);
1476 cb->s_state = TCPS_LISTEN;
1484 spp_peeraddr(struct socket *so, struct sockaddr **nam)
1486 struct nspcb *nsp = sotonspcb(so);
1490 ns_setpeeraddr(nsp, nam);
1499 spp_rcvd(struct socket *so, int flags)
1501 struct nspcb *nsp = sotonspcb(so);
1506 cb = nstosppcb(nsp);
1507 cb->s_flags |= SF_RVD;
1508 spp_output(cb, (struct mbuf *) 0);
1509 cb->s_flags &= ~SF_RVD;
1518 spp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1520 struct nspcb *nsp = sotonspcb(so);
1525 cb = nstosppcb(nsp);
1526 if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1527 (so->so_state & SS_RCVATMARK)) {
1529 *mtod(m, caddr_t) = cb->s_iobc;
1541 spp_send(struct socket *so, int flags, struct mbuf *m,
1542 struct sockaddr *addr, struct mbuf *control,
1545 struct nspcb *nsp = sotonspcb(so);
1550 cb = nstosppcb(nsp);
1552 if (flags & PRUS_OOB) {
1553 if (sbspace(&so->so_snd) < -512) {
1556 cb->s_oobflags |= SF_SOOB;
1561 u_short *p = mtod(control, u_short *);
1563 /* XXXX, for testing */
1564 if ((p[0] == 5) && p[1] == 1) {
1565 cb->s_shdr.sp_dt = *(u_char *)(&p[2]);
1571 error = spp_output(cb, m);
1585 spp_shutdown(struct socket *so)
1587 struct nspcb *nsp = sotonspcb(so);
1592 cb = nstosppcb(nsp);
1594 if ((cb = spp_usrclosed(cb)) != NULL)
1595 error = spp_output(cb, NULL);
1605 spp_sockaddr(struct socket *so, struct sockaddr **nam)
1607 struct nspcb *nsp = sotonspcb(so);
1611 ns_setsockaddr(nsp, nam);
1619 struct pr_usrreqs spp_usrreqs = {
1620 spp_usr_abort, spp_accept, spp_attach, spp_bind,
1621 spp_connect, pru_connect2_notsupp, ns_control, spp_detach,
1622 spp_usr_disconnect, spp_listen, spp_peeraddr, spp_rcvd,
1623 spp_rcvoob, spp_send, pru_sense_null, spp_shutdown,
1624 spp_sockaddr, sosend, soreceive, sopoll
1627 struct pr_usrreqs spp_usrreqs_sp = {
1628 spp_usr_abort, spp_accept, spp_attach_sp, spp_bind,
1629 spp_connect, pru_connect2_notsupp, ns_control, spp_detach,
1630 spp_usr_disconnect, spp_listen, spp_peeraddr, spp_rcvd,
1631 spp_rcvoob, spp_send, pru_sense_null, spp_shutdown,
1632 spp_sockaddr, sosend, soreceive, sopoll
1636 * Create template to be used to send spp packets on a connection.
1637 * Called after host entry created, fills
1638 * in a skeletal spp header (choosing connection id),
1639 * minimizing the amount of work necessary when the connection is used.
1642 spp_template(struct sppcb *cb)
1644 struct nspcb *nsp = cb->s_nspcb;
1645 struct idp *idp = cb->s_idp;
1646 struct sockbuf *sb = &(nsp->nsp_socket->so_snd);
1648 idp->idp_pt = NSPROTO_SPP;
1649 idp->idp_sna = nsp->nsp_laddr;
1650 idp->idp_dna = nsp->nsp_faddr;
1651 cb->s_sid = htons(spp_iss);
1652 spp_iss += SPP_ISSINCR/2;
1654 cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1655 cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1657 cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spidp));
1658 cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1659 /* But allow for lots of little packets as well */
1663 * Close a SPIP control block:
1664 * discard spp control block itself
1665 * discard ns protocol control block
1666 * wake up any sleepers
1669 spp_close(struct sppcb *cb)
1673 struct nspcb *nsp = cb->s_nspcb;
1674 struct socket *so = nsp->nsp_socket;
1677 q = cb->s_q.si_next;
1678 while (q != &(cb->s_q)) {
1684 free(oq, M_SPIDP_Q);
1686 free(cb->s_idp, M_IDP);
1689 soisdisconnected(so);
1691 sppstat.spps_closed++;
1692 return ((struct sppcb *)0);
1695 * Someday we may do level 3 handshaking
1696 * to close a connection or send a xerox style error.
1697 * For now, just close.
1700 spp_usrclosed(struct sppcb *cb)
1702 return (spp_close(cb));
1706 spp_disconnect(struct sppcb *cb)
1708 return (spp_close(cb));
1712 * Drop connection, reporting
1713 * the specified error.
1716 spp_drop(struct sppcb *cb, int errno)
1718 struct socket *so = cb->s_nspcb->nsp_socket;
1721 * someday, in the xerox world
1722 * we will generate error protocol packets
1723 * announcing that the socket has gone away.
1725 if (TCPS_HAVERCVDSYN(cb->s_state)) {
1726 sppstat.spps_drops++;
1727 cb->s_state = TCPS_CLOSED;
1728 /*(void) tcp_output(cb);*/
1730 sppstat.spps_conndrops++;
1731 so->so_error = errno;
1732 return (spp_close(cb));
1736 spp_abort(struct nspcb *nsp)
1738 spp_close((struct sppcb *)nsp->nsp_pcb);
1742 * Fast timeout routine for processing delayed acks
1751 nsp = nspcb.nsp_next;
1753 for (; nsp != &nspcb; nsp = nsp->nsp_next)
1754 if ((cb = (struct sppcb *)nsp->nsp_pcb) &&
1755 (cb->s_flags & SF_DELACK)) {
1756 cb->s_flags &= ~SF_DELACK;
1757 cb->s_flags |= SF_ACKNOW;
1758 sppstat.spps_delack++;
1759 (void) spp_output(cb, (struct mbuf *) 0);
1765 * spp protocol timeout routine called every 500 ms.
1766 * Updates the timers in all active pcb's and
1767 * causes finite state machine actions if timers expire.
1772 struct nspcb *ip, *ipnxt;
1778 * Search through tcb's and update active timers.
1780 ip = nspcb.nsp_next;
1785 while (ip != &nspcb) {
1787 ipnxt = ip->nsp_next;
1790 for (i = 0; i < SPPT_NTIMERS; i++) {
1791 if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1793 if (ipnxt->nsp_prev != ip)
1803 spp_iss += SPP_ISSINCR/PR_SLOWHZ; /* increment iss */
1807 * SPP timer processing.
1810 spp_timers(struct sppcb *cb, int timer)
1815 cb->s_force = 1 + timer;
1819 * 2 MSL timeout in shutdown went off. TCP deletes connection
1823 printf("spp: SPPT_2MSL went off for no reason\n");
1824 cb->s_timer[timer] = 0;
1828 * Retransmission timer went off. Message has not
1829 * been acked within retransmit interval. Back off
1830 * to a longer retransmit interval and retransmit one packet.
1833 if (++cb->s_rxtshift > SPP_MAXRXTSHIFT) {
1834 cb->s_rxtshift = SPP_MAXRXTSHIFT;
1835 sppstat.spps_timeoutdrop++;
1836 cb = spp_drop(cb, ETIMEDOUT);
1839 sppstat.spps_rexmttimeo++;
1840 rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1841 rexmt *= spp_backoff[cb->s_rxtshift];
1842 SPPT_RANGESET(cb->s_rxtcur, rexmt, SPPTV_MIN, SPPTV_REXMTMAX);
1843 cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
1845 * If we have backed off fairly far, our srtt
1846 * estimate is probably bogus. Clobber it
1847 * so we'll take the next rtt measurement as our srtt;
1848 * move the current srtt into rttvar to keep the current
1849 * retransmit times until then.
1851 if (cb->s_rxtshift > SPP_MAXRXTSHIFT / 4 ) {
1852 cb->s_rttvar += (cb->s_srtt >> 2);
1855 cb->s_snxt = cb->s_rack;
1857 * If timing a packet, stop the timer.
1861 * See very long discussion in tcp_timer.c about congestion
1862 * window and ssthresh
1864 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1868 cb->s_ssthresh = win * CUNIT;
1869 (void) spp_output(cb, (struct mbuf *) 0);
1873 * Persistence timer into zero window.
1874 * Force a probe to be sent.
1877 sppstat.spps_persisttimeo++;
1879 (void) spp_output(cb, (struct mbuf *) 0);
1883 * Keep-alive timer went off; send something
1884 * or drop connection if idle for too long.
1887 sppstat.spps_keeptimeo++;
1888 if (cb->s_state < TCPS_ESTABLISHED)
1890 if (cb->s_nspcb->nsp_socket->so_options & SO_KEEPALIVE) {
1891 if (cb->s_idle >= SPPTV_MAXIDLE)
1893 sppstat.spps_keepprobe++;
1894 (void) spp_output(cb, (struct mbuf *) 0);
1897 cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
1900 sppstat.spps_keepdrops++;
1901 cb = spp_drop(cb, ETIMEDOUT);
1907 int SppcbSize = sizeof (struct sppcb);
1908 int NspcbSize = sizeof (struct nspcb);