Merge branch 'master' of ssh://crater.dragonflybsd.org/repository/git/dragonfly
[dragonfly.git] / sys / netproto / ipx / spx_usrreq.c
1 /*
2  * Copyright (c) 1995, Mike Mitchell
3  * Copyright (c) 1984, 1985, 1986, 1987, 1993
4  *      The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *      This product includes software developed by the University of
17  *      California, Berkeley and its contributors.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)spx_usrreq.h
35  *
36  * $FreeBSD: src/sys/netipx/spx_usrreq.c,v 1.27.2.1 2001/02/22 09:44:18 bp Exp $
37  * $DragonFly: src/sys/netproto/ipx/spx_usrreq.c,v 1.20 2007/04/22 01:13:15 dillon Exp $
38  */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/proc.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/thread2.h>
50
51 #include <net/route.h>
52 #include <netinet/tcp_fsm.h>
53
54 #include "ipx.h"
55 #include "ipx_pcb.h"
56 #include "ipx_var.h"
57 #include "spx.h"
58 #include "spx_timer.h"
59 #include "spx_var.h"
60 #include "spx_debug.h"
61
62 /*
63  * SPX protocol implementation.
64  */
65 static u_short  spx_iss;
66 static u_short  spx_newchecks[50];
67 static int      spx_hardnosed;
68 static int      spx_use_delack = 0;
69 static int      traceallspxs = 0;
70 static struct   spx     spx_savesi;
71 static struct   spx_istat spx_istat;
72
73 /* Following was struct spxstat spxstat; */
74 #ifndef spxstat 
75 #define spxstat spx_istat.newstats
76 #endif  
77
78 static int spx_backoff[SPX_MAXRXTSHIFT+1] =
79     { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
80
/* Connection-state helpers; each returns the (possibly replaced) pcb. */
static struct spxpcb	*spx_close(struct spxpcb *cb);
static struct spxpcb	*spx_disconnect(struct spxpcb *cb);
static struct spxpcb	*spx_drop(struct spxpcb *cb, int errno);
static struct spxpcb	*spx_timers(struct spxpcb *cb, int timer);
static struct spxpcb	*spx_usrclosed(struct spxpcb *cb);

/* Protocol engine internals. */
static int	spx_output(struct spxpcb *cb, struct mbuf *m0);
static int	spx_reass(struct spxpcb *cb, struct spx *si,
			  struct mbuf *si_m);
static void	spx_setpersist(struct spxpcb *cb);
static void	spx_template(struct spxpcb *cb);

/* pr_usrreqs entry points. */
static int	spx_usr_abort(struct socket *so);
static int	spx_accept(struct socket *so, struct sockaddr **nam);
static int	spx_attach(struct socket *so, int proto,
			   struct pru_attach_info *ai);
static int	spx_bind(struct socket *so, struct sockaddr *nam,
			 struct thread *td);
static int	spx_connect(struct socket *so, struct sockaddr *nam,
			    struct thread *td);
static int	spx_detach(struct socket *so);
static int	spx_usr_disconnect(struct socket *so);
static int	spx_listen(struct socket *so, struct thread *td);
static int	spx_rcvd(struct socket *so, int flags);
static int	spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
static int	spx_send(struct socket *so, int flags, struct mbuf *m,
			 struct sockaddr *addr, struct mbuf *control,
			 struct thread *td);
static int	spx_shutdown(struct socket *so);
static int	spx_sp_attach(struct socket *so, int proto,
			      struct pru_attach_info *ai);
110
111 struct  pr_usrreqs spx_usrreqs = {
112         .pru_abort = spx_usr_abort,
113         .pru_accept = spx_accept,
114         .pru_attach = spx_attach,
115         .pru_bind = spx_bind,
116         .pru_connect = spx_connect,
117         .pru_connect2 = pru_connect2_notsupp,
118         .pru_control = ipx_control,
119         .pru_detach = spx_detach,
120         .pru_disconnect = spx_usr_disconnect,
121         .pru_listen = spx_listen,
122         .pru_peeraddr = ipx_peeraddr,
123         .pru_rcvd = spx_rcvd,
124         .pru_rcvoob = spx_rcvoob,
125         .pru_send = spx_send,
126         .pru_sense = pru_sense_null,
127         .pru_shutdown = spx_shutdown,
128         .pru_sockaddr = ipx_sockaddr,
129         .pru_sosend = sosend,
130         .pru_soreceive = soreceive
131 };
132
133 struct  pr_usrreqs spx_usrreq_sps = {
134         .pru_abort = spx_usr_abort,
135         .pru_accept = spx_accept,
136         .pru_attach = spx_sp_attach,
137         .pru_bind = spx_bind,
138         .pru_connect = spx_connect,
139         .pru_connect2 = pru_connect2_notsupp,
140         .pru_control = ipx_control,
141         .pru_detach = spx_detach,
142         .pru_disconnect = spx_usr_disconnect,
143         .pru_listen = spx_listen,
144         .pru_peeraddr = ipx_peeraddr,
145         .pru_rcvd = spx_rcvd,
146         .pru_rcvoob = spx_rcvoob,
147         .pru_send = spx_send,
148         .pru_sense = pru_sense_null,
149         .pru_shutdown = spx_shutdown,
150         .pru_sockaddr = ipx_sockaddr,
151         .pru_sosend = sosend,
152         .pru_soreceive = soreceive
153 };
154
155 static MALLOC_DEFINE(M_SPX_Q, "ipx_spx_q", "IPX Packet Management");
156
157 void
158 spx_init(void)
159 {
160
161         spx_iss = 1; /* WRONG !! should fish it out of TODR */
162 }
163
164 void
165 spx_input(struct mbuf *m, struct ipxpcb *ipxp)
166 {
167         struct spxpcb *cb;
168         struct spx *si;
169         struct socket *so;
170         int dropsocket = 0;
171         short ostate = 0;
172
173         spxstat.spxs_rcvtotal++;
174         if (ipxp == NULL) {
175                 panic("No ipxpcb in spx_input\n");
176                 return;
177         }
178
179         cb = ipxtospxpcb(ipxp);
180         if (cb == NULL)
181                 goto bad;
182
183         if (m->m_len < sizeof(struct spx)) {
184                 if ((m = m_pullup(m, sizeof(*si))) == NULL) {
185                         spxstat.spxs_rcvshort++;
186                         return;
187                 }
188         }
189         si = mtod(m, struct spx *);
190         si->si_seq = ntohs(si->si_seq);
191         si->si_ack = ntohs(si->si_ack);
192         si->si_alo = ntohs(si->si_alo);
193
194         so = ipxp->ipxp_socket;
195
196         if (so->so_options & SO_DEBUG || traceallspxs) {
197                 ostate = cb->s_state;
198                 spx_savesi = *si;
199         }
200         if (so->so_options & SO_ACCEPTCONN) {
201                 struct spxpcb *ocb = cb;
202
203                 so = sonewconn(so, 0);
204                 if (so == NULL) {
205                         goto drop;
206                 }
207                 /*
208                  * This is ugly, but ....
209                  *
210                  * Mark socket as temporary until we're
211                  * committed to keeping it.  The code at
212                  * ``drop'' and ``dropwithreset'' check the
213                  * flag dropsocket to see if the temporary
214                  * socket created here should be discarded.
215                  * We mark the socket as discardable until
216                  * we're committed to it below in TCPS_LISTEN.
217                  */
218                 dropsocket++;
219                 ipxp = (struct ipxpcb *)so->so_pcb;
220                 ipxp->ipxp_laddr = si->si_dna;
221                 cb = ipxtospxpcb(ipxp);
222                 cb->s_mtu = ocb->s_mtu;         /* preserve sockopts */
223                 cb->s_flags = ocb->s_flags;     /* preserve sockopts */
224                 cb->s_flags2 = ocb->s_flags2;   /* preserve sockopts */
225                 cb->s_state = TCPS_LISTEN;
226         }
227
228         /*
229          * Packet received on connection.
230          * reset idle time and keep-alive timer;
231          */
232         cb->s_idle = 0;
233         cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
234
235         switch (cb->s_state) {
236
237         case TCPS_LISTEN:{
238                 struct sockaddr_ipx *sipx, ssipx;
239                 struct ipx_addr laddr;
240
241                 /*
242                  * If somebody here was carying on a conversation
243                  * and went away, and his pen pal thinks he can
244                  * still talk, we get the misdirected packet.
245                  */
246                 if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
247                         spx_istat.gonawy++;
248                         goto dropwithreset;
249                 }
250                 sipx = &ssipx;
251                 bzero(sipx, sizeof *sipx);
252                 sipx->sipx_len = sizeof(*sipx);
253                 sipx->sipx_family = AF_IPX;
254                 sipx->sipx_addr = si->si_sna;
255                 laddr = ipxp->ipxp_laddr;
256                 if (ipx_nullhost(laddr))
257                         ipxp->ipxp_laddr = si->si_dna;
258                 if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
259                         ipxp->ipxp_laddr = laddr;
260                         spx_istat.noconn++;
261                         goto drop;
262                 }
263                 spx_template(cb);
264                 dropsocket = 0;         /* committed to socket */
265                 cb->s_did = si->si_sid;
266                 cb->s_rack = si->si_ack;
267                 cb->s_ralo = si->si_alo;
268 #define THREEWAYSHAKE
269 #ifdef THREEWAYSHAKE
270                 cb->s_state = TCPS_SYN_RECEIVED;
271                 cb->s_force = 1 + SPXT_KEEP;
272                 spxstat.spxs_accepts++;
273                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
274                 }
275                 break;
276         /*
277          * This state means that we have heard a response
278          * to our acceptance of their connection
279          * It is probably logically unnecessary in this
280          * implementation.
281          */
282          case TCPS_SYN_RECEIVED: {
283                 if (si->si_did != cb->s_sid) {
284                         spx_istat.wrncon++;
285                         goto drop;
286                 }
287 #endif
288                 ipxp->ipxp_fport =  si->si_sport;
289                 cb->s_timer[SPXT_REXMT] = 0;
290                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
291                 soisconnected(so);
292                 cb->s_state = TCPS_ESTABLISHED;
293                 spxstat.spxs_accepts++;
294                 }
295                 break;
296
297         /*
298          * This state means that we have gotten a response
299          * to our attempt to establish a connection.
300          * We fill in the data from the other side,
301          * telling us which port to respond to, instead of the well-
302          * known one we might have sent to in the first place.
303          * We also require that this is a response to our
304          * connection id.
305          */
306         case TCPS_SYN_SENT:
307                 if (si->si_did != cb->s_sid) {
308                         spx_istat.notme++;
309                         goto drop;
310                 }
311                 spxstat.spxs_connects++;
312                 cb->s_did = si->si_sid;
313                 cb->s_rack = si->si_ack;
314                 cb->s_ralo = si->si_alo;
315                 cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
316                 cb->s_timer[SPXT_REXMT] = 0;
317                 cb->s_flags |= SF_ACKNOW;
318                 soisconnected(so);
319                 cb->s_state = TCPS_ESTABLISHED;
320                 /* Use roundtrip time of connection request for initial rtt */
321                 if (cb->s_rtt) {
322                         cb->s_srtt = cb->s_rtt << 3;
323                         cb->s_rttvar = cb->s_rtt << 1;
324                         SPXT_RANGESET(cb->s_rxtcur,
325                             ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
326                             SPXTV_MIN, SPXTV_REXMTMAX);
327                             cb->s_rtt = 0;
328                 }
329         }
330         if (so->so_options & SO_DEBUG || traceallspxs)
331                 spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
332
333         m->m_len -= sizeof(struct ipx);
334         m->m_pkthdr.len -= sizeof(struct ipx);
335         m->m_data += sizeof(struct ipx);
336
337         if (spx_reass(cb, si, m)) {
338                 m_freem(m);
339         }
340         if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
341                 spx_output(cb, NULL);
342         cb->s_flags &= ~(SF_WIN|SF_RXT);
343         return;
344
345 dropwithreset:
346         if (dropsocket)
347                 soabort(so);
348         si->si_seq = ntohs(si->si_seq);
349         si->si_ack = ntohs(si->si_ack);
350         si->si_alo = ntohs(si->si_alo);
351         m_freem(m);
352         if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
353                 spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
354         return;
355
356 drop:
357 bad:
358         if (cb == 0 || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
359             traceallspxs)
360                 spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
361         m_freem(m);
362 }
363
364 static int spxrexmtthresh = 3;
365
366 /*
367  * This is structurally similar to the tcp reassembly routine
368  * but its function is somewhat different:  It merely queues
369  * packets up, and suppresses duplicates.
370  */
371 static int
372 spx_reass(struct spxpcb *cb, struct spx *si, struct mbuf *si_m)
373 {
374         struct spx_q *q;
375         struct spx_q *nq;
376         struct mbuf *m;
377         struct socket *so = cb->s_ipxpcb->ipxp_socket;
378         char packetp = cb->s_flags & SF_HI;
379         int incr;
380         char wakeup = 0;
381
382         if (si == NULL)
383                 goto present;
384         /*
385          * Update our news from them.
386          */
387         if (si->si_cc & SPX_SA)
388                 cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
389         if (SSEQ_GT(si->si_alo, cb->s_ralo))
390                 cb->s_flags |= SF_WIN;
391         if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
392                 if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
393                         spxstat.spxs_rcvdupack++;
394                         /*
395                          * If this is a completely duplicate ack
396                          * and other conditions hold, we assume
397                          * a packet has been dropped and retransmit
398                          * it exactly as in tcp_input().
399                          */
400                         if (si->si_ack != cb->s_rack ||
401                             si->si_alo != cb->s_ralo)
402                                 cb->s_dupacks = 0;
403                         else if (++cb->s_dupacks == spxrexmtthresh) {
404                                 u_short onxt = cb->s_snxt;
405                                 int cwnd = cb->s_cwnd;
406
407                                 cb->s_snxt = si->si_ack;
408                                 cb->s_cwnd = CUNIT;
409                                 cb->s_force = 1 + SPXT_REXMT;
410                                 spx_output(cb, NULL);
411                                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
412                                 cb->s_rtt = 0;
413                                 if (cwnd >= 4 * CUNIT)
414                                         cb->s_cwnd = cwnd / 2;
415                                 if (SSEQ_GT(onxt, cb->s_snxt))
416                                         cb->s_snxt = onxt;
417                                 return (1);
418                         }
419                 } else
420                         cb->s_dupacks = 0;
421                 goto update_window;
422         }
423         cb->s_dupacks = 0;
424         /*
425          * If our correspondent acknowledges data we haven't sent
426          * TCP would drop the packet after acking.  We'll be a little
427          * more permissive
428          */
429         if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
430                 spxstat.spxs_rcvacktoomuch++;
431                 si->si_ack = cb->s_smax + 1;
432         }
433         spxstat.spxs_rcvackpack++;
434         /*
435          * If transmit timer is running and timed sequence
436          * number was acked, update smoothed round trip time.
437          * See discussion of algorithm in tcp_input.c
438          */
439         if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
440                 spxstat.spxs_rttupdated++;
441                 if (cb->s_srtt != 0) {
442                         short delta;
443                         delta = cb->s_rtt - (cb->s_srtt >> 3);
444                         if ((cb->s_srtt += delta) <= 0)
445                                 cb->s_srtt = 1;
446                         if (delta < 0)
447                                 delta = -delta;
448                         delta -= (cb->s_rttvar >> 2);
449                         if ((cb->s_rttvar += delta) <= 0)
450                                 cb->s_rttvar = 1;
451                 } else {
452                         /*
453                          * No rtt measurement yet
454                          */
455                         cb->s_srtt = cb->s_rtt << 3;
456                         cb->s_rttvar = cb->s_rtt << 1;
457                 }
458                 cb->s_rtt = 0;
459                 cb->s_rxtshift = 0;
460                 SPXT_RANGESET(cb->s_rxtcur,
461                         ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
462                         SPXTV_MIN, SPXTV_REXMTMAX);
463         }
464         /*
465          * If all outstanding data is acked, stop retransmit
466          * timer and remember to restart (more output or persist).
467          * If there is more data to be acked, restart retransmit
468          * timer, using current (possibly backed-off) value;
469          */
470         if (si->si_ack == cb->s_smax + 1) {
471                 cb->s_timer[SPXT_REXMT] = 0;
472                 cb->s_flags |= SF_RXT;
473         } else if (cb->s_timer[SPXT_PERSIST] == 0)
474                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
475         /*
476          * When new data is acked, open the congestion window.
477          * If the window gives us less than ssthresh packets
478          * in flight, open exponentially (maxseg at a time).
479          * Otherwise open linearly (maxseg^2 / cwnd at a time).
480          */
481         incr = CUNIT;
482         if (cb->s_cwnd > cb->s_ssthresh)
483                 incr = max(incr * incr / cb->s_cwnd, 1);
484         cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
485         /*
486          * Trim Acked data from output queue.
487          */
488         while ((m = so->so_snd.ssb_mb) != NULL) {
489                 if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
490                         sbdroprecord(&so->so_snd.sb);
491                 else
492                         break;
493         }
494         sowwakeup(so);
495         cb->s_rack = si->si_ack;
496 update_window:
497         if (SSEQ_LT(cb->s_snxt, cb->s_rack))
498                 cb->s_snxt = cb->s_rack;
499         if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
500             (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
501              (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
502                 /* keep track of pure window updates */
503                 if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
504                     && SSEQ_LT(cb->s_ralo, si->si_alo)) {
505                         spxstat.spxs_rcvwinupd++;
506                         spxstat.spxs_rcvdupack--;
507                 }
508                 cb->s_ralo = si->si_alo;
509                 cb->s_swl1 = si->si_seq;
510                 cb->s_swl2 = si->si_ack;
511                 cb->s_swnd = (1 + si->si_alo - si->si_ack);
512                 if (cb->s_swnd > cb->s_smxw)
513                         cb->s_smxw = cb->s_swnd;
514                 cb->s_flags |= SF_WIN;
515         }
516         /*
517          * If this packet number is higher than that which
518          * we have allocated refuse it, unless urgent
519          */
520         if (SSEQ_GT(si->si_seq, cb->s_alo)) {
521                 if (si->si_cc & SPX_SP) {
522                         spxstat.spxs_rcvwinprobe++;
523                         return (1);
524                 } else
525                         spxstat.spxs_rcvpackafterwin++;
526                 if (si->si_cc & SPX_OB) {
527                         if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
528                                 m_freem(si_m);
529                                 return (0);
530                         } /* else queue this packet; */
531                 } else {
532                         /*register struct socket *so = cb->s_ipxpcb->ipxp_socket;
533                         if (so->so_state && SS_NOFDREF) {
534                                 spx_close(cb);
535                         } else
536                                        would crash system*/
537                         spx_istat.notyet++;
538                         m_freem(si_m);
539                         return (0);
540                 }
541         }
542         /*
543          * If this is a system packet, we don't need to
544          * queue it up, and won't update acknowledge #
545          */
546         if (si->si_cc & SPX_SP) {
547                 return (1);
548         }
549         /*
550          * We have already seen this packet, so drop.
551          */
552         if (SSEQ_LT(si->si_seq, cb->s_ack)) {
553                 spx_istat.bdreas++;
554                 spxstat.spxs_rcvduppack++;
555                 if (si->si_seq == cb->s_ack - 1)
556                         spx_istat.lstdup++;
557                 return (1);
558         }
559         /*
560          * Loop through all packets queued up to insert in
561          * appropriate sequence.
562          */
563         for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
564                 if (si->si_seq == SI(q)->si_seq) {
565                         spxstat.spxs_rcvduppack++;
566                         return (1);
567                 }
568                 if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
569                         spxstat.spxs_rcvoopack++;
570                         break;
571                 }
572         }
573         nq = kmalloc(sizeof(struct spx_q), M_SPX_Q, M_INTNOWAIT);
574         if (nq == NULL) {
575                 m_freem(si_m);
576                 return (0);
577         }
578         insque(nq, q->si_prev);
579         nq->si_mbuf = si_m;
580         /*
581          * If this packet is urgent, inform process
582          */
583         if (si->si_cc & SPX_OB) {
584                 cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
585                 sohasoutofband(so);
586                 cb->s_oobflags |= SF_IOOB;
587         }
588 present:
589 #define SPINC sizeof(struct spxhdr)
590         /*
591          * Loop through all packets queued up to update acknowledge
592          * number, and present all acknowledged data to user;
593          * If in packet interface mode, show packet headers.
594          */
595         for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
596                   if (SI(q)->si_seq == cb->s_ack) {
597                         cb->s_ack++;
598                         m = q->si_mbuf;
599                         if (SI(q)->si_cc & SPX_OB) {
600                                 cb->s_oobflags &= ~SF_IOOB;
601                                 if (so->so_rcv.ssb_cc)
602                                         so->so_oobmark = so->so_rcv.ssb_cc;
603                                 else
604                                         sosetstate(so, SS_RCVATMARK);
605                         }
606                         nq = q;
607                         q = q->si_prev;
608                         remque(nq);
609                         kfree(nq, M_SPX_Q);
610                         wakeup = 1;
611                         spxstat.spxs_rcvpack++;
612 #ifdef SF_NEWCALL
613                         if (cb->s_flags2 & SF_NEWCALL) {
614                                 struct spxhdr *sp = mtod(m, struct spxhdr *);
615                                 u_char dt = sp->spx_dt;
616                                 spx_newchecks[4]++;
617                                 if (dt != cb->s_rhdr.spx_dt) {
618                                         struct mbuf *mm =
619                                            m_getclr(MB_DONTWAIT, MT_CONTROL);
620                                         spx_newchecks[0]++;
621                                         if (mm != NULL) {
622                                                 u_short *s =
623                                                         mtod(mm, u_short *);
624                                                 cb->s_rhdr.spx_dt = dt;
625                                                 mm->m_len = 5; /*XXX*/
626                                                 s[0] = 5;
627                                                 s[1] = 1;
628                                                 *(u_char *)(&s[2]) = dt;
629                                                 sbappend(&so->so_rcv.sb, mm);
630                                         }
631                                 }
632                                 if (sp->spx_cc & SPX_OB) {
633                                         m_chtype(m, MT_OOBDATA);
634                                         spx_newchecks[1]++;
635                                         so->so_oobmark = 0;
636                                         soclrstate(so, SS_RCVATMARK);
637                                 }
638                                 if (packetp == 0) {
639                                         m->m_data += SPINC;
640                                         m->m_len -= SPINC;
641                                         m->m_pkthdr.len -= SPINC;
642                                 }
643                                 if ((sp->spx_cc & SPX_EM) || packetp) {
644                                         sbappendrecord(&so->so_rcv.sb, m);
645                                         spx_newchecks[9]++;
646                                 } else
647                                         sbappend(&so->so_rcv.sb, m);
648                         } else
649 #endif
650                         if (packetp) {
651                                 sbappendrecord(&so->so_rcv.sb, m);
652                         } else {
653                                 cb->s_rhdr = *mtod(m, struct spxhdr *);
654                                 m->m_data += SPINC;
655                                 m->m_len -= SPINC;
656                                 m->m_pkthdr.len -= SPINC;
657                                 sbappend(&so->so_rcv.sb, m);
658                         }
659                   } else
660                         break;
661         }
662         if (wakeup)
663                 sorwakeup(so);
664         return (0);
665 }
666
667 void
668 spx_ctlinput(int cmd, struct sockaddr *arg_as_sa, void *dummy)
669 {
670         caddr_t arg = (/* XXX */ caddr_t)arg_as_sa;
671         struct ipx_addr *na;
672         struct sockaddr_ipx *sipx;
673
674         if (cmd < 0 || cmd > PRC_NCMDS)
675                 return;
676
677         switch (cmd) {
678
679         case PRC_ROUTEDEAD:
680                 return;
681
682         case PRC_IFDOWN:
683         case PRC_HOSTDEAD:
684         case PRC_HOSTUNREACH:
685                 sipx = (struct sockaddr_ipx *)arg;
686                 if (sipx->sipx_family != AF_IPX)
687                         return;
688                 na = &sipx->sipx_addr;
689                 break;
690
691         default:
692                 break;
693         }
694 }
695
#ifdef notdef
/*
 * UNFINISHED and compiled out (guarded by "notdef").
 *
 * Intended reaction to a notification that a packet we sent was too big:
 * record the new mtu from the error parameters, then locate the offending
 * packet in the send buffer.  The re-segmentation itself was never written.
 *
 * NOTE(review): the return-value convention was never defined; 0 is
 * returned on every path as a placeholder.
 */
int
spx_fixmtu(struct ipxpcb *ipxp)
{
	struct spxpcb *cb = (struct spxpcb *)(ipxp->ipxp_pcb);
	struct mbuf *m;
	struct spx *si;
	struct ipx_errp *ep;
	struct signalsockbuf *ssb;
	int badseq, len;
	struct mbuf *firstbad, *m0;

	if (cb == NULL)
		return (0);

	/*
	 * The notification that we have sent
	 * too much is bad news -- we will
	 * have to go through queued up so far
	 * splitting ones which are too big and
	 * reassigning sequence numbers and checksums.
	 * we should then retransmit all packets from
	 * one above the offending packet to the last one
	 * we had sent (or our allocation)
	 * then the offending one so that the any queued
	 * data at our destination will be discarded.
	 */
	ep = (struct ipx_errp *)ipxp->ipxp_notify_param;
	ssb = &ipxp->ipxp_socket->so_snd;
	cb->s_mtu = ep->ipx_err_param;
	badseq = ep->ipx_err_ipx.si_seq;

	/* Find the queued record carrying the offending sequence number. */
	for (m = ssb->ssb_mb; m != NULL; m = m->m_nextpkt) {
		si = mtod(m, struct spx *);
		if (si->si_seq == badseq)
			break;
	}
	if (m == NULL)
		return (0);
	firstbad = m;

	/* FINISH THIS: the steps below were meant to loop over records. */

	/* calculate length */
	for (m0 = m, len = 0; m != NULL; m = m->m_next)
		len += m->m_len;
	if (len > cb->s_mtu) {
		/* FINISH THIS: split the oversized record. */
	}
	return (0);
}
#endif
744
745 static int
746 spx_output(struct spxpcb *cb, struct mbuf *m0)
747 {
748         struct socket *so = cb->s_ipxpcb->ipxp_socket;
749         struct mbuf *m = NULL;
750         struct spx *si = NULL;
751         struct signalsockbuf *ssb = &so->so_snd;
752         int len = 0, win, rcv_win;
753         short span, off, recordp = 0;
754         u_short alo;
755         int error = 0, sendalot;
756 #ifdef notdef
757         int idle;
758 #endif
759         struct mbuf *mprev;
760
761         if (m0 != NULL) {
762                 int mtu = cb->s_mtu;
763                 int datalen;
764                 /*
765                  * Make sure that packet isn't too big.
766                  */
767                 for (m = m0; m != NULL; m = m->m_next) {
768                         mprev = m;
769                         len += m->m_len;
770                         if (m->m_flags & M_EOR)
771                                 recordp = 1;
772                 }
773                 datalen = (cb->s_flags & SF_HO) ?
774                                 len - sizeof(struct spxhdr) : len;
775                 if (datalen > mtu) {
776                         if (cb->s_flags & SF_PI) {
777                                 m_freem(m0);
778                                 return (EMSGSIZE);
779                         } else {
780                                 int oldEM = cb->s_cc & SPX_EM;
781
782                                 cb->s_cc &= ~SPX_EM;
783                                 while (len > mtu) {
784                                         /*
785                                          * Here we are only being called
786                                          * from usrreq(), so it is OK to
787                                          * block.
788                                          */
789                                         m = m_copym(m0, 0, mtu, MB_WAIT);
790                                         if (cb->s_flags & SF_NEWCALL) {
791                                             struct mbuf *mm = m;
792                                             spx_newchecks[7]++;
793                                             while (mm != NULL) {
794                                                 mm->m_flags &= ~M_EOR;
795                                                 mm = mm->m_next;
796                                             }
797                                         }
798                                         error = spx_output(cb, m);
799                                         if (error) {
800                                                 cb->s_cc |= oldEM;
801                                                 m_freem(m0);
802                                                 return (error);
803                                         }
804                                         m_adj(m0, mtu);
805                                         len -= mtu;
806                                 }
807                                 cb->s_cc |= oldEM;
808                         }
809                 }
810                 /*
811                  * Force length even, by adding a "garbage byte" if
812                  * necessary.
813                  */
814                 if (len & 1) {
815                         m = mprev;
816                         if (M_TRAILINGSPACE(m) >= 1)
817                                 m->m_len++;
818                         else {
819                                 struct mbuf *m1 = m_get(MB_DONTWAIT, MT_DATA);
820
821                                 if (m1 == NULL) {
822                                         m_freem(m0);
823                                         return (ENOBUFS);
824                                 }
825                                 m1->m_len = 1;
826                                 *(mtod(m1, u_char *)) = 0;
827                                 m->m_next = m1;
828                         }
829                 }
830                 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
831                 if (m == NULL) {
832                         m_freem(m0);
833                         return (ENOBUFS);
834                 }
835                 /*
836                  * Fill in mbuf with extended SP header
837                  * and addresses and length put into network format.
838                  */
839                 MH_ALIGN(m, sizeof(struct spx));
840                 m->m_len = sizeof(struct spx);
841                 m->m_next = m0;
842                 si = mtod(m, struct spx *);
843                 si->si_i = *cb->s_ipx;
844                 si->si_s = cb->s_shdr;
845                 if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
846                         struct spxhdr *sh;
847                         if (m0->m_len < sizeof(*sh)) {
848                                 if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
849                                         m_free(m);
850                                         m_freem(m0);
851                                         return (EINVAL);
852                                 }
853                                 m->m_next = m0;
854                         }
855                         sh = mtod(m0, struct spxhdr *);
856                         si->si_dt = sh->spx_dt;
857                         si->si_cc |= sh->spx_cc & SPX_EM;
858                         m0->m_len -= sizeof(*sh);
859                         m0->m_data += sizeof(*sh);
860                         len -= sizeof(*sh);
861                 }
862                 len += sizeof(*si);
863                 if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
864                         si->si_cc |= SPX_EM;
865                         spx_newchecks[8]++;
866                 }
867                 if (cb->s_oobflags & SF_SOOB) {
868                         /*
869                          * Per jqj@cornell:
870                          * make sure OB packets convey exactly 1 byte.
871                          * If the packet is 1 byte or larger, we
872                          * have already guaranted there to be at least
873                          * one garbage byte for the checksum, and
874                          * extra bytes shouldn't hurt!
875                          */
876                         if (len > sizeof(*si)) {
877                                 si->si_cc |= SPX_OB;
878                                 len = (1 + sizeof(*si));
879                         }
880                 }
881                 si->si_len = htons((u_short)len);
882                 m->m_pkthdr.len = ((len - 1) | 1) + 1;
883                 /*
884                  * queue stuff up for output
885                  */
886                 sbappendrecord(&ssb->sb, m);
887                 cb->s_seq++;
888         }
889 #ifdef notdef
890         idle = (cb->s_smax == (cb->s_rack - 1));
891 #endif
892 again:
893         sendalot = 0;
894         off = cb->s_snxt - cb->s_rack;
895         win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
896
897         /*
898          * If in persist timeout with window of 0, send a probe.
899          * Otherwise, if window is small but nonzero
900          * and timer expired, send what we can and go into
901          * transmit state.
902          */
903         if (cb->s_force == 1 + SPXT_PERSIST) {
904                 if (win != 0) {
905                         cb->s_timer[SPXT_PERSIST] = 0;
906                         cb->s_rxtshift = 0;
907                 }
908         }
909         span = cb->s_seq - cb->s_rack;
910         len = min(span, win) - off;
911
912         if (len < 0) {
913                 /*
914                  * Window shrank after we went into it.
915                  * If window shrank to 0, cancel pending
916                  * restransmission and pull s_snxt back
917                  * to (closed) window.  We will enter persist
918                  * state below.  If the widndow didn't close completely,
919                  * just wait for an ACK.
920                  */
921                 len = 0;
922                 if (win == 0) {
923                         cb->s_timer[SPXT_REXMT] = 0;
924                         cb->s_snxt = cb->s_rack;
925                 }
926         }
927         if (len > 1)
928                 sendalot = 1;
929         rcv_win = ssb_space(&so->so_rcv);
930
931         /*
932          * Send if we owe peer an ACK.
933          */
934         if (cb->s_oobflags & SF_SOOB) {
935                 /*
936                  * must transmit this out of band packet
937                  */
938                 cb->s_oobflags &= ~ SF_SOOB;
939                 sendalot = 1;
940                 spxstat.spxs_sndurg++;
941                 goto found;
942         }
943         if (cb->s_flags & SF_ACKNOW)
944                 goto send;
945         if (cb->s_state < TCPS_ESTABLISHED)
946                 goto send;
947         /*
948          * Silly window can't happen in spx.
949          * Code from tcp deleted.
950          */
951         if (len)
952                 goto send;
953         /*
954          * Compare available window to amount of window
955          * known to peer (as advertised window less
956          * next expected input.)  If the difference is at least two
957          * packets or at least 35% of the mximum possible window,
958          * then want to send a window update to peer.
959          */
960         if (rcv_win > 0) {
961                 u_short delta =  1 + cb->s_alo - cb->s_ack;
962                 int adv = rcv_win - (delta * cb->s_mtu);
963                 
964                 if ((so->so_rcv.ssb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
965                     (100 * adv / so->so_rcv.ssb_hiwat >= 35)) {
966                         spxstat.spxs_sndwinup++;
967                         cb->s_flags |= SF_ACKNOW;
968                         goto send;
969                 }
970
971         }
972         /*
973          * Many comments from tcp_output.c are appropriate here
974          * including . . .
975          * If send window is too small, there is data to transmit, and no
976          * retransmit or persist is pending, then go to persist state.
977          * If nothing happens soon, send when timer expires:
978          * if window is nonzero, transmit what we can,
979          * otherwise send a probe.
980          */
981         if (so->so_snd.ssb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
982                 cb->s_timer[SPXT_PERSIST] == 0) {
983                         cb->s_rxtshift = 0;
984                         spx_setpersist(cb);
985         }
986         /*
987          * No reason to send a packet, just return.
988          */
989         cb->s_outx = 1;
990         return (0);
991
992 send:
993         /*
994          * Find requested packet.
995          */
996         si = NULL;
997         if (len > 0) {
998                 cb->s_want = cb->s_snxt;
999                 for (m = ssb->ssb_mb; m != NULL; m = m->m_nextpkt) {
1000                         si = mtod(m, struct spx *);
1001                         if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
1002                                 break;
1003                 }
1004         found:
1005                 if (si != NULL) {
1006                         if (si->si_seq == cb->s_snxt)
1007                                         cb->s_snxt++;
1008                                 else
1009                                         spxstat.spxs_sndvoid++, si = 0;
1010                 }
1011         }
1012         /*
1013          * update window
1014          */
1015         if (rcv_win < 0)
1016                 rcv_win = 0;
1017         alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
1018         if (SSEQ_LT(alo, cb->s_alo)) 
1019                 alo = cb->s_alo;
1020
1021         if (si != NULL) {
1022                 /*
1023                  * must make a copy of this packet for
1024                  * ipx_output to monkey with
1025                  */
1026                 m = m_copy(m, 0, (int)M_COPYALL);
1027                 if (m == NULL) {
1028                         return (ENOBUFS);
1029                 }
1030                 si = mtod(m, struct spx *);
1031                 if (SSEQ_LT(si->si_seq, cb->s_smax))
1032                         spxstat.spxs_sndrexmitpack++;
1033                 else
1034                         spxstat.spxs_sndpack++;
1035         } else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
1036                 /*
1037                  * Must send an acknowledgement or a probe
1038                  */
1039                 if (cb->s_force)
1040                         spxstat.spxs_sndprobe++;
1041                 if (cb->s_flags & SF_ACKNOW)
1042                         spxstat.spxs_sndacks++;
1043                 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
1044                 if (m == NULL)
1045                         return (ENOBUFS);
1046                 /*
1047                  * Fill in mbuf with extended SP header
1048                  * and addresses and length put into network format.
1049                  */
1050                 MH_ALIGN(m, sizeof(struct spx));
1051                 m->m_len = sizeof(*si);
1052                 m->m_pkthdr.len = sizeof(*si);
1053                 si = mtod(m, struct spx *);
1054                 si->si_i = *cb->s_ipx;
1055                 si->si_s = cb->s_shdr;
1056                 si->si_seq = cb->s_smax + 1;
1057                 si->si_len = htons(sizeof(*si));
1058                 si->si_cc |= SPX_SP;
1059         } else {
1060                 cb->s_outx = 3;
1061                 if (so->so_options & SO_DEBUG || traceallspxs)
1062                         spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1063                 return (0);
1064         }
1065         /*
1066          * Stuff checksum and output datagram.
1067          */
1068         if ((si->si_cc & SPX_SP) == 0) {
1069                 if (cb->s_force != (1 + SPXT_PERSIST) ||
1070                     cb->s_timer[SPXT_PERSIST] == 0) {
1071                         /*
1072                          * If this is a new packet and we are not currently 
1073                          * timing anything, time this one.
1074                          */
1075                         if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1076                                 cb->s_smax = si->si_seq;
1077                                 if (cb->s_rtt == 0) {
1078                                         spxstat.spxs_segstimed++;
1079                                         cb->s_rtseq = si->si_seq;
1080                                         cb->s_rtt = 1;
1081                                 }
1082                         }
1083                         /*
1084                          * Set rexmt timer if not currently set,
1085                          * Initial value for retransmit timer is smoothed
1086                          * round-trip time + 2 * round-trip time variance.
1087                          * Initialize shift counter which is used for backoff
1088                          * of retransmit time.
1089                          */
1090                         if (cb->s_timer[SPXT_REXMT] == 0 &&
1091                             cb->s_snxt != cb->s_rack) {
1092                                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1093                                 if (cb->s_timer[SPXT_PERSIST]) {
1094                                         cb->s_timer[SPXT_PERSIST] = 0;
1095                                         cb->s_rxtshift = 0;
1096                                 }
1097                         }
1098                 } else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1099                         cb->s_smax = si->si_seq;
1100                 }
1101         } else if (cb->s_state < TCPS_ESTABLISHED) {
1102                 if (cb->s_rtt == 0)
1103                         cb->s_rtt = 1; /* Time initial handshake */
1104                 if (cb->s_timer[SPXT_REXMT] == 0)
1105                         cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1106         }
1107         {
1108                 /*
1109                  * Do not request acks when we ack their data packets or
1110                  * when we do a gratuitous window update.
1111                  */
1112                 if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1113                                 si->si_cc |= SPX_SA;
1114                 si->si_seq = htons(si->si_seq);
1115                 si->si_alo = htons(alo);
1116                 si->si_ack = htons(cb->s_ack);
1117
1118                 if (ipxcksum) {
1119                         si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1120                 } else
1121                         si->si_sum = 0xffff;
1122
1123                 cb->s_outx = 4;
1124                 if (so->so_options & SO_DEBUG || traceallspxs)
1125                         spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1126
1127                 if (so->so_options & SO_DONTROUTE)
1128                         error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1129                 else
1130                         error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1131         }
1132         if (error) {
1133                 return (error);
1134         }
1135         spxstat.spxs_sndtotal++;
1136         /*
1137          * Data sent (as far as we can tell).
1138          * If this advertises a larger window than any other segment,
1139          * then remember the size of the advertized window.
1140          * Any pending ACK has now been sent.
1141          */
1142         cb->s_force = 0;
1143         cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1144         if (SSEQ_GT(alo, cb->s_alo))
1145                 cb->s_alo = alo;
1146         if (sendalot)
1147                 goto again;
1148         cb->s_outx = 5;
1149         return (0);
1150 }
1151
1152 static int spx_do_persist_panics = 0;
1153
1154 static void
1155 spx_setpersist(struct spxpcb *cb)
1156 {
1157         int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1158
1159         if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1160                 panic("spx_output REXMT");
1161         /*
1162          * Start/restart persistance timer.
1163          */
1164         SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1165             t*spx_backoff[cb->s_rxtshift],
1166             SPXTV_PERSMIN, SPXTV_PERSMAX);
1167         if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1168                 cb->s_rxtshift++;
1169 }
1170
1171 int
1172 spx_ctloutput(struct socket *so, struct sockopt *sopt)
1173 {
1174         struct ipxpcb *ipxp = sotoipxpcb(so);
1175         struct spxpcb *cb;
1176         int mask, error;
1177         short soptval;
1178         u_short usoptval;
1179         int optval;
1180
1181         error = 0;
1182
1183         if (sopt->sopt_level != IPXPROTO_SPX) {
1184                 /* This will have to be changed when we do more general
1185                    stacking of protocols */
1186                 return (ipx_ctloutput(so, sopt));
1187         }
1188         if (ipxp == NULL)
1189                 return (EINVAL);
1190         else
1191                 cb = ipxtospxpcb(ipxp);
1192
1193         switch (sopt->sopt_dir) {
1194         case SOPT_GET:
1195                 switch (sopt->sopt_name) {
1196                 case SO_HEADERS_ON_INPUT:
1197                         mask = SF_HI;
1198                         goto get_flags;
1199
1200                 case SO_HEADERS_ON_OUTPUT:
1201                         mask = SF_HO;
1202                 get_flags:
1203                         soptval = cb->s_flags & mask;
1204                         error = sooptcopyout(sopt, &soptval, sizeof soptval);
1205                         break;
1206
1207                 case SO_MTU:
1208                         usoptval = cb->s_mtu;
1209                         error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1210                         break;
1211
1212                 case SO_LAST_HEADER:
1213                         error = sooptcopyout(sopt, &cb->s_rhdr, 
1214                                              sizeof cb->s_rhdr);
1215                         break;
1216
1217                 case SO_DEFAULT_HEADERS:
1218                         error = sooptcopyout(sopt, &cb->s_shdr, 
1219                                              sizeof cb->s_shdr);
1220                         break;
1221
1222                 default:
1223                         error = ENOPROTOOPT;
1224                 }
1225                 break;
1226
1227         case SOPT_SET:
1228                 switch (sopt->sopt_name) {
1229                         /* XXX why are these shorts on get and ints on set?
1230                            that doesn't make any sense... */
1231                 case SO_HEADERS_ON_INPUT:
1232                         mask = SF_HI;
1233                         goto set_head;
1234
1235                 case SO_HEADERS_ON_OUTPUT:
1236                         mask = SF_HO;
1237                 set_head:
1238                         error = sooptcopyin(sopt, &optval, sizeof optval,
1239                                             sizeof optval);
1240                         if (error)
1241                                 break;
1242
1243                         if (cb->s_flags & SF_PI) {
1244                                 if (optval)
1245                                         cb->s_flags |= mask;
1246                                 else
1247                                         cb->s_flags &= ~mask;
1248                         } else error = EINVAL;
1249                         break;
1250
1251                 case SO_MTU:
1252                         error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1253                                             sizeof usoptval);
1254                         if (error)
1255                                 break;
1256                         cb->s_mtu = usoptval;
1257                         break;
1258
1259 #ifdef SF_NEWCALL
1260                 case SO_NEWCALL:
1261                         error = sooptcopyin(sopt, &optval, sizeof optval,
1262                                             sizeof optval);
1263                         if (error)
1264                                 break;
1265                         if (optval) {
1266                                 cb->s_flags2 |= SF_NEWCALL;
1267                                 spx_newchecks[5]++;
1268                         } else {
1269                                 cb->s_flags2 &= ~SF_NEWCALL;
1270                                 spx_newchecks[6]++;
1271                         }
1272                         break;
1273 #endif
1274
1275                 case SO_DEFAULT_HEADERS:
1276                         {
1277                                 struct spxhdr sp;
1278
1279                                 error = sooptcopyin(sopt, &sp, sizeof sp,
1280                                                     sizeof sp);
1281                                 if (error)
1282                                         break;
1283                                 cb->s_dt = sp.spx_dt;
1284                                 cb->s_cc = sp.spx_cc & SPX_EM;
1285                         }
1286                         break;
1287
1288                 default:
1289                         error = ENOPROTOOPT;
1290                 }
1291                 break;
1292         }
1293         return (error);
1294 }
1295
1296 /*
1297  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
1298  *       will sofree() it when we return.
1299  */
1300 static int
1301 spx_usr_abort(struct socket *so)
1302 {
1303         struct ipxpcb *ipxp;
1304         struct spxpcb *cb;
1305
1306         ipxp = sotoipxpcb(so);
1307         cb = ipxtospxpcb(ipxp);
1308
1309         spx_drop(cb, ECONNABORTED);
1310
1311         return (0);
1312 }
1313
1314 /*
1315  * Accept a connection.  Essentially all the work is
1316  * done at higher levels; just return the address
1317  * of the peer, storing through addr.
1318  */
1319 static int
1320 spx_accept(struct socket *so, struct sockaddr **nam)
1321 {
1322         struct ipxpcb *ipxp;
1323         struct sockaddr_ipx *sipx, ssipx;
1324
1325         ipxp = sotoipxpcb(so);
1326         sipx = &ssipx;
1327         bzero(sipx, sizeof *sipx);
1328         sipx->sipx_len = sizeof *sipx;
1329         sipx->sipx_family = AF_IPX;
1330         sipx->sipx_addr = ipxp->ipxp_faddr;
1331         *nam = dup_sockaddr((struct sockaddr *)sipx);
1332         return (0);
1333 }
1334
1335 static int
1336 spx_attach(struct socket *so, int proto, struct pru_attach_info *ai)
1337 {
1338         int error;
1339         struct ipxpcb *ipxp;
1340         struct spxpcb *cb;
1341         struct mbuf *mm;
1342         struct signalsockbuf *ssb;
1343
1344         ipxp = sotoipxpcb(so);
1345         cb = ipxtospxpcb(ipxp);
1346
1347         if (ipxp != NULL)
1348                 return (EISCONN);
1349         crit_enter();
1350         error = ipx_pcballoc(so, &ipxpcb);
1351         if (error)
1352                 goto spx_attach_end;
1353         if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
1354                 error = soreserve(so, (u_long) 3072, (u_long) 3072,
1355                                   ai->sb_rlimit);
1356                 if (error)
1357                         goto spx_attach_end;
1358         }
1359         ipxp = sotoipxpcb(so);
1360
1361         MALLOC(cb, struct spxpcb *, sizeof *cb, M_PCB, M_INTWAIT | M_ZERO);
1362         ssb = &so->so_snd;
1363
1364         mm = m_getclr(MB_DONTWAIT, MT_HEADER);
1365         if (mm == NULL) {
1366                 FREE(cb, M_PCB);
1367                 error = ENOBUFS;
1368                 goto spx_attach_end;
1369         }
1370         cb->s_ipx_m = mm;
1371         cb->s_ipx = mtod(mm, struct ipx *);
1372         cb->s_state = TCPS_LISTEN;
1373         cb->s_smax = -1;
1374         cb->s_swl1 = -1;
1375         cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1376         cb->s_ipxpcb = ipxp;
1377         cb->s_mtu = 576 - sizeof(struct spx);
1378         cb->s_cwnd = ssb_space(ssb) * CUNIT / cb->s_mtu;
1379         cb->s_ssthresh = cb->s_cwnd;
1380         cb->s_cwmx = ssb_space(ssb) * CUNIT / (2 * sizeof(struct spx));
1381         /* Above is recomputed when connecting to account
1382            for changed buffering or mtu's */
1383         cb->s_rtt = SPXTV_SRTTBASE;
1384         cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1385         SPXT_RANGESET(cb->s_rxtcur,
1386             ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1387             SPXTV_MIN, SPXTV_REXMTMAX);
1388         ipxp->ipxp_pcb = (caddr_t)cb; 
1389 spx_attach_end:
1390         crit_exit();
1391         return (error);
1392 }
1393
/*
 * Bind the socket's IPX pcb to a local address.  Simply forwards to
 * ipx_pcbbind() and returns its result.
 */
static int
spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct ipxpcb *pcb = sotoipxpcb(so);
	int rc;

	rc = ipx_pcbbind(pcb, nam, td);
	return (rc);
}
1403    
1404 /*
1405  * Initiate connection to peer.
1406  * Enter SYN_SENT state, and mark socket as connecting.
1407  * Start keep-alive timer, setup prototype header,
1408  * Send initial system packet requesting connection.
1409  */
1410 static int
1411 spx_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1412 {
1413         int error;
1414         struct ipxpcb *ipxp;
1415         struct spxpcb *cb;
1416
1417         ipxp = sotoipxpcb(so);
1418         cb = ipxtospxpcb(ipxp);
1419
1420         crit_enter();
1421         if (ipxp->ipxp_lport == 0) {
1422                 error = ipx_pcbbind(ipxp, NULL, td);
1423                 if (error)
1424                         goto spx_connect_end;
1425         }
1426         error = ipx_pcbconnect(ipxp, nam, td);
1427         if (error)
1428                 goto spx_connect_end;
1429         soisconnecting(so);
1430         spxstat.spxs_connattempt++;
1431         cb->s_state = TCPS_SYN_SENT;
1432         cb->s_did = 0;
1433         spx_template(cb);
1434         cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1435         cb->s_force = 1 + SPXTV_KEEP;
1436         /*
1437          * Other party is required to respond to
1438          * the port I send from, but he is not
1439          * required to answer from where I am sending to,
1440          * so allow wildcarding.
1441          * original port I am sending to is still saved in
1442          * cb->s_dport.
1443          */
1444         ipxp->ipxp_fport = 0;
1445         error = spx_output(cb, NULL);
1446 spx_connect_end:
1447         crit_exit();
1448         return (error);
1449 }
1450
1451 static int
1452 spx_detach(struct socket *so)
1453 {
1454         struct ipxpcb *ipxp;
1455         struct spxpcb *cb;
1456
1457         ipxp = sotoipxpcb(so);
1458         cb = ipxtospxpcb(ipxp);
1459
1460         if (ipxp == NULL)
1461                 return (ENOTCONN);
1462         crit_enter();
1463         if (cb->s_state > TCPS_LISTEN)
1464                 spx_disconnect(cb);
1465         else
1466                 spx_close(cb);
1467         crit_exit();
1468         return (0);
1469 }
1470
/*
 * User-requested disconnect: runs spx_disconnect() on the socket's
 * control block inside a critical section.  Always returns 0.
 *
 * We may decide later to implement connection closing handshaking at
 * the spx level optionally; this is the hook to do it.
 */
static int
spx_usr_disconnect(struct socket *so)
{
	struct ipxpcb *pcb = sotoipxpcb(so);
	struct spxpcb *spx = ipxtospxpcb(pcb);

	crit_enter();
	spx_disconnect(spx);
	crit_exit();

	return (0);
}
1490
1491 static int
1492 spx_listen(struct socket *so, struct thread *td)
1493 {
1494         int error;
1495         struct ipxpcb *ipxp;
1496         struct spxpcb *cb;
1497
1498         error = 0;
1499         ipxp = sotoipxpcb(so);
1500         cb = ipxtospxpcb(ipxp);
1501
1502         if (ipxp->ipxp_lport == 0)
1503                 error = ipx_pcbbind(ipxp, NULL, td);
1504         if (error == 0)
1505                 cb->s_state = TCPS_LISTEN;
1506         return (error);
1507 }
1508
1509 /*
1510  * After a receive, possibly send acknowledgment
1511  * updating allocation.
1512  */
1513 static int
1514 spx_rcvd(struct socket *so, int flags)
1515 {
1516         struct ipxpcb *ipxp;
1517         struct spxpcb *cb;
1518
1519         ipxp = sotoipxpcb(so);
1520         cb = ipxtospxpcb(ipxp);
1521
1522         crit_enter();
1523         cb->s_flags |= SF_RVD;
1524         spx_output(cb, NULL);
1525         cb->s_flags &= ~SF_RVD;
1526         crit_exit();
1527         return (0);
1528 }
1529
1530 static int
1531 spx_rcvoob(struct socket *so, struct mbuf *m, int flags)
1532 {
1533         struct ipxpcb *ipxp;
1534         struct spxpcb *cb;
1535
1536         ipxp = sotoipxpcb(so);
1537         cb = ipxtospxpcb(ipxp);
1538
1539         if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1540             (so->so_state & SS_RCVATMARK)) {
1541                 m->m_len = 1;
1542                 *mtod(m, caddr_t) = cb->s_iobc;
1543                 return (0);
1544         }
1545         return (EINVAL);
1546 }
1547
1548 static int
1549 spx_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1550         struct mbuf *controlp, struct thread *td)
1551 {
1552         int error;
1553         struct ipxpcb *ipxp;
1554         struct spxpcb *cb;
1555
1556         error = 0;
1557         ipxp = sotoipxpcb(so);
1558         cb = ipxtospxpcb(ipxp);
1559
1560         crit_enter();
1561         if (flags & PRUS_OOB) {
1562                 if (ssb_space(&so->so_snd) < -512) {
1563                         error = ENOBUFS;
1564                         goto spx_send_end;
1565                 }
1566                 cb->s_oobflags |= SF_SOOB;
1567         }
1568         if (controlp != NULL) {
1569                 u_short *p = mtod(controlp, u_short *);
1570                 spx_newchecks[2]++;
1571                 if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1572                         cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1573                         spx_newchecks[3]++;
1574                 }
1575                 m_freem(controlp);
1576         }
1577         controlp = NULL;
1578         error = spx_output(cb, m);
1579         m = NULL;
1580 spx_send_end:
1581         if (controlp != NULL)
1582                 m_freem(controlp);
1583         if (m != NULL)
1584                 m_freem(m);
1585         crit_exit();
1586         return (error);
1587 }
1588
1589 static int
1590 spx_shutdown(struct socket *so)
1591 {
1592         int error;
1593         struct ipxpcb *ipxp;
1594         struct spxpcb *cb;
1595
1596         error = 0;
1597         ipxp = sotoipxpcb(so);
1598         cb = ipxtospxpcb(ipxp);
1599
1600         crit_enter();
1601         socantsendmore(so);
1602         cb = spx_usrclosed(cb);
1603         if (cb != NULL)
1604                 error = spx_output(cb, NULL);
1605         crit_exit();
1606         return (error);
1607 }
1608
1609 static int
1610 spx_sp_attach(struct socket *so, int proto, struct pru_attach_info *ai)
1611 {
1612         int error;
1613         struct ipxpcb *ipxp;
1614
1615         error = spx_attach(so, proto, ai);
1616         if (error == 0) {
1617                 ipxp = sotoipxpcb(so);
1618                 ((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1619                                         (SF_HI | SF_HO | SF_PI);
1620         }
1621         return (error);
1622 }
1623
1624 /*
1625  * Create template to be used to send spx packets on a connection.
1626  * Called after host entry created, fills
1627  * in a skeletal spx header (choosing connection id),
1628  * minimizing the amount of work necessary when the connection is used.
1629  */
1630 static void
1631 spx_template(struct spxpcb *cb)
1632 {
1633         struct ipxpcb *ipxp = cb->s_ipxpcb;
1634         struct ipx *ipx = cb->s_ipx;
1635         struct signalsockbuf *ssb = &(ipxp->ipxp_socket->so_snd);
1636
1637         ipx->ipx_pt = IPXPROTO_SPX;
1638         ipx->ipx_sna = ipxp->ipxp_laddr;
1639         ipx->ipx_dna = ipxp->ipxp_faddr;
1640         cb->s_sid = htons(spx_iss);
1641         spx_iss += SPX_ISSINCR/2;
1642         cb->s_alo = 1;
1643         cb->s_cwnd = (ssb_space(ssb) * CUNIT) / cb->s_mtu;
1644         cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1645                                         of large packets */
1646         cb->s_cwmx = (ssb_space(ssb) * CUNIT) / (2 * sizeof(struct spx));
1647         cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1648                 /* But allow for lots of little packets as well */
1649 }
1650
1651 /*
1652  * Close a SPIP control block:
1653  *      discard spx control block itself
1654  *      discard ipx protocol control block
1655  *      wake up any sleepers
1656  */
1657 static struct spxpcb *
1658 spx_close(struct spxpcb *cb)
1659 {
1660         struct spx_q *q;
1661         struct spx_q *oq;
1662         struct ipxpcb *ipxp = cb->s_ipxpcb;
1663         struct socket *so = ipxp->ipxp_socket;
1664         struct mbuf *m;
1665
1666         q = cb->s_q.si_next;
1667         while (q != &(cb->s_q)) {
1668                 oq = q;
1669                 q = q->si_next;
1670                 m = oq->si_mbuf;
1671                 remque(oq);
1672                 m_freem(m);
1673                 kfree(oq, M_SPX_Q);
1674         }
1675         m_free(cb->s_ipx_m);
1676         FREE(cb, M_PCB);
1677         ipxp->ipxp_pcb = 0;
1678         soisdisconnected(so);
1679         ipx_pcbdetach(ipxp);
1680         spxstat.spxs_closed++;
1681         return (NULL);
1682 }
1683
/*
 * The user has closed the connection.
 *
 * Someday we may do level 3 handshaking to close a connection or send
 * a xerox style error.  For now this simply closes the control block
 * and returns whatever spx_close() returns.
 */
static struct spxpcb *
spx_usrclosed(struct spxpcb *cb)
{
        cb = spx_close(cb);
        return (cb);
}
1694
/*
 * Disconnect request: closes the control block via spx_close() and
 * returns its result.
 */
static struct spxpcb *
spx_disconnect(struct spxpcb *cb)
{
        cb = spx_close(cb);
        return (cb);
}
1700
1701 /*
1702  * Drop connection, reporting
1703  * the specified error.
1704  */
1705 static struct spxpcb *
1706 spx_drop(struct spxpcb *cb, int errno)
1707 {
1708         struct socket *so = cb->s_ipxpcb->ipxp_socket;
1709
1710         /*
1711          * someday, in the xerox world
1712          * we will generate error protocol packets
1713          * announcing that the socket has gone away.
1714          */
1715         if (TCPS_HAVERCVDSYN(cb->s_state)) {
1716                 spxstat.spxs_drops++;
1717                 cb->s_state = TCPS_CLOSED;
1718                 /*tcp_output(cb);*/
1719         } else
1720                 spxstat.spxs_conndrops++;
1721         so->so_error = errno;
1722         return (spx_close(cb));
1723 }
1724
1725 /*
1726  * Fast timeout routine for processing delayed acks
1727  */
1728 void
1729 spx_fasttimo(void)
1730 {
1731         struct ipxpcb *ipxp;
1732         struct spxpcb *cb;
1733
1734         crit_enter();
1735         ipxp = ipxpcb.ipxp_next;
1736         if (ipxp != NULL) {
1737             for (; ipxp != &ipxpcb; ipxp = ipxp->ipxp_next) {
1738                 if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1739                     (cb->s_flags & SF_DELACK)) {
1740                         cb->s_flags &= ~SF_DELACK;
1741                         cb->s_flags |= SF_ACKNOW;
1742                         spxstat.spxs_delack++;
1743                         spx_output(cb, NULL);
1744                 }
1745             }
1746         }
1747         crit_exit();
1748 }
1749
1750 /*
1751  * spx protocol timeout routine called every 500 ms.
1752  * Updates the timers in all active pcb's and
1753  * causes finite state machine actions if timers expire.
1754  */
1755 void
1756 spx_slowtimo(void)
1757 {
1758         struct ipxpcb *ip, *ipnxt;
1759         struct spxpcb *cb;
1760         int i;
1761
1762         /*
1763          * Search through tcb's and update active timers.
1764          */
1765         crit_enter();
1766         ip = ipxpcb.ipxp_next;
1767         if (ip == NULL) {
1768                 crit_exit();
1769                 return;
1770         }
1771         while (ip != &ipxpcb) {
1772                 cb = ipxtospxpcb(ip);
1773                 ipnxt = ip->ipxp_next;
1774                 if (cb == NULL)
1775                         goto tpgone;
1776                 for (i = 0; i < SPXT_NTIMERS; i++) {
1777                         if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1778                                 spx_timers(cb, i);
1779                                 if (ipnxt->ipxp_prev != ip)
1780                                         goto tpgone;
1781                         }
1782                 }
1783                 cb->s_idle++;
1784                 if (cb->s_rtt)
1785                         cb->s_rtt++;
1786 tpgone:
1787                 ip = ipnxt;
1788         }
1789         spx_iss += SPX_ISSINCR/PR_SLOWHZ;               /* increment iss */
1790         crit_exit();
1791 }
1792
1793 /*
1794  * SPX timer processing.
1795  */
1796 static struct spxpcb *
1797 spx_timers(struct spxpcb *cb, int timer)
1798 {
1799         long rexmt;
1800         int win;
1801
1802         cb->s_force = 1 + timer;
1803         switch (timer) {
1804
1805         /*
1806          * 2 MSL timeout in shutdown went off.  TCP deletes connection
1807          * control block.
1808          */
1809         case SPXT_2MSL:
1810                 kprintf("spx: SPXT_2MSL went off for no reason\n");
1811                 cb->s_timer[timer] = 0;
1812                 break;
1813
1814         /*
1815          * Retransmission timer went off.  Message has not
1816          * been acked within retransmit interval.  Back off
1817          * to a longer retransmit interval and retransmit one packet.
1818          */
1819         case SPXT_REXMT:
1820                 if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1821                         cb->s_rxtshift = SPX_MAXRXTSHIFT;
1822                         spxstat.spxs_timeoutdrop++;
1823                         cb = spx_drop(cb, ETIMEDOUT);
1824                         break;
1825                 }
1826                 spxstat.spxs_rexmttimeo++;
1827                 rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1828                 rexmt *= spx_backoff[cb->s_rxtshift];
1829                 SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1830                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1831                 /*
1832                  * If we have backed off fairly far, our srtt
1833                  * estimate is probably bogus.  Clobber it
1834                  * so we'll take the next rtt measurement as our srtt;
1835                  * move the current srtt into rttvar to keep the current
1836                  * retransmit times until then.
1837                  */
1838                 if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1839                         cb->s_rttvar += (cb->s_srtt >> 2);
1840                         cb->s_srtt = 0;
1841                 }
1842                 cb->s_snxt = cb->s_rack;
1843                 /*
1844                  * If timing a packet, stop the timer.
1845                  */
1846                 cb->s_rtt = 0;
1847                 /*
1848                  * See very long discussion in tcp_timer.c about congestion
1849                  * window and sstrhesh
1850                  */
1851                 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1852                 if (win < 2)
1853                         win = 2;
1854                 cb->s_cwnd = CUNIT;
1855                 cb->s_ssthresh = win * CUNIT;
1856                 spx_output(cb, NULL);
1857                 break;
1858
1859         /*
1860          * Persistance timer into zero window.
1861          * Force a probe to be sent.
1862          */
1863         case SPXT_PERSIST:
1864                 spxstat.spxs_persisttimeo++;
1865                 spx_setpersist(cb);
1866                 spx_output(cb, NULL);
1867                 break;
1868
1869         /*
1870          * Keep-alive timer went off; send something
1871          * or drop connection if idle for too long.
1872          */
1873         case SPXT_KEEP:
1874                 spxstat.spxs_keeptimeo++;
1875                 if (cb->s_state < TCPS_ESTABLISHED)
1876                         goto dropit;
1877                 if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1878                         if (cb->s_idle >= SPXTV_MAXIDLE)
1879                                 goto dropit;
1880                         spxstat.spxs_keepprobe++;
1881                         spx_output(cb, NULL);
1882                 } else
1883                         cb->s_idle = 0;
1884                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1885                 break;
1886         dropit:
1887                 spxstat.spxs_keepdrops++;
1888                 cb = spx_drop(cb, ETIMEDOUT);
1889                 break;
1890         }
1891         return (cb);
1892 }