Correct BSD License clause numbering from 1-2-4 to 1-2-3.
[dragonfly.git] / sys / netproto / ipx / spx_usrreq.c
1 /*
2  * Copyright (c) 1995, Mike Mitchell
3  * Copyright (c) 1984, 1985, 1986, 1987, 1993
4  *      The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of the University nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  *      @(#)spx_usrreq.h
31  *
32  * $FreeBSD: src/sys/netipx/spx_usrreq.c,v 1.27.2.1 2001/02/22 09:44:18 bp Exp $
33  */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/proc.h>
41 #include <sys/protosw.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/socketvar2.h>
45
46 #include <sys/thread2.h>
47 #include <sys/msgport2.h>
48
49 #include <net/route.h>
50 #include <netinet/tcp_fsm.h>
51
52 #include "ipx.h"
53 #include "ipx_pcb.h"
54 #include "ipx_var.h"
55 #include "spx.h"
56 #include "spx_timer.h"
57 #include "spx_var.h"
58 #include "spx_debug.h"
59
60 /*
61  * SPX protocol implementation.
62  */
/* Set to 1 by spx_init(); presumably the initial sequence seed -- confirm. */
static u_short  spx_iss;
/* Ad-hoc event counters; slots 0, 1, 4, 7 and 9 are bumped in this file. */
static u_short  spx_newchecks[50];
/*
 * When nonzero, spx_input() rejects (dropwithreset) packets that reach a
 * listening pcb with a nonzero destination id or sequence number.
 */
static int      spx_hardnosed;
/* When nonzero, a packet with SPX_SA set raises SF_DELACK, not SF_ACKNOW. */
static int      spx_use_delack = 0;
/* When nonzero, spx_trace() is called even if the socket lacks SO_DEBUG. */
static int      traceallspxs = 0;
/* Copy of the incoming header taken by spx_input() for use by spx_trace(). */
static struct   spx     spx_savesi;
/* Protocol statistics; the newstats member is aliased as spxstat below. */
static struct   spx_istat spx_istat;

/* Following was struct spxstat spxstat; */
#ifndef spxstat
#define spxstat spx_istat.newstats
#endif

/*
 * Table with one entry per retransmit shift value (SPX_MAXRXTSHIFT + 1).
 * Not referenced in this part of the file; presumably the retransmit
 * backoff multipliers -- confirm against the timer code.
 */
static int spx_backoff[SPX_MAXRXTSHIFT+1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
78
/* Internal protocol helpers operating directly on an SPX control block. */
static  struct spxpcb *spx_close(struct spxpcb *cb);
static  struct spxpcb *spx_disconnect(struct spxpcb *cb);
static  struct spxpcb *spx_drop(struct spxpcb *cb, int error);
static  int spx_output(struct spxpcb *cb, struct mbuf *m0);
static  int spx_reass(struct spxpcb *cb, struct spx *si, struct mbuf *si_m);
static  void spx_setpersist(struct spxpcb *cb);
static  void spx_template(struct spxpcb *cb);
static  struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
static  struct spxpcb *spx_usrclosed(struct spxpcb *cb);

/* Message-based user-request handlers installed in the pr_usrreqs tables. */
static  void spx_usr_abort(netmsg_t);
static  void spx_accept(netmsg_t);
static  void spx_attach(netmsg_t);
static  void spx_bind(netmsg_t);
static  void spx_connect(netmsg_t);
static  void spx_detach(netmsg_t);
static  void spx_usr_disconnect(netmsg_t);
static  void spx_listen(netmsg_t);
static  void spx_rcvd(netmsg_t);
static  void spx_rcvoob(netmsg_t);
static  void spx_send(netmsg_t);
static  void spx_shutdown(netmsg_t);
static  void spx_sp_attach(netmsg_t);
102
/*
 * User-request dispatch table for SPX sockets attached via spx_attach().
 * Address, control and sense requests are delegated to the generic IPX
 * and socket-layer routines; connect2 is not supported.
 */
struct  pr_usrreqs spx_usrreqs = {
        .pru_abort = spx_usr_abort,
        .pru_accept = spx_accept,
        .pru_attach = spx_attach,
        .pru_bind = spx_bind,
        .pru_connect = spx_connect,
        .pru_connect2 = pr_generic_notsupp,
        .pru_control = ipx_control,
        .pru_detach = spx_detach,
        .pru_disconnect = spx_usr_disconnect,
        .pru_listen = spx_listen,
        .pru_peeraddr = ipx_peeraddr,
        .pru_rcvd = spx_rcvd,
        .pru_rcvoob = spx_rcvoob,
        .pru_send = spx_send,
        .pru_sense = pru_sense_null,
        .pru_shutdown = spx_shutdown,
        .pru_sockaddr = ipx_sockaddr,
        .pru_sosend = sosend,
        .pru_soreceive = soreceive
};
124
/*
 * Variant of spx_usrreqs whose only difference is the attach handler:
 * sockets using this table are attached through spx_sp_attach() instead
 * of spx_attach().  Every other entry is identical.
 */
struct  pr_usrreqs spx_usrreq_sps = {
        .pru_abort = spx_usr_abort,
        .pru_accept = spx_accept,
        .pru_attach = spx_sp_attach,
        .pru_bind = spx_bind,
        .pru_connect = spx_connect,
        .pru_connect2 = pr_generic_notsupp,
        .pru_control = ipx_control,
        .pru_detach = spx_detach,
        .pru_disconnect = spx_usr_disconnect,
        .pru_listen = spx_listen,
        .pru_peeraddr = ipx_peeraddr,
        .pru_rcvd = spx_rcvd,
        .pru_rcvoob = spx_rcvoob,
        .pru_send = spx_send,
        .pru_sense = pru_sense_null,
        .pru_shutdown = spx_shutdown,
        .pru_sockaddr = ipx_sockaddr,
        .pru_sosend = sosend,
        .pru_soreceive = soreceive
};
146
/* Malloc type for the struct spx_q queue entries allocated in spx_reass(). */
static MALLOC_DEFINE(M_SPX_Q, "ipx_spx_q", "IPX Packet Management");
148
149 void
150 spx_init(void)
151 {
152
153         spx_iss = 1; /* WRONG !! should fish it out of TODR */
154 }
155
156 void
157 spx_input(struct mbuf *m, struct ipxpcb *ipxp)
158 {
159         struct spxpcb *cb;
160         struct spx *si;
161         struct socket *so;
162         int dropsocket = 0;
163         short ostate = 0;
164
165         spxstat.spxs_rcvtotal++;
166         if (ipxp == NULL) {
167                 panic("No ipxpcb in spx_input");
168                 return;
169         }
170
171         cb = ipxtospxpcb(ipxp);
172         if (cb == NULL)
173                 goto bad;
174
175         if (m->m_len < sizeof(struct spx)) {
176                 if ((m = m_pullup(m, sizeof(*si))) == NULL) {
177                         spxstat.spxs_rcvshort++;
178                         return;
179                 }
180         }
181         si = mtod(m, struct spx *);
182         si->si_seq = ntohs(si->si_seq);
183         si->si_ack = ntohs(si->si_ack);
184         si->si_alo = ntohs(si->si_alo);
185
186         so = ipxp->ipxp_socket;
187
188         if (so->so_options & SO_DEBUG || traceallspxs) {
189                 ostate = cb->s_state;
190                 spx_savesi = *si;
191         }
192         if (so->so_options & SO_ACCEPTCONN) {
193                 struct spxpcb *ocb = cb;
194
195                 so = sonewconn(so, 0);
196                 if (so == NULL) {
197                         goto drop;
198                 }
199                 /*
200                  * This is ugly, but ....
201                  *
202                  * Mark socket as temporary until we're
203                  * committed to keeping it.  The code at
204                  * ``drop'' and ``dropwithreset'' check the
205                  * flag dropsocket to see if the temporary
206                  * socket created here should be discarded.
207                  * We mark the socket as discardable until
208                  * we're committed to it below in TCPS_LISTEN.
209                  */
210                 dropsocket++;
211                 ipxp = (struct ipxpcb *)so->so_pcb;
212                 ipxp->ipxp_laddr = si->si_dna;
213                 cb = ipxtospxpcb(ipxp);
214                 cb->s_mtu = ocb->s_mtu;         /* preserve sockopts */
215                 cb->s_flags = ocb->s_flags;     /* preserve sockopts */
216                 cb->s_flags2 = ocb->s_flags2;   /* preserve sockopts */
217                 cb->s_state = TCPS_LISTEN;
218         }
219
220         /*
221          * Packet received on connection.
222          * reset idle time and keep-alive timer;
223          */
224         cb->s_idle = 0;
225         cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
226
227         switch (cb->s_state) {
228
229         case TCPS_LISTEN:{
230                 struct sockaddr_ipx *sipx, ssipx;
231                 struct ipx_addr laddr;
232
233                 /*
234                  * If somebody here was carying on a conversation
235                  * and went away, and his pen pal thinks he can
236                  * still talk, we get the misdirected packet.
237                  */
238                 if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
239                         spx_istat.gonawy++;
240                         goto dropwithreset;
241                 }
242                 sipx = &ssipx;
243                 bzero(sipx, sizeof *sipx);
244                 sipx->sipx_len = sizeof(*sipx);
245                 sipx->sipx_family = AF_IPX;
246                 sipx->sipx_addr = si->si_sna;
247                 laddr = ipxp->ipxp_laddr;
248                 if (ipx_nullhost(laddr))
249                         ipxp->ipxp_laddr = si->si_dna;
250                 if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
251                         ipxp->ipxp_laddr = laddr;
252                         spx_istat.noconn++;
253                         goto drop;
254                 }
255                 spx_template(cb);
256                 dropsocket = 0;         /* committed to socket */
257                 cb->s_did = si->si_sid;
258                 cb->s_rack = si->si_ack;
259                 cb->s_ralo = si->si_alo;
260 #define THREEWAYSHAKE
261 #ifdef THREEWAYSHAKE
262                 cb->s_state = TCPS_SYN_RECEIVED;
263                 cb->s_force = 1 + SPXT_KEEP;
264                 spxstat.spxs_accepts++;
265                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
266                 }
267                 break;
268         /*
269          * This state means that we have heard a response
270          * to our acceptance of their connection
271          * It is probably logically unnecessary in this
272          * implementation.
273          */
274          case TCPS_SYN_RECEIVED: {
275                 if (si->si_did != cb->s_sid) {
276                         spx_istat.wrncon++;
277                         goto drop;
278                 }
279 #endif
280                 ipxp->ipxp_fport =  si->si_sport;
281                 cb->s_timer[SPXT_REXMT] = 0;
282                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
283                 soisconnected(so);
284                 cb->s_state = TCPS_ESTABLISHED;
285                 spxstat.spxs_accepts++;
286                 }
287                 break;
288
289         /*
290          * This state means that we have gotten a response
291          * to our attempt to establish a connection.
292          * We fill in the data from the other side,
293          * telling us which port to respond to, instead of the well-
294          * known one we might have sent to in the first place.
295          * We also require that this is a response to our
296          * connection id.
297          */
298         case TCPS_SYN_SENT:
299                 if (si->si_did != cb->s_sid) {
300                         spx_istat.notme++;
301                         goto drop;
302                 }
303                 spxstat.spxs_connects++;
304                 cb->s_did = si->si_sid;
305                 cb->s_rack = si->si_ack;
306                 cb->s_ralo = si->si_alo;
307                 cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
308                 cb->s_timer[SPXT_REXMT] = 0;
309                 cb->s_flags |= SF_ACKNOW;
310                 soisconnected(so);
311                 cb->s_state = TCPS_ESTABLISHED;
312                 /* Use roundtrip time of connection request for initial rtt */
313                 if (cb->s_rtt) {
314                         cb->s_srtt = cb->s_rtt << 3;
315                         cb->s_rttvar = cb->s_rtt << 1;
316                         SPXT_RANGESET(cb->s_rxtcur,
317                             ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
318                             SPXTV_MIN, SPXTV_REXMTMAX);
319                             cb->s_rtt = 0;
320                 }
321         }
322         if (so->so_options & SO_DEBUG || traceallspxs)
323                 spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
324
325         m->m_len -= sizeof(struct ipx);
326         m->m_pkthdr.len -= sizeof(struct ipx);
327         m->m_data += sizeof(struct ipx);
328
329         if (spx_reass(cb, si, m)) {
330                 m_freem(m);
331         }
332         if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
333                 spx_output(cb, NULL);
334         cb->s_flags &= ~(SF_WIN|SF_RXT);
335         return;
336
337 dropwithreset:
338         if (dropsocket)
339                 soabort(so);
340         si->si_seq = ntohs(si->si_seq);
341         si->si_ack = ntohs(si->si_ack);
342         si->si_alo = ntohs(si->si_alo);
343         m_freem(m);
344         if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
345                 spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
346         return;
347
348 drop:
349 bad:
350         if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
351             traceallspxs)
352                 spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
353         m_freem(m);
354 }
355
/*
 * Number of consecutive duplicate acks at which spx_reass() forces a
 * retransmission.
 */
static int spxrexmtthresh = 3;
357
358 /*
359  * This is structurally similar to the tcp reassembly routine
360  * but its function is somewhat different:  It merely queues
361  * packets up, and suppresses duplicates.
362  */
363 static int
364 spx_reass(struct spxpcb *cb, struct spx *si, struct mbuf *si_m)
365 {
366         struct spx_q *q, *nq, *q_temp;
367         struct mbuf *m;
368         struct socket *so = cb->s_ipxpcb->ipxp_socket;
369         char packetp = cb->s_flags & SF_HI;
370         int incr;
371         char wakeup = 0;
372
373         if (si == NULL)
374                 goto present;
375         /*
376          * Update our news from them.
377          */
378         if (si->si_cc & SPX_SA)
379                 cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
380         if (SSEQ_GT(si->si_alo, cb->s_ralo))
381                 cb->s_flags |= SF_WIN;
382         if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
383                 if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
384                         spxstat.spxs_rcvdupack++;
385                         /*
386                          * If this is a completely duplicate ack
387                          * and other conditions hold, we assume
388                          * a packet has been dropped and retransmit
389                          * it exactly as in tcp_input().
390                          */
391                         if (si->si_ack != cb->s_rack ||
392                             si->si_alo != cb->s_ralo)
393                                 cb->s_dupacks = 0;
394                         else if (++cb->s_dupacks == spxrexmtthresh) {
395                                 u_short onxt = cb->s_snxt;
396                                 int cwnd = cb->s_cwnd;
397
398                                 cb->s_snxt = si->si_ack;
399                                 cb->s_cwnd = CUNIT;
400                                 cb->s_force = 1 + SPXT_REXMT;
401                                 spx_output(cb, NULL);
402                                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
403                                 cb->s_rtt = 0;
404                                 if (cwnd >= 4 * CUNIT)
405                                         cb->s_cwnd = cwnd / 2;
406                                 if (SSEQ_GT(onxt, cb->s_snxt))
407                                         cb->s_snxt = onxt;
408                                 return (1);
409                         }
410                 } else
411                         cb->s_dupacks = 0;
412                 goto update_window;
413         }
414         cb->s_dupacks = 0;
415         /*
416          * If our correspondent acknowledges data we haven't sent
417          * TCP would drop the packet after acking.  We'll be a little
418          * more permissive
419          */
420         if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
421                 spxstat.spxs_rcvacktoomuch++;
422                 si->si_ack = cb->s_smax + 1;
423         }
424         spxstat.spxs_rcvackpack++;
425         /*
426          * If transmit timer is running and timed sequence
427          * number was acked, update smoothed round trip time.
428          * See discussion of algorithm in tcp_input.c
429          */
430         if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
431                 spxstat.spxs_rttupdated++;
432                 if (cb->s_srtt != 0) {
433                         short delta;
434                         delta = cb->s_rtt - (cb->s_srtt >> 3);
435                         if ((cb->s_srtt += delta) <= 0)
436                                 cb->s_srtt = 1;
437                         if (delta < 0)
438                                 delta = -delta;
439                         delta -= (cb->s_rttvar >> 2);
440                         if ((cb->s_rttvar += delta) <= 0)
441                                 cb->s_rttvar = 1;
442                 } else {
443                         /*
444                          * No rtt measurement yet
445                          */
446                         cb->s_srtt = cb->s_rtt << 3;
447                         cb->s_rttvar = cb->s_rtt << 1;
448                 }
449                 cb->s_rtt = 0;
450                 cb->s_rxtshift = 0;
451                 SPXT_RANGESET(cb->s_rxtcur,
452                         ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
453                         SPXTV_MIN, SPXTV_REXMTMAX);
454         }
455         /*
456          * If all outstanding data is acked, stop retransmit
457          * timer and remember to restart (more output or persist).
458          * If there is more data to be acked, restart retransmit
459          * timer, using current (possibly backed-off) value;
460          */
461         if (si->si_ack == cb->s_smax + 1) {
462                 cb->s_timer[SPXT_REXMT] = 0;
463                 cb->s_flags |= SF_RXT;
464         } else if (cb->s_timer[SPXT_PERSIST] == 0)
465                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
466         /*
467          * When new data is acked, open the congestion window.
468          * If the window gives us less than ssthresh packets
469          * in flight, open exponentially (maxseg at a time).
470          * Otherwise open linearly (maxseg^2 / cwnd at a time).
471          */
472         incr = CUNIT;
473         if (cb->s_cwnd > cb->s_ssthresh)
474                 incr = max(incr * incr / cb->s_cwnd, 1);
475         cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
476         /*
477          * Trim Acked data from output queue.
478          */
479         while ((m = so->so_snd.ssb_mb) != NULL) {
480                 if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
481                         sbdroprecord(&so->so_snd.sb);
482                 else
483                         break;
484         }
485         sowwakeup(so);
486         cb->s_rack = si->si_ack;
487 update_window:
488         if (SSEQ_LT(cb->s_snxt, cb->s_rack))
489                 cb->s_snxt = cb->s_rack;
490         if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
491             (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
492              (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
493                 /* keep track of pure window updates */
494                 if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
495                     && SSEQ_LT(cb->s_ralo, si->si_alo)) {
496                         spxstat.spxs_rcvwinupd++;
497                         spxstat.spxs_rcvdupack--;
498                 }
499                 cb->s_ralo = si->si_alo;
500                 cb->s_swl1 = si->si_seq;
501                 cb->s_swl2 = si->si_ack;
502                 cb->s_swnd = (1 + si->si_alo - si->si_ack);
503                 if (cb->s_swnd > cb->s_smxw)
504                         cb->s_smxw = cb->s_swnd;
505                 cb->s_flags |= SF_WIN;
506         }
507         /*
508          * If this packet number is higher than that which
509          * we have allocated refuse it, unless urgent
510          */
511         if (SSEQ_GT(si->si_seq, cb->s_alo)) {
512                 if (si->si_cc & SPX_SP) {
513                         spxstat.spxs_rcvwinprobe++;
514                         return (1);
515                 } else
516                         spxstat.spxs_rcvpackafterwin++;
517                 if (si->si_cc & SPX_OB) {
518                         if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
519                                 m_freem(si_m);
520                                 return (0);
521                         } /* else queue this packet; */
522                 } else {
523                         /*register struct socket *so = cb->s_ipxpcb->ipxp_socket;
524                         if (so->so_state && SS_NOFDREF) {
525                                 spx_close(cb);
526                         } else
527                                        would crash system*/
528                         spx_istat.notyet++;
529                         m_freem(si_m);
530                         return (0);
531                 }
532         }
533         /*
534          * If this is a system packet, we don't need to
535          * queue it up, and won't update acknowledge #
536          */
537         if (si->si_cc & SPX_SP) {
538                 return (1);
539         }
540         /*
541          * We have already seen this packet, so drop.
542          */
543         if (SSEQ_LT(si->si_seq, cb->s_ack)) {
544                 spx_istat.bdreas++;
545                 spxstat.spxs_rcvduppack++;
546                 if (si->si_seq == cb->s_ack - 1)
547                         spx_istat.lstdup++;
548                 return (1);
549         }
550         /*
551          * Loop through all packets queued up to insert in
552          * appropriate sequence.
553          */
554         LIST_FOREACH(q, &cb->s_q, sq_entry) {
555                 if (si->si_seq == SI(q)->si_seq) {
556                         spxstat.spxs_rcvduppack++;
557                         return (1);
558                 }
559                 if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
560                         spxstat.spxs_rcvoopack++;
561                         break;
562                 }
563         }
564         nq = kmalloc(sizeof(struct spx_q), M_SPX_Q, M_INTNOWAIT);
565         if (nq == NULL) {
566                 m_freem(si_m);
567                 return (0);
568         }
569         if (q == NULL)
570                 LIST_INSERT_HEAD(&cb->s_q, nq, sq_entry);
571         else
572                 LIST_INSERT_BEFORE(q, nq, sq_entry);
573         nq->si_mbuf = si_m;
574         /*
575          * If this packet is urgent, inform process
576          */
577         if (si->si_cc & SPX_OB) {
578                 cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
579                 sohasoutofband(so);
580                 cb->s_oobflags |= SF_IOOB;
581         }
582 present:
583 #define SPINC sizeof(struct spxhdr)
584         /*
585          * Loop through all packets queued up to update acknowledge
586          * number, and present all acknowledged data to user;
587          * If in packet interface mode, show packet headers.
588          */
589         LIST_FOREACH_MUTABLE(q, &cb->s_q, sq_entry, q_temp) {
590                   if (SI(q)->si_seq == cb->s_ack) {
591                         cb->s_ack++;
592                         m = q->si_mbuf;
593                         if (SI(q)->si_cc & SPX_OB) {
594                                 cb->s_oobflags &= ~SF_IOOB;
595                                 if (so->so_rcv.ssb_cc)
596                                         so->so_oobmark = so->so_rcv.ssb_cc;
597                                 else
598                                         sosetstate(so, SS_RCVATMARK);
599                         }
600                         LIST_REMOVE(q, sq_entry);
601                         kfree(q, M_SPX_Q);
602                         wakeup = 1;
603                         spxstat.spxs_rcvpack++;
604 #ifdef SF_NEWCALL
605                         if (cb->s_flags2 & SF_NEWCALL) {
606                                 struct spxhdr *sp = mtod(m, struct spxhdr *);
607                                 u_char dt = sp->spx_dt;
608                                 spx_newchecks[4]++;
609                                 if (dt != cb->s_rhdr.spx_dt) {
610                                         struct mbuf *mm =
611                                            m_getclr(MB_DONTWAIT, MT_CONTROL);
612                                         spx_newchecks[0]++;
613                                         if (mm != NULL) {
614                                                 u_short *s =
615                                                         mtod(mm, u_short *);
616                                                 cb->s_rhdr.spx_dt = dt;
617                                                 mm->m_len = 5; /*XXX*/
618                                                 s[0] = 5;
619                                                 s[1] = 1;
620                                                 *(u_char *)(&s[2]) = dt;
621                                                 sbappend(&so->so_rcv.sb, mm);
622                                         }
623                                 }
624                                 if (sp->spx_cc & SPX_OB) {
625                                         m_chtype(m, MT_OOBDATA);
626                                         spx_newchecks[1]++;
627                                         so->so_oobmark = 0;
628                                         soclrstate(so, SS_RCVATMARK);
629                                 }
630                                 if (packetp == 0) {
631                                         m->m_data += SPINC;
632                                         m->m_len -= SPINC;
633                                         m->m_pkthdr.len -= SPINC;
634                                 }
635                                 if ((sp->spx_cc & SPX_EM) || packetp) {
636                                         sbappendrecord(&so->so_rcv.sb, m);
637                                         spx_newchecks[9]++;
638                                 } else
639                                         sbappend(&so->so_rcv.sb, m);
640                         } else
641 #endif
642                         if (packetp) {
643                                 sbappendrecord(&so->so_rcv.sb, m);
644                         } else {
645                                 cb->s_rhdr = *mtod(m, struct spxhdr *);
646                                 m->m_data += SPINC;
647                                 m->m_len -= SPINC;
648                                 m->m_pkthdr.len -= SPINC;
649                                 sbappend(&so->so_rcv.sb, m);
650                         }
651                   } else
652                         break;
653         }
654         if (wakeup)
655                 sorwakeup(so);
656         return (0);
657 }
658
659 void
660 spx_ctlinput(netmsg_t msg)
661 {
662         /*struct socket *so = msg->base.nm_so;*/
663         int cmd = msg->ctlinput.nm_cmd;
664         struct sockaddr *arg_as_sa = msg->ctlinput.nm_arg;
665         caddr_t arg = (/* XXX */ caddr_t)arg_as_sa;
666         struct sockaddr_ipx *sipx;
667
668         if (cmd < 0 || cmd > PRC_NCMDS)
669                 goto out;
670
671         switch (cmd) {
672         case PRC_ROUTEDEAD:
673                 break;
674         case PRC_IFDOWN:
675         case PRC_HOSTDEAD:
676         case PRC_HOSTUNREACH:
677                 sipx = (struct sockaddr_ipx *)arg;
678                 if (sipx->sipx_family != AF_IPX)
679                         break;
680                 break;
681         default:
682                 break;
683         }
684 out:
685         lwkt_replymsg(&msg->lmsg, 0);
686 }
687
688 static int
689 spx_output(struct spxpcb *cb, struct mbuf *m0)
690 {
691         struct socket *so = cb->s_ipxpcb->ipxp_socket;
692         struct mbuf *m = NULL;
693         struct spx *si = NULL;
694         struct signalsockbuf *ssb = &so->so_snd;
695         int len = 0, win, rcv_win;
696         short span, off, recordp = 0;
697         u_short alo;
698         int error = 0, sendalot;
699 #ifdef notdef
700         int idle;
701 #endif
702         struct mbuf *mprev;
703
704         if (m0 != NULL) {
705                 int mtu = cb->s_mtu;
706                 int datalen;
707                 /*
708                  * Make sure that packet isn't too big.
709                  */
710                 for (m = m0; m != NULL; m = m->m_next) {
711                         mprev = m;
712                         len += m->m_len;
713                         if (m->m_flags & M_EOR)
714                                 recordp = 1;
715                 }
716                 datalen = (cb->s_flags & SF_HO) ?
717                                 len - sizeof(struct spxhdr) : len;
718                 if (datalen > mtu) {
719                         if (cb->s_flags & SF_PI) {
720                                 m_freem(m0);
721                                 return (EMSGSIZE);
722                         } else {
723                                 int oldEM = cb->s_cc & SPX_EM;
724
725                                 cb->s_cc &= ~SPX_EM;
726                                 while (len > mtu) {
727                                         /*
728                                          * Here we are only being called
729                                          * from usrreq(), so it is OK to
730                                          * block.
731                                          */
732                                         m = m_copym(m0, 0, mtu, MB_WAIT);
733                                         if (cb->s_flags & SF_NEWCALL) {
734                                             struct mbuf *mm = m;
735                                             spx_newchecks[7]++;
736                                             while (mm != NULL) {
737                                                 mm->m_flags &= ~M_EOR;
738                                                 mm = mm->m_next;
739                                             }
740                                         }
741                                         error = spx_output(cb, m);
742                                         if (error) {
743                                                 cb->s_cc |= oldEM;
744                                                 m_freem(m0);
745                                                 return (error);
746                                         }
747                                         m_adj(m0, mtu);
748                                         len -= mtu;
749                                 }
750                                 cb->s_cc |= oldEM;
751                         }
752                 }
753                 /*
754                  * Force length even, by adding a "garbage byte" if
755                  * necessary.
756                  */
757                 if (len & 1) {
758                         m = mprev;
759                         if (M_TRAILINGSPACE(m) >= 1)
760                                 m->m_len++;
761                         else {
762                                 struct mbuf *m1 = m_get(MB_DONTWAIT, MT_DATA);
763
764                                 if (m1 == NULL) {
765                                         m_freem(m0);
766                                         return (ENOBUFS);
767                                 }
768                                 m1->m_len = 1;
769                                 *(mtod(m1, u_char *)) = 0;
770                                 m->m_next = m1;
771                         }
772                 }
773                 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
774                 if (m == NULL) {
775                         m_freem(m0);
776                         return (ENOBUFS);
777                 }
778                 /*
779                  * Fill in mbuf with extended SP header
780                  * and addresses and length put into network format.
781                  */
782                 MH_ALIGN(m, sizeof(struct spx));
783                 m->m_len = sizeof(struct spx);
784                 m->m_next = m0;
785                 si = mtod(m, struct spx *);
786                 si->si_i = *cb->s_ipx;
787                 si->si_s = cb->s_shdr;
788                 if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
789                         struct spxhdr *sh;
790                         if (m0->m_len < sizeof(*sh)) {
791                                 if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
792                                         m_free(m);
793                                         m_freem(m0);
794                                         return (EINVAL);
795                                 }
796                                 m->m_next = m0;
797                         }
798                         sh = mtod(m0, struct spxhdr *);
799                         si->si_dt = sh->spx_dt;
800                         si->si_cc |= sh->spx_cc & SPX_EM;
801                         m0->m_len -= sizeof(*sh);
802                         m0->m_data += sizeof(*sh);
803                         len -= sizeof(*sh);
804                 }
805                 len += sizeof(*si);
806                 if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
807                         si->si_cc |= SPX_EM;
808                         spx_newchecks[8]++;
809                 }
810                 if (cb->s_oobflags & SF_SOOB) {
811                         /*
812                          * Per jqj@cornell:
813                          * make sure OB packets convey exactly 1 byte.
814                          * If the packet is 1 byte or larger, we
815                          * have already guaranteed there to be at least
816                          * one garbage byte for the checksum, and
817                          * extra bytes shouldn't hurt!
818                          */
819                         if (len > sizeof(*si)) {
820                                 si->si_cc |= SPX_OB;
821                                 len = (1 + sizeof(*si));
822                         }
823                 }
824                 si->si_len = htons((u_short)len);
825                 m->m_pkthdr.len = ((len - 1) | 1) + 1;
826                 /*
827                  * queue stuff up for output
828                  */
829                 sbappendrecord(&ssb->sb, m);
830                 cb->s_seq++;
831         }
832 #ifdef notdef
833         idle = (cb->s_smax == (cb->s_rack - 1));
834 #endif
835 again:
836         sendalot = 0;
837         off = cb->s_snxt - cb->s_rack;
838         win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
839
840         /*
841          * If in persist timeout with window of 0, send a probe.
842          * Otherwise, if window is small but nonzero
843          * and timer expired, send what we can and go into
844          * transmit state.
845          */
846         if (cb->s_force == 1 + SPXT_PERSIST) {
847                 if (win != 0) {
848                         cb->s_timer[SPXT_PERSIST] = 0;
849                         cb->s_rxtshift = 0;
850                 }
851         }
852         span = cb->s_seq - cb->s_rack;
853         len = min(span, win) - off;
854
855         if (len < 0) {
856                 /*
857                  * Window shrank after we went into it.
858                  * If window shrank to 0, cancel pending
859                  * retransmission and pull s_snxt back
860                  * to (closed) window.  We will enter persist
861                  * state below.  If the window didn't close completely,
862                  * just wait for an ACK.
863                  */
864                 len = 0;
865                 if (win == 0) {
866                         cb->s_timer[SPXT_REXMT] = 0;
867                         cb->s_snxt = cb->s_rack;
868                 }
869         }
870         if (len > 1)
871                 sendalot = 1;
872         rcv_win = ssb_space(&so->so_rcv);
873
874         /*
875          * Send if we owe peer an ACK.
876          */
877         if (cb->s_oobflags & SF_SOOB) {
878                 /*
879                  * must transmit this out of band packet
880                  */
881                 cb->s_oobflags &= ~ SF_SOOB;
882                 sendalot = 1;
883                 spxstat.spxs_sndurg++;
884                 goto found;
885         }
886         if (cb->s_flags & SF_ACKNOW)
887                 goto send;
888         if (cb->s_state < TCPS_ESTABLISHED)
889                 goto send;
890         /*
891          * Silly window can't happen in spx.
892          * Code from tcp deleted.
893          */
894         if (len)
895                 goto send;
896         /*
897          * Compare available window to amount of window
898          * known to peer (as advertised window less
899          * next expected input.)  If the difference is at least two
900          * packets or at least 35% of the maximum possible window,
901          * then want to send a window update to peer.
902          */
903         if (rcv_win > 0) {
904                 u_short delta =  1 + cb->s_alo - cb->s_ack;
905                 int adv = rcv_win - (delta * cb->s_mtu);
906                 
907                 if ((so->so_rcv.ssb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
908                     (100 * adv / so->so_rcv.ssb_hiwat >= 35)) {
909                         spxstat.spxs_sndwinup++;
910                         cb->s_flags |= SF_ACKNOW;
911                         goto send;
912                 }
913
914         }
915         /*
916          * Many comments from tcp_output.c are appropriate here
917          * including . . .
918          * If send window is too small, there is data to transmit, and no
919          * retransmit or persist is pending, then go to persist state.
920          * If nothing happens soon, send when timer expires:
921          * if window is nonzero, transmit what we can,
922          * otherwise send a probe.
923          */
924         if (so->so_snd.ssb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
925                 cb->s_timer[SPXT_PERSIST] == 0) {
926                         cb->s_rxtshift = 0;
927                         spx_setpersist(cb);
928         }
929         /*
930          * No reason to send a packet, just return.
931          */
932         cb->s_outx = 1;
933         return (0);
934
935 send:
936         /*
937          * Find requested packet.
938          */
939         si = NULL;
940         if (len > 0) {
941                 cb->s_want = cb->s_snxt;
942                 for (m = ssb->ssb_mb; m != NULL; m = m->m_nextpkt) {
943                         si = mtod(m, struct spx *);
944                         if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
945                                 break;
946                 }
947         found:
948                 if (si != NULL) {
949                         if (si->si_seq == cb->s_snxt)
950                                         cb->s_snxt++;
951                                 else
952                                         spxstat.spxs_sndvoid++, si = NULL;
953                 }
954         }
955         /*
956          * update window
957          */
958         if (rcv_win < 0)
959                 rcv_win = 0;
960         alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
961         if (SSEQ_LT(alo, cb->s_alo)) 
962                 alo = cb->s_alo;
963
964         if (si != NULL) {
965                 /*
966                  * must make a copy of this packet for
967                  * ipx_output to monkey with
968                  */
969                 m = m_copy(m, 0, (int)M_COPYALL);
970                 if (m == NULL) {
971                         return (ENOBUFS);
972                 }
973                 si = mtod(m, struct spx *);
974                 if (SSEQ_LT(si->si_seq, cb->s_smax))
975                         spxstat.spxs_sndrexmitpack++;
976                 else
977                         spxstat.spxs_sndpack++;
978         } else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
979                 /*
980                  * Must send an acknowledgement or a probe
981                  */
982                 if (cb->s_force)
983                         spxstat.spxs_sndprobe++;
984                 if (cb->s_flags & SF_ACKNOW)
985                         spxstat.spxs_sndacks++;
986                 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
987                 if (m == NULL)
988                         return (ENOBUFS);
989                 /*
990                  * Fill in mbuf with extended SP header
991                  * and addresses and length put into network format.
992                  */
993                 MH_ALIGN(m, sizeof(struct spx));
994                 m->m_len = sizeof(*si);
995                 m->m_pkthdr.len = sizeof(*si);
996                 si = mtod(m, struct spx *);
997                 si->si_i = *cb->s_ipx;
998                 si->si_s = cb->s_shdr;
999                 si->si_seq = cb->s_smax + 1;
1000                 si->si_len = htons(sizeof(*si));
1001                 si->si_cc |= SPX_SP;
1002         } else {
1003                 cb->s_outx = 3;
1004                 if (so->so_options & SO_DEBUG || traceallspxs)
1005                         spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1006                 return (0);
1007         }
1008         /*
1009          * Stuff checksum and output datagram.
1010          */
1011         if ((si->si_cc & SPX_SP) == 0) {
1012                 if (cb->s_force != (1 + SPXT_PERSIST) ||
1013                     cb->s_timer[SPXT_PERSIST] == 0) {
1014                         /*
1015                          * If this is a new packet and we are not currently 
1016                          * timing anything, time this one.
1017                          */
1018                         if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1019                                 cb->s_smax = si->si_seq;
1020                                 if (cb->s_rtt == 0) {
1021                                         spxstat.spxs_segstimed++;
1022                                         cb->s_rtseq = si->si_seq;
1023                                         cb->s_rtt = 1;
1024                                 }
1025                         }
1026                         /*
1027                          * Set rexmt timer if not currently set,
1028                          * Initial value for retransmit timer is smoothed
1029                          * round-trip time + 2 * round-trip time variance.
1030                          * Initialize shift counter which is used for backoff
1031                          * of retransmit time.
1032                          */
1033                         if (cb->s_timer[SPXT_REXMT] == 0 &&
1034                             cb->s_snxt != cb->s_rack) {
1035                                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1036                                 if (cb->s_timer[SPXT_PERSIST]) {
1037                                         cb->s_timer[SPXT_PERSIST] = 0;
1038                                         cb->s_rxtshift = 0;
1039                                 }
1040                         }
1041                 } else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1042                         cb->s_smax = si->si_seq;
1043                 }
1044         } else if (cb->s_state < TCPS_ESTABLISHED) {
1045                 if (cb->s_rtt == 0)
1046                         cb->s_rtt = 1; /* Time initial handshake */
1047                 if (cb->s_timer[SPXT_REXMT] == 0)
1048                         cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1049         }
1050         {
1051                 /*
1052                  * Do not request acks when we ack their data packets or
1053                  * when we do a gratuitous window update.
1054                  */
1055                 if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1056                                 si->si_cc |= SPX_SA;
1057                 si->si_seq = htons(si->si_seq);
1058                 si->si_alo = htons(alo);
1059                 si->si_ack = htons(cb->s_ack);
1060
1061                 if (ipxcksum) {
1062                         si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1063                 } else
1064                         si->si_sum = 0xffff;
1065
1066                 cb->s_outx = 4;
1067                 if (so->so_options & SO_DEBUG || traceallspxs)
1068                         spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1069
1070                 if (so->so_options & SO_DONTROUTE)
1071                         error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1072                 else
1073                         error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1074         }
1075         if (error) {
1076                 return (error);
1077         }
1078         spxstat.spxs_sndtotal++;
1079         /*
1080          * Data sent (as far as we can tell).
1081          * If this advertises a larger window than any other segment,
1082          * then remember the size of the advertized window.
1083          * Any pending ACK has now been sent.
1084          */
1085         cb->s_force = 0;
1086         cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1087         if (SSEQ_GT(alo, cb->s_alo))
1088                 cb->s_alo = alo;
1089         if (sendalot)
1090                 goto again;
1091         cb->s_outx = 5;
1092         return (0);
1093 }
1094
1095 static int spx_do_persist_panics = 0;
1096
1097 static void
1098 spx_setpersist(struct spxpcb *cb)
1099 {
1100         int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1101
1102         if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1103                 panic("spx_output REXMT");
1104         /*
1105          * Start/restart persistance timer.
1106          */
1107         SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1108             t*spx_backoff[cb->s_rxtshift],
1109             SPXTV_PERSMIN, SPXTV_PERSMAX);
1110         if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1111                 cb->s_rxtshift++;
1112 }
1113
1114 void
1115 spx_ctloutput(netmsg_t msg)
1116 {
1117         struct socket *so = msg->base.nm_so;
1118         struct ipxpcb *ipxp = sotoipxpcb(so);
1119         struct sockopt *sopt = msg->ctloutput.nm_sopt;
1120         struct spxpcb *cb;
1121         int mask, error;
1122         short soptval;
1123         u_short usoptval;
1124         int optval;
1125
1126         error = 0;
1127
1128         if (sopt->sopt_level != IPXPROTO_SPX) {
1129                 /* This will have to be changed when we do more general
1130                    stacking of protocols */
1131                 ipx_ctloutput(msg);
1132                 /* msg now invalid */
1133                 return;
1134         }
1135         if (ipxp == NULL) {
1136                 error = EINVAL;
1137                 goto out;
1138         }
1139         cb = ipxtospxpcb(ipxp);
1140
1141         switch (sopt->sopt_dir) {
1142         case SOPT_GET:
1143                 switch (sopt->sopt_name) {
1144                 case SO_HEADERS_ON_INPUT:
1145                         mask = SF_HI;
1146                         goto get_flags;
1147
1148                 case SO_HEADERS_ON_OUTPUT:
1149                         mask = SF_HO;
1150                 get_flags:
1151                         soptval = cb->s_flags & mask;
1152                         error = sooptcopyout(sopt, &soptval, sizeof soptval);
1153                         break;
1154
1155                 case SO_MTU:
1156                         usoptval = cb->s_mtu;
1157                         error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1158                         break;
1159
1160                 case SO_LAST_HEADER:
1161                         error = sooptcopyout(sopt, &cb->s_rhdr, 
1162                                              sizeof cb->s_rhdr);
1163                         break;
1164
1165                 case SO_DEFAULT_HEADERS:
1166                         error = sooptcopyout(sopt, &cb->s_shdr, 
1167                                              sizeof cb->s_shdr);
1168                         break;
1169
1170                 default:
1171                         error = ENOPROTOOPT;
1172                 }
1173                 break;
1174
1175         case SOPT_SET:
1176                 switch (sopt->sopt_name) {
1177                         /* XXX why are these shorts on get and ints on set?
1178                            that doesn't make any sense... */
1179                 case SO_HEADERS_ON_INPUT:
1180                         mask = SF_HI;
1181                         goto set_head;
1182
1183                 case SO_HEADERS_ON_OUTPUT:
1184                         mask = SF_HO;
1185                 set_head:
1186                         error = sooptcopyin(sopt, &optval, sizeof optval,
1187                                             sizeof optval);
1188                         if (error)
1189                                 break;
1190
1191                         if (cb->s_flags & SF_PI) {
1192                                 if (optval)
1193                                         cb->s_flags |= mask;
1194                                 else
1195                                         cb->s_flags &= ~mask;
1196                         } else error = EINVAL;
1197                         break;
1198
1199                 case SO_MTU:
1200                         error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1201                                             sizeof usoptval);
1202                         if (error)
1203                                 break;
1204                         cb->s_mtu = usoptval;
1205                         break;
1206
1207 #ifdef SF_NEWCALL
1208                 case SO_NEWCALL:
1209                         error = sooptcopyin(sopt, &optval, sizeof optval,
1210                                             sizeof optval);
1211                         if (error)
1212                                 break;
1213                         if (optval) {
1214                                 cb->s_flags2 |= SF_NEWCALL;
1215                                 spx_newchecks[5]++;
1216                         } else {
1217                                 cb->s_flags2 &= ~SF_NEWCALL;
1218                                 spx_newchecks[6]++;
1219                         }
1220                         break;
1221 #endif
1222
1223                 case SO_DEFAULT_HEADERS:
1224                         {
1225                                 struct spxhdr sp;
1226
1227                                 error = sooptcopyin(sopt, &sp, sizeof sp,
1228                                                     sizeof sp);
1229                                 if (error)
1230                                         break;
1231                                 cb->s_dt = sp.spx_dt;
1232                                 cb->s_cc = sp.spx_cc & SPX_EM;
1233                         }
1234                         break;
1235
1236                 default:
1237                         error = ENOPROTOOPT;
1238                 }
1239                 break;
1240         }
1241 out:
1242         lwkt_replymsg(&msg->lmsg, error);
1243 }
1244
1245 /*
1246  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
1247  *       will sofree() it when we return.
1248  */
1249 static void
1250 spx_usr_abort(netmsg_t msg)
1251 {
1252         struct socket *so = msg->base.nm_so;
1253         struct ipxpcb *ipxp;
1254         struct spxpcb *cb;
1255
1256         ipxp = sotoipxpcb(so);
1257         cb = ipxtospxpcb(ipxp);
1258
1259         spx_drop(cb, ECONNABORTED);
1260
1261         lwkt_replymsg(&msg->lmsg, 0);
1262 }
1263
1264 /*
1265  * Accept a connection.  Essentially all the work is
1266  * done at higher levels; just return the address
1267  * of the peer, storing through addr.
1268  */
1269 static void
1270 spx_accept(netmsg_t msg)
1271 {
1272         struct socket *so = msg->base.nm_so;
1273         struct sockaddr **nam = msg->accept.nm_nam;
1274         struct ipxpcb *ipxp;
1275         struct sockaddr_ipx *sipx, ssipx;
1276
1277         ipxp = sotoipxpcb(so);
1278         sipx = &ssipx;
1279         bzero(sipx, sizeof *sipx);
1280         sipx->sipx_len = sizeof *sipx;
1281         sipx->sipx_family = AF_IPX;
1282         sipx->sipx_addr = ipxp->ipxp_faddr;
1283         *nam = dup_sockaddr((struct sockaddr *)sipx);
1284
1285         lwkt_replymsg(&msg->lmsg, 0);
1286 }
1287
1288 static int
1289 spx_attach_oncpu(struct socket *so, int proto, struct pru_attach_info *ai)
1290 {
1291         struct ipxpcb *ipxp;
1292         struct spxpcb *cb;
1293         struct mbuf *mm;
1294         struct signalsockbuf *ssb;
1295         int error;
1296
1297         ipxp = sotoipxpcb(so);
1298         cb = ipxtospxpcb(ipxp);
1299
1300         crit_enter();
1301         if (ipxp != NULL) {
1302                 error = EISCONN;
1303                 goto spx_attach_end;
1304         }
1305         error = ipx_pcballoc(so, &ipxpcb_list);
1306         if (error)
1307                 goto spx_attach_end;
1308         if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
1309                 error = soreserve(so, (u_long) 3072, (u_long) 3072,
1310                                   ai->sb_rlimit);
1311                 if (error)
1312                         goto spx_attach_end;
1313         }
1314         ipxp = sotoipxpcb(so);
1315
1316         cb = kmalloc(sizeof *cb, M_PCB, M_INTWAIT | M_ZERO);
1317         ssb = &so->so_snd;
1318
1319         mm = m_getclr(MB_DONTWAIT, MT_HEADER);
1320         if (mm == NULL) {
1321                 kfree(cb, M_PCB);
1322                 error = ENOBUFS;
1323                 goto spx_attach_end;
1324         }
1325         cb->s_ipx_m = mm;
1326         cb->s_ipx = mtod(mm, struct ipx *);
1327         cb->s_state = TCPS_LISTEN;
1328         cb->s_smax = -1;
1329         cb->s_swl1 = -1;
1330         LIST_INIT(&cb->s_q);
1331         cb->s_ipxpcb = ipxp;
1332         cb->s_mtu = 576 - sizeof(struct spx);
1333         cb->s_cwnd = ssb_space(ssb) * CUNIT / cb->s_mtu;
1334         cb->s_ssthresh = cb->s_cwnd;
1335         cb->s_cwmx = ssb_space(ssb) * CUNIT / (2 * sizeof(struct spx));
1336         /* Above is recomputed when connecting to account
1337            for changed buffering or mtu's */
1338         cb->s_rtt = SPXTV_SRTTBASE;
1339         cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1340         SPXT_RANGESET(cb->s_rxtcur,
1341             ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1342             SPXTV_MIN, SPXTV_REXMTMAX);
1343         ipxp->ipxp_pcb = (caddr_t)cb; 
1344 spx_attach_end:
1345         crit_exit();
1346         return error;
1347 }
1348
1349 static void
1350 spx_attach(netmsg_t msg)
1351 {
1352         int error;
1353
1354         error = spx_attach_oncpu(msg->base.nm_so,
1355                                  msg->attach.nm_proto,
1356                                  msg->attach.nm_ai);
1357         lwkt_replymsg(&msg->lmsg, error);
1358 }
1359
1360
1361 static void
1362 spx_bind(netmsg_t msg)
1363 {  
1364         struct socket *so = msg->base.nm_so;
1365         struct ipxpcb *ipxp;
1366         int error;
1367
1368         ipxp = sotoipxpcb(so);
1369
1370         error = ipx_pcbbind(ipxp, msg->bind.nm_nam, msg->bind.nm_td);
1371         lwkt_replymsg(&msg->lmsg, error);
1372 }  
1373    
1374 /*
1375  * Initiate connection to peer.
1376  * Enter SYN_SENT state, and mark socket as connecting.
1377  * Start keep-alive timer, setup prototype header,
1378  * Send initial system packet requesting connection.
1379  */
1380 static void
1381 spx_connect(netmsg_t msg)
1382 {
1383         struct socket *so = msg->base.nm_so;
1384         struct sockaddr *nam = msg->connect.nm_nam;
1385         struct thread *td = msg->connect.nm_td;
1386         struct ipxpcb *ipxp;
1387         struct spxpcb *cb;
1388         int error;
1389
1390         ipxp = sotoipxpcb(so);
1391         cb = ipxtospxpcb(ipxp);
1392
1393         crit_enter();
1394         if (ipxp->ipxp_lport == 0) {
1395                 error = ipx_pcbbind(ipxp, NULL, td);
1396                 if (error)
1397                         goto spx_connect_end;
1398         }
1399         error = ipx_pcbconnect(ipxp, nam, td);
1400         if (error)
1401                 goto spx_connect_end;
1402         soisconnecting(so);
1403         spxstat.spxs_connattempt++;
1404         cb->s_state = TCPS_SYN_SENT;
1405         cb->s_did = 0;
1406         spx_template(cb);
1407         cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1408         cb->s_force = 1 + SPXTV_KEEP;
1409         /*
1410          * Other party is required to respond to
1411          * the port I send from, but he is not
1412          * required to answer from where I am sending to,
1413          * so allow wildcarding.
1414          * original port I am sending to is still saved in
1415          * cb->s_dport.
1416          */
1417         ipxp->ipxp_fport = 0;
1418         error = spx_output(cb, NULL);
1419 spx_connect_end:
1420         crit_exit();
1421         lwkt_replymsg(&msg->lmsg, error);
1422 }
1423
1424 static void
1425 spx_detach(netmsg_t msg)
1426 {
1427         struct socket *so = msg->base.nm_so;
1428         struct ipxpcb *ipxp;
1429         struct spxpcb *cb;
1430         int error;
1431
1432         ipxp = sotoipxpcb(so);
1433         cb = ipxtospxpcb(ipxp);
1434
1435         if (ipxp) {
1436                 crit_enter();
1437                 if (cb->s_state > TCPS_LISTEN)
1438                         spx_disconnect(cb);
1439                 else
1440                         spx_close(cb);
1441                 crit_exit();
1442                 error = 0;
1443         } else {
1444                 error = ENOTCONN;
1445         }
1446         lwkt_replymsg(&msg->lmsg, error);
1447 }
1448
1449 /*
1450  * We may decide later to implement connection closing
1451  * handshaking at the spx level optionally.
1452  * here is the hook to do it:
1453  */
1454 static void
1455 spx_usr_disconnect(netmsg_t msg)
1456 {
1457         struct socket *so = msg->base.nm_so;
1458         struct ipxpcb *ipxp;
1459         struct spxpcb *cb;
1460
1461         ipxp = sotoipxpcb(so);
1462         cb = ipxtospxpcb(ipxp);
1463
1464         crit_enter();
1465         spx_disconnect(cb);
1466         crit_exit();
1467
1468         lwkt_replymsg(&msg->lmsg, 0);
1469 }
1470
1471 static void
1472 spx_listen(netmsg_t msg)
1473 {
1474         struct socket *so = msg->base.nm_so;
1475         struct ipxpcb *ipxp;
1476         struct spxpcb *cb;
1477         int error;
1478
1479         error = 0;
1480         ipxp = sotoipxpcb(so);
1481         cb = ipxtospxpcb(ipxp);
1482
1483         if (ipxp->ipxp_lport == 0)
1484                 error = ipx_pcbbind(ipxp, NULL, msg->listen.nm_td);
1485         if (error == 0)
1486                 cb->s_state = TCPS_LISTEN;
1487         lwkt_replymsg(&msg->lmsg, error);
1488 }
1489
1490 /*
1491  * After a receive, possibly send acknowledgment
1492  * updating allocation.
1493  */
1494 static void
1495 spx_rcvd(netmsg_t msg)
1496 {
1497         struct socket *so = msg->base.nm_so;
1498         struct ipxpcb *ipxp;
1499         struct spxpcb *cb;
1500
1501         ipxp = sotoipxpcb(so);
1502         cb = ipxtospxpcb(ipxp);
1503
1504         crit_enter();
1505         cb->s_flags |= SF_RVD;
1506         spx_output(cb, NULL);
1507         cb->s_flags &= ~SF_RVD;
1508         crit_exit();
1509
1510         lwkt_replymsg(&msg->lmsg, 0);
1511 }
1512
1513 static void
1514 spx_rcvoob(netmsg_t msg)
1515 {
1516         struct mbuf *m = msg->rcvoob.nm_m;
1517         struct socket *so = msg->base.nm_so;
1518         struct ipxpcb *ipxp;
1519         struct spxpcb *cb;
1520         int error;
1521
1522         ipxp = sotoipxpcb(so);
1523         cb = ipxtospxpcb(ipxp);
1524
1525         if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1526             (so->so_state & SS_RCVATMARK)) {
1527                 m->m_len = 1;
1528                 *mtod(m, caddr_t) = cb->s_iobc;
1529                 error = 0;
1530         } else {
1531                 error = EINVAL;
1532         }
1533         lwkt_replymsg(&msg->lmsg, error);
1534 }
1535
1536 static void
1537 spx_send(netmsg_t msg)
1538 {
1539         struct socket *so = msg->base.nm_so;
1540         struct mbuf *m = msg->send.nm_m;
1541         struct mbuf *controlp = msg->send.nm_control;
1542         int flags = msg->send.nm_flags;
1543         struct ipxpcb *ipxp;
1544         struct spxpcb *cb;
1545         int error;
1546
1547         error = 0;
1548         ipxp = sotoipxpcb(so);
1549         cb = ipxtospxpcb(ipxp);
1550
1551         crit_enter();
1552         if (flags & PRUS_OOB) {
1553                 if (ssb_space(&so->so_snd) < -512) {
1554                         error = ENOBUFS;
1555                         goto spx_send_end;
1556                 }
1557                 cb->s_oobflags |= SF_SOOB;
1558         }
1559         if (controlp != NULL) {
1560                 u_short *p = mtod(controlp, u_short *);
1561                 spx_newchecks[2]++;
1562                 if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1563                         cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1564                         spx_newchecks[3]++;
1565                 }
1566                 m_freem(controlp);
1567         }
1568         controlp = NULL;
1569         error = spx_output(cb, m);
1570         m = NULL;
1571 spx_send_end:
1572         if (controlp != NULL)
1573                 m_freem(controlp);
1574         if (m != NULL)
1575                 m_freem(m);
1576         crit_exit();
1577         lwkt_replymsg(&msg->lmsg, error);
1578 }
1579
1580 static void
1581 spx_shutdown(netmsg_t msg)
1582 {
1583         struct socket *so = msg->base.nm_so;
1584         struct ipxpcb *ipxp;
1585         struct spxpcb *cb;
1586         int error;
1587
1588         error = 0;
1589         ipxp = sotoipxpcb(so);
1590         cb = ipxtospxpcb(ipxp);
1591
1592         crit_enter();
1593         socantsendmore(so);
1594         cb = spx_usrclosed(cb);
1595         if (cb != NULL)
1596                 error = spx_output(cb, NULL);
1597         crit_exit();
1598         lwkt_replymsg(&msg->lmsg, error);
1599 }
1600
1601 static void
1602 spx_sp_attach(netmsg_t msg)
1603 {
1604         struct socket *so = msg->base.nm_so;
1605         struct ipxpcb *ipxp;
1606         int error;
1607
1608         error = spx_attach_oncpu(so, msg->attach.nm_proto, msg->attach.nm_ai);
1609         if (error == 0) {
1610                 ipxp = sotoipxpcb(so);
1611                 ((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1612                                         (SF_HI | SF_HO | SF_PI);
1613         }
1614         lwkt_replymsg(&msg->lmsg, error);
1615 }
1616
1617 /*
1618  * Create template to be used to send spx packets on a connection.
1619  * Called after host entry created, fills
1620  * in a skeletal spx header (choosing connection id),
1621  * minimizing the amount of work necessary when the connection is used.
1622  */
1623 static void
1624 spx_template(struct spxpcb *cb)
1625 {
1626         struct ipxpcb *ipxp = cb->s_ipxpcb;
1627         struct ipx *ipx = cb->s_ipx;
1628         struct signalsockbuf *ssb = &(ipxp->ipxp_socket->so_snd);
1629
1630         ipx->ipx_pt = IPXPROTO_SPX;
1631         ipx->ipx_sna = ipxp->ipxp_laddr;
1632         ipx->ipx_dna = ipxp->ipxp_faddr;
1633         cb->s_sid = htons(spx_iss);
1634         spx_iss += SPX_ISSINCR/2;
1635         cb->s_alo = 1;
1636         cb->s_cwnd = (ssb_space(ssb) * CUNIT) / cb->s_mtu;
1637         cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1638                                         of large packets */
1639         cb->s_cwmx = (ssb_space(ssb) * CUNIT) / (2 * sizeof(struct spx));
1640         cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1641                 /* But allow for lots of little packets as well */
1642 }
1643
1644 /*
1645  * Close a SPIP control block:
1646  *      discard spx control block itself
1647  *      discard ipx protocol control block
1648  *      wake up any sleepers
1649  */
1650 static struct spxpcb *
1651 spx_close(struct spxpcb *cb)
1652 {
1653         struct spx_q *q;
1654         struct ipxpcb *ipxp = cb->s_ipxpcb;
1655         struct socket *so = ipxp->ipxp_socket;
1656
1657         while (!LIST_EMPTY(&cb->s_q)) {
1658                 q = LIST_FIRST(&cb->s_q);
1659                 LIST_REMOVE(q, sq_entry);
1660                 m_freem(q->si_mbuf);
1661                 kfree(q, M_SPX_Q);
1662         }
1663         m_free(cb->s_ipx_m);
1664         kfree(cb, M_PCB);
1665         ipxp->ipxp_pcb = 0;
1666         soisdisconnected(so);
1667         ipx_pcbdetach(ipxp);
1668         spxstat.spxs_closed++;
1669         return (NULL);
1670 }
1671
/*
 * User has closed the connection.
 *
 * Someday we may do level 3 handshaking to close a connection or send
 * a xerox style error.  For now this simply closes the control block
 * and returns whatever spx_close() returns.
 */
static struct spxpcb *
spx_usrclosed(struct spxpcb *cb)
{
	cb = spx_close(cb);
	return (cb);
}
1682
/*
 * Disconnect request: currently just closes the control block and
 * returns whatever spx_close() returns.
 */
static struct spxpcb *
spx_disconnect(struct spxpcb *cb)
{
	cb = spx_close(cb);
	return (cb);
}
1688
1689 /*
1690  * Drop connection, reporting
1691  * the specified error.
1692  */
1693 static struct spxpcb *
1694 spx_drop(struct spxpcb *cb, int error)
1695 {
1696         struct socket *so = cb->s_ipxpcb->ipxp_socket;
1697
1698         /*
1699          * someday, in the xerox world
1700          * we will generate error protocol packets
1701          * announcing that the socket has gone away.
1702          */
1703         if (TCPS_HAVERCVDSYN(cb->s_state)) {
1704                 spxstat.spxs_drops++;
1705                 cb->s_state = TCPS_CLOSED;
1706                 /*tcp_output(cb);*/
1707         } else
1708                 spxstat.spxs_conndrops++;
1709         so->so_error = error;
1710         return (spx_close(cb));
1711 }
1712
1713 /*
1714  * Fast timeout routine for processing delayed acks
1715  */
1716 void
1717 spx_fasttimo(void)
1718 {
1719         struct ipxpcb *ipxp;
1720         struct spxpcb *cb;
1721
1722         crit_enter();
1723         LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1724                 if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1725                     (cb->s_flags & SF_DELACK)) {
1726                         cb->s_flags &= ~SF_DELACK;
1727                         cb->s_flags |= SF_ACKNOW;
1728                         spxstat.spxs_delack++;
1729                         spx_output(cb, NULL);
1730                 }
1731         }
1732         crit_exit();
1733 }
1734
1735 /*
1736  * spx protocol timeout routine called every 500 ms.
1737  * Updates the timers in all active pcb's and
1738  * causes finite state machine actions if timers expire.
1739  */
1740 void
1741 spx_slowtimo(void)
1742 {
1743         struct ipxpcb *ip, *ip_temp;
1744         struct spxpcb *cb;
1745         int i;
1746
1747         /*
1748          * Search through tcb's and update active timers.
1749          */
1750         crit_enter();
1751         LIST_FOREACH_MUTABLE(ip, &ipxpcb_list, ipxp_list, ip_temp) {
1752                 cb = ipxtospxpcb(ip);
1753                 if (cb == NULL)
1754                         continue;
1755                 for (i = 0; i < SPXT_NTIMERS; i++) {
1756                         if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1757                                 if (spx_timers(cb, i) == NULL)
1758                                         continue;
1759                         }
1760                 }
1761                 cb->s_idle++;
1762                 if (cb->s_rtt)
1763                         cb->s_rtt++;
1764         }
1765         spx_iss += SPX_ISSINCR/PR_SLOWHZ;               /* increment iss */
1766         crit_exit();
1767 }
1768
1769 /*
1770  * SPX timer processing.
1771  */
1772 static struct spxpcb *
1773 spx_timers(struct spxpcb *cb, int timer)
1774 {
1775         long rexmt;
1776         int win;
1777
1778         cb->s_force = 1 + timer;
1779         switch (timer) {
1780
1781         /*
1782          * 2 MSL timeout in shutdown went off.  TCP deletes connection
1783          * control block.
1784          */
1785         case SPXT_2MSL:
1786                 kprintf("spx: SPXT_2MSL went off for no reason\n");
1787                 cb->s_timer[timer] = 0;
1788                 break;
1789
1790         /*
1791          * Retransmission timer went off.  Message has not
1792          * been acked within retransmit interval.  Back off
1793          * to a longer retransmit interval and retransmit one packet.
1794          */
1795         case SPXT_REXMT:
1796                 if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1797                         cb->s_rxtshift = SPX_MAXRXTSHIFT;
1798                         spxstat.spxs_timeoutdrop++;
1799                         cb = spx_drop(cb, ETIMEDOUT);
1800                         break;
1801                 }
1802                 spxstat.spxs_rexmttimeo++;
1803                 rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1804                 rexmt *= spx_backoff[cb->s_rxtshift];
1805                 SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1806                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1807                 /*
1808                  * If we have backed off fairly far, our srtt
1809                  * estimate is probably bogus.  Clobber it
1810                  * so we'll take the next rtt measurement as our srtt;
1811                  * move the current srtt into rttvar to keep the current
1812                  * retransmit times until then.
1813                  */
1814                 if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1815                         cb->s_rttvar += (cb->s_srtt >> 2);
1816                         cb->s_srtt = 0;
1817                 }
1818                 cb->s_snxt = cb->s_rack;
1819                 /*
1820                  * If timing a packet, stop the timer.
1821                  */
1822                 cb->s_rtt = 0;
1823                 /*
1824                  * See very long discussion in tcp_timer.c about congestion
1825                  * window and sstrhesh
1826                  */
1827                 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1828                 if (win < 2)
1829                         win = 2;
1830                 cb->s_cwnd = CUNIT;
1831                 cb->s_ssthresh = win * CUNIT;
1832                 spx_output(cb, NULL);
1833                 break;
1834
1835         /*
1836          * Persistance timer into zero window.
1837          * Force a probe to be sent.
1838          */
1839         case SPXT_PERSIST:
1840                 spxstat.spxs_persisttimeo++;
1841                 spx_setpersist(cb);
1842                 spx_output(cb, NULL);
1843                 break;
1844
1845         /*
1846          * Keep-alive timer went off; send something
1847          * or drop connection if idle for too long.
1848          */
1849         case SPXT_KEEP:
1850                 spxstat.spxs_keeptimeo++;
1851                 if (cb->s_state < TCPS_ESTABLISHED)
1852                         goto dropit;
1853                 if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1854                         if (cb->s_idle >= SPXTV_MAXIDLE)
1855                                 goto dropit;
1856                         spxstat.spxs_keepprobe++;
1857                         spx_output(cb, NULL);
1858                 } else
1859                         cb->s_idle = 0;
1860                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1861                 break;
1862         dropit:
1863                 spxstat.spxs_keepdrops++;
1864                 cb = spx_drop(cb, ETIMEDOUT);
1865                 break;
1866         }
1867         return (cb);
1868 }