a1c93481b2330478dfaa79c4e22dcbdee3903efb
[dragonfly.git] / sys / netproto / ipx / spx_usrreq.c
1 /*
2  * Copyright (c) 1995, Mike Mitchell
3  * Copyright (c) 1984, 1985, 1986, 1987, 1993
4  *      The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *      This product includes software developed by the University of
17  *      California, Berkeley and its contributors.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)spx_usrreq.h
35  *
36  * $FreeBSD: src/sys/netipx/spx_usrreq.c,v 1.27.2.1 2001/02/22 09:44:18 bp Exp $
37  */
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/proc.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/socketvar2.h>
49
50 #include <sys/thread2.h>
51 #include <sys/msgport2.h>
52
53 #include <net/route.h>
54 #include <netinet/tcp_fsm.h>
55
56 #include "ipx.h"
57 #include "ipx_pcb.h"
58 #include "ipx_var.h"
59 #include "spx.h"
60 #include "spx_timer.h"
61 #include "spx_var.h"
62 #include "spx_debug.h"
63
64 /*
65  * SPX protocol implementation.
66  */
67 static u_short  spx_iss;		/* seeded to 1 by spx_init(); name suggests the initial send sequence -- confirm at its users */
68 static u_short  spx_newchecks[50];	/* ad-hoc event counters; slots 0, 1, 4, 7 and 9 are bumped on the SF_NEWCALL paths */
69 static int      spx_hardnosed;		/* nonzero: a listening pcb drops packets that carry a nonzero si_did or si_seq */
70 static int      spx_use_delack = 0;	/* nonzero: an SPX_SA request sets SF_DELACK instead of SF_ACKNOW */
71 static int      traceallspxs = 0;	/* nonzero: call spx_trace() even when the socket lacks SO_DEBUG */
72 static struct   spx     spx_savesi;	/* copy of the inbound header taken by spx_input() for spx_trace() */
73 static struct   spx_istat spx_istat;	/* input-side counters (gonawy, noconn, notyet, ...) plus the newstats member */
74
75 /* The general SPX statistics live inside spx_istat; spxstat is an alias for that member. */
76 #ifndef spxstat 
77 #define spxstat spx_istat.newstats
78 #endif  
79
80 static int spx_backoff[SPX_MAXRXTSHIFT+1] =	/* not referenced in the visible part of this file; presumably retransmit backoff multipliers -- TODO confirm */
81     { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
82
83 static  struct spxpcb *spx_close(struct spxpcb *cb);	/* file-local protocol helpers start here */
84 static  struct spxpcb *spx_disconnect(struct spxpcb *cb);
85 static  struct spxpcb *spx_drop(struct spxpcb *cb, int error);
86 static  int spx_output(struct spxpcb *cb, struct mbuf *m0);	/* callers pass m0 == NULL when they only want pending output pushed */
87 static  int spx_reass(struct spxpcb *cb, struct spx *si, struct mbuf *si_m);	/* nonzero return: caller still owns and must free the mbuf */
88 static  void spx_setpersist(struct spxpcb *cb);
89 static  void spx_template(struct spxpcb *cb);
90 static  struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
91 static  struct spxpcb *spx_usrclosed(struct spxpcb *cb);
92
93 static  void spx_usr_abort(netmsg_t);	/* netmsg handlers installed in the pr_usrreqs tables below start here */
94 static  void spx_accept(netmsg_t);
95 static  void spx_attach(netmsg_t);	/* attach handler used by spx_usrreqs */
96 static  void spx_bind(netmsg_t);
97 static  void spx_connect(netmsg_t);
98 static  void spx_detach(netmsg_t);
99 static  void spx_usr_disconnect(netmsg_t);
100 static  void spx_listen(netmsg_t);
101 static  void spx_rcvd(netmsg_t);
102 static  void spx_rcvoob(netmsg_t);
103 static  void spx_send(netmsg_t);
104 static  void spx_shutdown(netmsg_t);
105 static  void spx_sp_attach(netmsg_t);	/* attach handler used by spx_usrreq_sps */
106
107 struct  pr_usrreqs spx_usrreqs = {	/* user-request dispatch table for SPX sockets that attach through spx_attach */
108         .pru_abort = spx_usr_abort,
109         .pru_accept = spx_accept,
110         .pru_attach = spx_attach,
111         .pru_bind = spx_bind,
112         .pru_connect = spx_connect,
113         .pru_connect2 = pr_generic_notsupp,	/* connect2 is not supported */
114         .pru_control = ipx_control,	/* control requests are handled by the IPX layer */
115         .pru_detach = spx_detach,
116         .pru_disconnect = spx_usr_disconnect,
117         .pru_listen = spx_listen,
118         .pru_peeraddr = ipx_peeraddr,	/* address queries are handled by the IPX layer */
119         .pru_rcvd = spx_rcvd,
120         .pru_rcvoob = spx_rcvoob,
121         .pru_send = spx_send,
122         .pru_sense = pru_sense_null,
123         .pru_shutdown = spx_shutdown,
124         .pru_sockaddr = ipx_sockaddr,
125         .pru_sosend = sosend,	/* generic socket-layer send/receive */
126         .pru_soreceive = soreceive
127 };
128
129 struct  pr_usrreqs spx_usrreq_sps = {	/* same table as spx_usrreqs except for the attach handler */
130         .pru_abort = spx_usr_abort,
131         .pru_accept = spx_accept,
132         .pru_attach = spx_sp_attach,	/* the only entry that differs; presumably the sequenced-packet flavour -- confirm against the protosw table */
133         .pru_bind = spx_bind,
134         .pru_connect = spx_connect,
135         .pru_connect2 = pr_generic_notsupp,
136         .pru_control = ipx_control,
137         .pru_detach = spx_detach,
138         .pru_disconnect = spx_usr_disconnect,
139         .pru_listen = spx_listen,
140         .pru_peeraddr = ipx_peeraddr,
141         .pru_rcvd = spx_rcvd,
142         .pru_rcvoob = spx_rcvoob,
143         .pru_send = spx_send,
144         .pru_sense = pru_sense_null,
145         .pru_shutdown = spx_shutdown,
146         .pru_sockaddr = ipx_sockaddr,
147         .pru_sosend = sosend,
148         .pru_soreceive = soreceive
149 };
150
151 static MALLOC_DEFINE(M_SPX_Q, "ipx_spx_q", "IPX Packet Management");	/* malloc type for the struct spx_q queue entries that spx_reass() allocates and frees */
152
153 void
154 spx_init(void)
155 {
156
157         spx_iss = 1; /* WRONG !! should fish it out of TODR */
158 }
159
160 void
161 spx_input(struct mbuf *m, struct ipxpcb *ipxp)
162 {
163         struct spxpcb *cb;
164         struct spx *si;
165         struct socket *so;
166         int dropsocket = 0;
167         short ostate = 0;
168
169         spxstat.spxs_rcvtotal++;
170         if (ipxp == NULL) {
171                 panic("No ipxpcb in spx_input");
172                 return;
173         }
174
175         cb = ipxtospxpcb(ipxp);
176         if (cb == NULL)
177                 goto bad;
178
179         if (m->m_len < sizeof(struct spx)) {
180                 if ((m = m_pullup(m, sizeof(*si))) == NULL) {
181                         spxstat.spxs_rcvshort++;
182                         return;
183                 }
184         }
185         si = mtod(m, struct spx *);
186         si->si_seq = ntohs(si->si_seq);
187         si->si_ack = ntohs(si->si_ack);
188         si->si_alo = ntohs(si->si_alo);
189
190         so = ipxp->ipxp_socket;
191
192         if (so->so_options & SO_DEBUG || traceallspxs) {
193                 ostate = cb->s_state;
194                 spx_savesi = *si;
195         }
196         if (so->so_options & SO_ACCEPTCONN) {
197                 struct spxpcb *ocb = cb;
198
199                 so = sonewconn(so, 0);
200                 if (so == NULL) {
201                         goto drop;
202                 }
203                 /*
204                  * This is ugly, but ....
205                  *
206                  * Mark socket as temporary until we're
207                  * committed to keeping it.  The code at
208                  * ``drop'' and ``dropwithreset'' check the
209                  * flag dropsocket to see if the temporary
210                  * socket created here should be discarded.
211                  * We mark the socket as discardable until
212                  * we're committed to it below in TCPS_LISTEN.
213                  */
214                 dropsocket++;
215                 ipxp = (struct ipxpcb *)so->so_pcb;
216                 ipxp->ipxp_laddr = si->si_dna;
217                 cb = ipxtospxpcb(ipxp);
218                 cb->s_mtu = ocb->s_mtu;         /* preserve sockopts */
219                 cb->s_flags = ocb->s_flags;     /* preserve sockopts */
220                 cb->s_flags2 = ocb->s_flags2;   /* preserve sockopts */
221                 cb->s_state = TCPS_LISTEN;
222         }
223
224         /*
225          * Packet received on connection.
226          * reset idle time and keep-alive timer;
227          */
228         cb->s_idle = 0;
229         cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
230
231         switch (cb->s_state) {
232
233         case TCPS_LISTEN:{
234                 struct sockaddr_ipx *sipx, ssipx;
235                 struct ipx_addr laddr;
236
237                 /*
238                  * If somebody here was carying on a conversation
239                  * and went away, and his pen pal thinks he can
240                  * still talk, we get the misdirected packet.
241                  */
242                 if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
243                         spx_istat.gonawy++;
244                         goto dropwithreset;
245                 }
246                 sipx = &ssipx;
247                 bzero(sipx, sizeof *sipx);
248                 sipx->sipx_len = sizeof(*sipx);
249                 sipx->sipx_family = AF_IPX;
250                 sipx->sipx_addr = si->si_sna;
251                 laddr = ipxp->ipxp_laddr;
252                 if (ipx_nullhost(laddr))
253                         ipxp->ipxp_laddr = si->si_dna;
254                 if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
255                         ipxp->ipxp_laddr = laddr;
256                         spx_istat.noconn++;
257                         goto drop;
258                 }
259                 spx_template(cb);
260                 dropsocket = 0;         /* committed to socket */
261                 cb->s_did = si->si_sid;
262                 cb->s_rack = si->si_ack;
263                 cb->s_ralo = si->si_alo;
264 #define THREEWAYSHAKE
265 #ifdef THREEWAYSHAKE
266                 cb->s_state = TCPS_SYN_RECEIVED;
267                 cb->s_force = 1 + SPXT_KEEP;
268                 spxstat.spxs_accepts++;
269                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
270                 }
271                 break;
272         /*
273          * This state means that we have heard a response
274          * to our acceptance of their connection
275          * It is probably logically unnecessary in this
276          * implementation.
277          */
278          case TCPS_SYN_RECEIVED: {
279                 if (si->si_did != cb->s_sid) {
280                         spx_istat.wrncon++;
281                         goto drop;
282                 }
283 #endif
284                 ipxp->ipxp_fport =  si->si_sport;
285                 cb->s_timer[SPXT_REXMT] = 0;
286                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
287                 soisconnected(so);
288                 cb->s_state = TCPS_ESTABLISHED;
289                 spxstat.spxs_accepts++;
290                 }
291                 break;
292
293         /*
294          * This state means that we have gotten a response
295          * to our attempt to establish a connection.
296          * We fill in the data from the other side,
297          * telling us which port to respond to, instead of the well-
298          * known one we might have sent to in the first place.
299          * We also require that this is a response to our
300          * connection id.
301          */
302         case TCPS_SYN_SENT:
303                 if (si->si_did != cb->s_sid) {
304                         spx_istat.notme++;
305                         goto drop;
306                 }
307                 spxstat.spxs_connects++;
308                 cb->s_did = si->si_sid;
309                 cb->s_rack = si->si_ack;
310                 cb->s_ralo = si->si_alo;
311                 cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
312                 cb->s_timer[SPXT_REXMT] = 0;
313                 cb->s_flags |= SF_ACKNOW;
314                 soisconnected(so);
315                 cb->s_state = TCPS_ESTABLISHED;
316                 /* Use roundtrip time of connection request for initial rtt */
317                 if (cb->s_rtt) {
318                         cb->s_srtt = cb->s_rtt << 3;
319                         cb->s_rttvar = cb->s_rtt << 1;
320                         SPXT_RANGESET(cb->s_rxtcur,
321                             ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
322                             SPXTV_MIN, SPXTV_REXMTMAX);
323                             cb->s_rtt = 0;
324                 }
325         }
326         if (so->so_options & SO_DEBUG || traceallspxs)
327                 spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
328
329         m->m_len -= sizeof(struct ipx);
330         m->m_pkthdr.len -= sizeof(struct ipx);
331         m->m_data += sizeof(struct ipx);
332
333         if (spx_reass(cb, si, m)) {
334                 m_freem(m);
335         }
336         if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
337                 spx_output(cb, NULL);
338         cb->s_flags &= ~(SF_WIN|SF_RXT);
339         return;
340
341 dropwithreset:
342         if (dropsocket)
343                 soabort(so);
344         si->si_seq = ntohs(si->si_seq);
345         si->si_ack = ntohs(si->si_ack);
346         si->si_alo = ntohs(si->si_alo);
347         m_freem(m);
348         if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
349                 spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
350         return;
351
352 drop:
353 bad:
354         if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
355             traceallspxs)
356                 spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
357         m_freem(m);
358 }
359
360 static int spxrexmtthresh = 3;
361
362 /*
363  * This is structurally similar to the tcp reassembly routine
364  * but its function is somewhat different:  It merely queues
365  * packets up, and suppresses duplicates.
366  */
367 static int
368 spx_reass(struct spxpcb *cb, struct spx *si, struct mbuf *si_m)
369 {
370         struct spx_q *q, *nq, *q_temp;
371         struct mbuf *m;
372         struct socket *so = cb->s_ipxpcb->ipxp_socket;
373         char packetp = cb->s_flags & SF_HI;
374         int incr;
375         char wakeup = 0;
376
377         if (si == NULL)
378                 goto present;
379         /*
380          * Update our news from them.
381          */
382         if (si->si_cc & SPX_SA)
383                 cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
384         if (SSEQ_GT(si->si_alo, cb->s_ralo))
385                 cb->s_flags |= SF_WIN;
386         if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
387                 if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
388                         spxstat.spxs_rcvdupack++;
389                         /*
390                          * If this is a completely duplicate ack
391                          * and other conditions hold, we assume
392                          * a packet has been dropped and retransmit
393                          * it exactly as in tcp_input().
394                          */
395                         if (si->si_ack != cb->s_rack ||
396                             si->si_alo != cb->s_ralo)
397                                 cb->s_dupacks = 0;
398                         else if (++cb->s_dupacks == spxrexmtthresh) {
399                                 u_short onxt = cb->s_snxt;
400                                 int cwnd = cb->s_cwnd;
401
402                                 cb->s_snxt = si->si_ack;
403                                 cb->s_cwnd = CUNIT;
404                                 cb->s_force = 1 + SPXT_REXMT;
405                                 spx_output(cb, NULL);
406                                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
407                                 cb->s_rtt = 0;
408                                 if (cwnd >= 4 * CUNIT)
409                                         cb->s_cwnd = cwnd / 2;
410                                 if (SSEQ_GT(onxt, cb->s_snxt))
411                                         cb->s_snxt = onxt;
412                                 return (1);
413                         }
414                 } else
415                         cb->s_dupacks = 0;
416                 goto update_window;
417         }
418         cb->s_dupacks = 0;
419         /*
420          * If our correspondent acknowledges data we haven't sent
421          * TCP would drop the packet after acking.  We'll be a little
422          * more permissive
423          */
424         if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
425                 spxstat.spxs_rcvacktoomuch++;
426                 si->si_ack = cb->s_smax + 1;
427         }
428         spxstat.spxs_rcvackpack++;
429         /*
430          * If transmit timer is running and timed sequence
431          * number was acked, update smoothed round trip time.
432          * See discussion of algorithm in tcp_input.c
433          */
434         if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
435                 spxstat.spxs_rttupdated++;
436                 if (cb->s_srtt != 0) {
437                         short delta;
438                         delta = cb->s_rtt - (cb->s_srtt >> 3);
439                         if ((cb->s_srtt += delta) <= 0)
440                                 cb->s_srtt = 1;
441                         if (delta < 0)
442                                 delta = -delta;
443                         delta -= (cb->s_rttvar >> 2);
444                         if ((cb->s_rttvar += delta) <= 0)
445                                 cb->s_rttvar = 1;
446                 } else {
447                         /*
448                          * No rtt measurement yet
449                          */
450                         cb->s_srtt = cb->s_rtt << 3;
451                         cb->s_rttvar = cb->s_rtt << 1;
452                 }
453                 cb->s_rtt = 0;
454                 cb->s_rxtshift = 0;
455                 SPXT_RANGESET(cb->s_rxtcur,
456                         ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
457                         SPXTV_MIN, SPXTV_REXMTMAX);
458         }
459         /*
460          * If all outstanding data is acked, stop retransmit
461          * timer and remember to restart (more output or persist).
462          * If there is more data to be acked, restart retransmit
463          * timer, using current (possibly backed-off) value;
464          */
465         if (si->si_ack == cb->s_smax + 1) {
466                 cb->s_timer[SPXT_REXMT] = 0;
467                 cb->s_flags |= SF_RXT;
468         } else if (cb->s_timer[SPXT_PERSIST] == 0)
469                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
470         /*
471          * When new data is acked, open the congestion window.
472          * If the window gives us less than ssthresh packets
473          * in flight, open exponentially (maxseg at a time).
474          * Otherwise open linearly (maxseg^2 / cwnd at a time).
475          */
476         incr = CUNIT;
477         if (cb->s_cwnd > cb->s_ssthresh)
478                 incr = max(incr * incr / cb->s_cwnd, 1);
479         cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
480         /*
481          * Trim Acked data from output queue.
482          */
483         while ((m = so->so_snd.ssb_mb) != NULL) {
484                 if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
485                         sbdroprecord(&so->so_snd.sb);
486                 else
487                         break;
488         }
489         sowwakeup(so);
490         cb->s_rack = si->si_ack;
491 update_window:
492         if (SSEQ_LT(cb->s_snxt, cb->s_rack))
493                 cb->s_snxt = cb->s_rack;
494         if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
495             (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
496              (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
497                 /* keep track of pure window updates */
498                 if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
499                     && SSEQ_LT(cb->s_ralo, si->si_alo)) {
500                         spxstat.spxs_rcvwinupd++;
501                         spxstat.spxs_rcvdupack--;
502                 }
503                 cb->s_ralo = si->si_alo;
504                 cb->s_swl1 = si->si_seq;
505                 cb->s_swl2 = si->si_ack;
506                 cb->s_swnd = (1 + si->si_alo - si->si_ack);
507                 if (cb->s_swnd > cb->s_smxw)
508                         cb->s_smxw = cb->s_swnd;
509                 cb->s_flags |= SF_WIN;
510         }
511         /*
512          * If this packet number is higher than that which
513          * we have allocated refuse it, unless urgent
514          */
515         if (SSEQ_GT(si->si_seq, cb->s_alo)) {
516                 if (si->si_cc & SPX_SP) {
517                         spxstat.spxs_rcvwinprobe++;
518                         return (1);
519                 } else
520                         spxstat.spxs_rcvpackafterwin++;
521                 if (si->si_cc & SPX_OB) {
522                         if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
523                                 m_freem(si_m);
524                                 return (0);
525                         } /* else queue this packet; */
526                 } else {
527                         /*register struct socket *so = cb->s_ipxpcb->ipxp_socket;
528                         if (so->so_state && SS_NOFDREF) {
529                                 spx_close(cb);
530                         } else
531                                        would crash system*/
532                         spx_istat.notyet++;
533                         m_freem(si_m);
534                         return (0);
535                 }
536         }
537         /*
538          * If this is a system packet, we don't need to
539          * queue it up, and won't update acknowledge #
540          */
541         if (si->si_cc & SPX_SP) {
542                 return (1);
543         }
544         /*
545          * We have already seen this packet, so drop.
546          */
547         if (SSEQ_LT(si->si_seq, cb->s_ack)) {
548                 spx_istat.bdreas++;
549                 spxstat.spxs_rcvduppack++;
550                 if (si->si_seq == cb->s_ack - 1)
551                         spx_istat.lstdup++;
552                 return (1);
553         }
554         /*
555          * Loop through all packets queued up to insert in
556          * appropriate sequence.
557          */
558         LIST_FOREACH(q, &cb->s_q, sq_entry) {
559                 if (si->si_seq == SI(q)->si_seq) {
560                         spxstat.spxs_rcvduppack++;
561                         return (1);
562                 }
563                 if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
564                         spxstat.spxs_rcvoopack++;
565                         break;
566                 }
567         }
568         nq = kmalloc(sizeof(struct spx_q), M_SPX_Q, M_INTNOWAIT);
569         if (nq == NULL) {
570                 m_freem(si_m);
571                 return (0);
572         }
573         if (q == NULL)
574                 LIST_INSERT_HEAD(&cb->s_q, nq, sq_entry);
575         else
576                 LIST_INSERT_BEFORE(q, nq, sq_entry);
577         nq->si_mbuf = si_m;
578         /*
579          * If this packet is urgent, inform process
580          */
581         if (si->si_cc & SPX_OB) {
582                 cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
583                 sohasoutofband(so);
584                 cb->s_oobflags |= SF_IOOB;
585         }
586 present:
587 #define SPINC sizeof(struct spxhdr)
588         /*
589          * Loop through all packets queued up to update acknowledge
590          * number, and present all acknowledged data to user;
591          * If in packet interface mode, show packet headers.
592          */
593         LIST_FOREACH_MUTABLE(q, &cb->s_q, sq_entry, q_temp) {
594                   if (SI(q)->si_seq == cb->s_ack) {
595                         cb->s_ack++;
596                         m = q->si_mbuf;
597                         if (SI(q)->si_cc & SPX_OB) {
598                                 cb->s_oobflags &= ~SF_IOOB;
599                                 if (so->so_rcv.ssb_cc)
600                                         so->so_oobmark = so->so_rcv.ssb_cc;
601                                 else
602                                         sosetstate(so, SS_RCVATMARK);
603                         }
604                         LIST_REMOVE(q, sq_entry);
605                         kfree(q, M_SPX_Q);
606                         wakeup = 1;
607                         spxstat.spxs_rcvpack++;
608 #ifdef SF_NEWCALL
609                         if (cb->s_flags2 & SF_NEWCALL) {
610                                 struct spxhdr *sp = mtod(m, struct spxhdr *);
611                                 u_char dt = sp->spx_dt;
612                                 spx_newchecks[4]++;
613                                 if (dt != cb->s_rhdr.spx_dt) {
614                                         struct mbuf *mm =
615                                            m_getclr(MB_DONTWAIT, MT_CONTROL);
616                                         spx_newchecks[0]++;
617                                         if (mm != NULL) {
618                                                 u_short *s =
619                                                         mtod(mm, u_short *);
620                                                 cb->s_rhdr.spx_dt = dt;
621                                                 mm->m_len = 5; /*XXX*/
622                                                 s[0] = 5;
623                                                 s[1] = 1;
624                                                 *(u_char *)(&s[2]) = dt;
625                                                 sbappend(&so->so_rcv.sb, mm);
626                                         }
627                                 }
628                                 if (sp->spx_cc & SPX_OB) {
629                                         m_chtype(m, MT_OOBDATA);
630                                         spx_newchecks[1]++;
631                                         so->so_oobmark = 0;
632                                         soclrstate(so, SS_RCVATMARK);
633                                 }
634                                 if (packetp == 0) {
635                                         m->m_data += SPINC;
636                                         m->m_len -= SPINC;
637                                         m->m_pkthdr.len -= SPINC;
638                                 }
639                                 if ((sp->spx_cc & SPX_EM) || packetp) {
640                                         sbappendrecord(&so->so_rcv.sb, m);
641                                         spx_newchecks[9]++;
642                                 } else
643                                         sbappend(&so->so_rcv.sb, m);
644                         } else
645 #endif
646                         if (packetp) {
647                                 sbappendrecord(&so->so_rcv.sb, m);
648                         } else {
649                                 cb->s_rhdr = *mtod(m, struct spxhdr *);
650                                 m->m_data += SPINC;
651                                 m->m_len -= SPINC;
652                                 m->m_pkthdr.len -= SPINC;
653                                 sbappend(&so->so_rcv.sb, m);
654                         }
655                   } else
656                         break;
657         }
658         if (wakeup)
659                 sorwakeup(so);
660         return (0);
661 }
662
663 void
664 spx_ctlinput(netmsg_t msg)
665 {
666         /*struct socket *so = msg->base.nm_so;*/
667         int cmd = msg->ctlinput.nm_cmd;
668         struct sockaddr *arg_as_sa = msg->ctlinput.nm_arg;
669         caddr_t arg = (/* XXX */ caddr_t)arg_as_sa;
670         struct sockaddr_ipx *sipx;
671
672         if (cmd < 0 || cmd > PRC_NCMDS)
673                 goto out;
674
675         switch (cmd) {
676         case PRC_ROUTEDEAD:
677                 break;
678         case PRC_IFDOWN:
679         case PRC_HOSTDEAD:
680         case PRC_HOSTUNREACH:
681                 sipx = (struct sockaddr_ipx *)arg;
682                 if (sipx->sipx_family != AF_IPX)
683                         break;
684                 break;
685         default:
686                 break;
687         }
688 out:
689         lwkt_replymsg(&msg->lmsg, 0);
690 }
691
692 static int
693 spx_output(struct spxpcb *cb, struct mbuf *m0)
694 {
695         struct socket *so = cb->s_ipxpcb->ipxp_socket;
696         struct mbuf *m = NULL;
697         struct spx *si = NULL;
698         struct signalsockbuf *ssb = &so->so_snd;
699         int len = 0, win, rcv_win;
700         short span, off, recordp = 0;
701         u_short alo;
702         int error = 0, sendalot;
703 #ifdef notdef
704         int idle;
705 #endif
706         struct mbuf *mprev;
707
708         if (m0 != NULL) {
709                 int mtu = cb->s_mtu;
710                 int datalen;
711                 /*
712                  * Make sure that packet isn't too big.
713                  */
714                 for (m = m0; m != NULL; m = m->m_next) {
715                         mprev = m;
716                         len += m->m_len;
717                         if (m->m_flags & M_EOR)
718                                 recordp = 1;
719                 }
720                 datalen = (cb->s_flags & SF_HO) ?
721                                 len - sizeof(struct spxhdr) : len;
722                 if (datalen > mtu) {
723                         if (cb->s_flags & SF_PI) {
724                                 m_freem(m0);
725                                 return (EMSGSIZE);
726                         } else {
727                                 int oldEM = cb->s_cc & SPX_EM;
728
729                                 cb->s_cc &= ~SPX_EM;
730                                 while (len > mtu) {
731                                         /*
732                                          * Here we are only being called
733                                          * from usrreq(), so it is OK to
734                                          * block.
735                                          */
736                                         m = m_copym(m0, 0, mtu, MB_WAIT);
737                                         if (cb->s_flags & SF_NEWCALL) {
738                                             struct mbuf *mm = m;
739                                             spx_newchecks[7]++;
740                                             while (mm != NULL) {
741                                                 mm->m_flags &= ~M_EOR;
742                                                 mm = mm->m_next;
743                                             }
744                                         }
745                                         error = spx_output(cb, m);
746                                         if (error) {
747                                                 cb->s_cc |= oldEM;
748                                                 m_freem(m0);
749                                                 return (error);
750                                         }
751                                         m_adj(m0, mtu);
752                                         len -= mtu;
753                                 }
754                                 cb->s_cc |= oldEM;
755                         }
756                 }
757                 /*
758                  * Force length even, by adding a "garbage byte" if
759                  * necessary.
760                  */
761                 if (len & 1) {
762                         m = mprev;
763                         if (M_TRAILINGSPACE(m) >= 1)
764                                 m->m_len++;
765                         else {
766                                 struct mbuf *m1 = m_get(MB_DONTWAIT, MT_DATA);
767
768                                 if (m1 == NULL) {
769                                         m_freem(m0);
770                                         return (ENOBUFS);
771                                 }
772                                 m1->m_len = 1;
773                                 *(mtod(m1, u_char *)) = 0;
774                                 m->m_next = m1;
775                         }
776                 }
777                 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
778                 if (m == NULL) {
779                         m_freem(m0);
780                         return (ENOBUFS);
781                 }
782                 /*
783                  * Fill in mbuf with extended SP header
784                  * and addresses and length put into network format.
785                  */
786                 MH_ALIGN(m, sizeof(struct spx));
787                 m->m_len = sizeof(struct spx);
788                 m->m_next = m0;
789                 si = mtod(m, struct spx *);
790                 si->si_i = *cb->s_ipx;
791                 si->si_s = cb->s_shdr;
792                 if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
793                         struct spxhdr *sh;
794                         if (m0->m_len < sizeof(*sh)) {
795                                 if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
796                                         m_free(m);
797                                         m_freem(m0);
798                                         return (EINVAL);
799                                 }
800                                 m->m_next = m0;
801                         }
802                         sh = mtod(m0, struct spxhdr *);
803                         si->si_dt = sh->spx_dt;
804                         si->si_cc |= sh->spx_cc & SPX_EM;
805                         m0->m_len -= sizeof(*sh);
806                         m0->m_data += sizeof(*sh);
807                         len -= sizeof(*sh);
808                 }
809                 len += sizeof(*si);
810                 if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
811                         si->si_cc |= SPX_EM;
812                         spx_newchecks[8]++;
813                 }
814                 if (cb->s_oobflags & SF_SOOB) {
815                         /*
816                          * Per jqj@cornell:
817                          * make sure OB packets convey exactly 1 byte.
818                          * If the packet is 1 byte or larger, we
819                          * have already guaranted there to be at least
820                          * one garbage byte for the checksum, and
821                          * extra bytes shouldn't hurt!
822                          */
823                         if (len > sizeof(*si)) {
824                                 si->si_cc |= SPX_OB;
825                                 len = (1 + sizeof(*si));
826                         }
827                 }
828                 si->si_len = htons((u_short)len);
829                 m->m_pkthdr.len = ((len - 1) | 1) + 1;
830                 /*
831                  * queue stuff up for output
832                  */
833                 sbappendrecord(&ssb->sb, m);
834                 cb->s_seq++;
835         }
836 #ifdef notdef
837         idle = (cb->s_smax == (cb->s_rack - 1));
838 #endif
839 again:
840         sendalot = 0;
841         off = cb->s_snxt - cb->s_rack;
842         win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
843
844         /*
845          * If in persist timeout with window of 0, send a probe.
846          * Otherwise, if window is small but nonzero
847          * and timer expired, send what we can and go into
848          * transmit state.
849          */
850         if (cb->s_force == 1 + SPXT_PERSIST) {
851                 if (win != 0) {
852                         cb->s_timer[SPXT_PERSIST] = 0;
853                         cb->s_rxtshift = 0;
854                 }
855         }
856         span = cb->s_seq - cb->s_rack;
857         len = min(span, win) - off;
858
859         if (len < 0) {
860                 /*
861                  * Window shrank after we went into it.
862                  * If window shrank to 0, cancel pending
863                  * restransmission and pull s_snxt back
864                  * to (closed) window.  We will enter persist
865                  * state below.  If the widndow didn't close completely,
866                  * just wait for an ACK.
867                  */
868                 len = 0;
869                 if (win == 0) {
870                         cb->s_timer[SPXT_REXMT] = 0;
871                         cb->s_snxt = cb->s_rack;
872                 }
873         }
874         if (len > 1)
875                 sendalot = 1;
876         rcv_win = ssb_space(&so->so_rcv);
877
878         /*
879          * Send if we owe peer an ACK.
880          */
881         if (cb->s_oobflags & SF_SOOB) {
882                 /*
883                  * must transmit this out of band packet
884                  */
885                 cb->s_oobflags &= ~ SF_SOOB;
886                 sendalot = 1;
887                 spxstat.spxs_sndurg++;
888                 goto found;
889         }
890         if (cb->s_flags & SF_ACKNOW)
891                 goto send;
892         if (cb->s_state < TCPS_ESTABLISHED)
893                 goto send;
894         /*
895          * Silly window can't happen in spx.
896          * Code from tcp deleted.
897          */
898         if (len)
899                 goto send;
900         /*
901          * Compare available window to amount of window
902          * known to peer (as advertised window less
903          * next expected input.)  If the difference is at least two
904          * packets or at least 35% of the mximum possible window,
905          * then want to send a window update to peer.
906          */
907         if (rcv_win > 0) {
908                 u_short delta =  1 + cb->s_alo - cb->s_ack;
909                 int adv = rcv_win - (delta * cb->s_mtu);
910                 
911                 if ((so->so_rcv.ssb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
912                     (100 * adv / so->so_rcv.ssb_hiwat >= 35)) {
913                         spxstat.spxs_sndwinup++;
914                         cb->s_flags |= SF_ACKNOW;
915                         goto send;
916                 }
917
918         }
919         /*
920          * Many comments from tcp_output.c are appropriate here
921          * including . . .
922          * If send window is too small, there is data to transmit, and no
923          * retransmit or persist is pending, then go to persist state.
924          * If nothing happens soon, send when timer expires:
925          * if window is nonzero, transmit what we can,
926          * otherwise send a probe.
927          */
928         if (so->so_snd.ssb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
929                 cb->s_timer[SPXT_PERSIST] == 0) {
930                         cb->s_rxtshift = 0;
931                         spx_setpersist(cb);
932         }
933         /*
934          * No reason to send a packet, just return.
935          */
936         cb->s_outx = 1;
937         return (0);
938
939 send:
940         /*
941          * Find requested packet.
942          */
943         si = NULL;
944         if (len > 0) {
945                 cb->s_want = cb->s_snxt;
946                 for (m = ssb->ssb_mb; m != NULL; m = m->m_nextpkt) {
947                         si = mtod(m, struct spx *);
948                         if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
949                                 break;
950                 }
951         found:
952                 if (si != NULL) {
953                         if (si->si_seq == cb->s_snxt)
954                                         cb->s_snxt++;
955                                 else
956                                         spxstat.spxs_sndvoid++, si = NULL;
957                 }
958         }
959         /*
960          * update window
961          */
962         if (rcv_win < 0)
963                 rcv_win = 0;
964         alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
965         if (SSEQ_LT(alo, cb->s_alo)) 
966                 alo = cb->s_alo;
967
968         if (si != NULL) {
969                 /*
970                  * must make a copy of this packet for
971                  * ipx_output to monkey with
972                  */
973                 m = m_copy(m, 0, (int)M_COPYALL);
974                 if (m == NULL) {
975                         return (ENOBUFS);
976                 }
977                 si = mtod(m, struct spx *);
978                 if (SSEQ_LT(si->si_seq, cb->s_smax))
979                         spxstat.spxs_sndrexmitpack++;
980                 else
981                         spxstat.spxs_sndpack++;
982         } else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
983                 /*
984                  * Must send an acknowledgement or a probe
985                  */
986                 if (cb->s_force)
987                         spxstat.spxs_sndprobe++;
988                 if (cb->s_flags & SF_ACKNOW)
989                         spxstat.spxs_sndacks++;
990                 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
991                 if (m == NULL)
992                         return (ENOBUFS);
993                 /*
994                  * Fill in mbuf with extended SP header
995                  * and addresses and length put into network format.
996                  */
997                 MH_ALIGN(m, sizeof(struct spx));
998                 m->m_len = sizeof(*si);
999                 m->m_pkthdr.len = sizeof(*si);
1000                 si = mtod(m, struct spx *);
1001                 si->si_i = *cb->s_ipx;
1002                 si->si_s = cb->s_shdr;
1003                 si->si_seq = cb->s_smax + 1;
1004                 si->si_len = htons(sizeof(*si));
1005                 si->si_cc |= SPX_SP;
1006         } else {
1007                 cb->s_outx = 3;
1008                 if (so->so_options & SO_DEBUG || traceallspxs)
1009                         spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1010                 return (0);
1011         }
1012         /*
1013          * Stuff checksum and output datagram.
1014          */
1015         if ((si->si_cc & SPX_SP) == 0) {
1016                 if (cb->s_force != (1 + SPXT_PERSIST) ||
1017                     cb->s_timer[SPXT_PERSIST] == 0) {
1018                         /*
1019                          * If this is a new packet and we are not currently 
1020                          * timing anything, time this one.
1021                          */
1022                         if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1023                                 cb->s_smax = si->si_seq;
1024                                 if (cb->s_rtt == 0) {
1025                                         spxstat.spxs_segstimed++;
1026                                         cb->s_rtseq = si->si_seq;
1027                                         cb->s_rtt = 1;
1028                                 }
1029                         }
1030                         /*
1031                          * Set rexmt timer if not currently set,
1032                          * Initial value for retransmit timer is smoothed
1033                          * round-trip time + 2 * round-trip time variance.
1034                          * Initialize shift counter which is used for backoff
1035                          * of retransmit time.
1036                          */
1037                         if (cb->s_timer[SPXT_REXMT] == 0 &&
1038                             cb->s_snxt != cb->s_rack) {
1039                                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1040                                 if (cb->s_timer[SPXT_PERSIST]) {
1041                                         cb->s_timer[SPXT_PERSIST] = 0;
1042                                         cb->s_rxtshift = 0;
1043                                 }
1044                         }
1045                 } else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1046                         cb->s_smax = si->si_seq;
1047                 }
1048         } else if (cb->s_state < TCPS_ESTABLISHED) {
1049                 if (cb->s_rtt == 0)
1050                         cb->s_rtt = 1; /* Time initial handshake */
1051                 if (cb->s_timer[SPXT_REXMT] == 0)
1052                         cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1053         }
1054         {
1055                 /*
1056                  * Do not request acks when we ack their data packets or
1057                  * when we do a gratuitous window update.
1058                  */
1059                 if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1060                                 si->si_cc |= SPX_SA;
1061                 si->si_seq = htons(si->si_seq);
1062                 si->si_alo = htons(alo);
1063                 si->si_ack = htons(cb->s_ack);
1064
1065                 if (ipxcksum) {
1066                         si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1067                 } else
1068                         si->si_sum = 0xffff;
1069
1070                 cb->s_outx = 4;
1071                 if (so->so_options & SO_DEBUG || traceallspxs)
1072                         spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1073
1074                 if (so->so_options & SO_DONTROUTE)
1075                         error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1076                 else
1077                         error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1078         }
1079         if (error) {
1080                 return (error);
1081         }
1082         spxstat.spxs_sndtotal++;
1083         /*
1084          * Data sent (as far as we can tell).
1085          * If this advertises a larger window than any other segment,
1086          * then remember the size of the advertized window.
1087          * Any pending ACK has now been sent.
1088          */
1089         cb->s_force = 0;
1090         cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1091         if (SSEQ_GT(alo, cb->s_alo))
1092                 cb->s_alo = alo;
1093         if (sendalot)
1094                 goto again;
1095         cb->s_outx = 5;
1096         return (0);
1097 }
1098
1099 static int spx_do_persist_panics = 0;
1100
1101 static void
1102 spx_setpersist(struct spxpcb *cb)
1103 {
1104         int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1105
1106         if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1107                 panic("spx_output REXMT");
1108         /*
1109          * Start/restart persistance timer.
1110          */
1111         SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1112             t*spx_backoff[cb->s_rxtshift],
1113             SPXTV_PERSMIN, SPXTV_PERSMAX);
1114         if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1115                 cb->s_rxtshift++;
1116 }
1117
1118 void
1119 spx_ctloutput(netmsg_t msg)
1120 {
1121         struct socket *so = msg->base.nm_so;
1122         struct ipxpcb *ipxp = sotoipxpcb(so);
1123         struct sockopt *sopt = msg->ctloutput.nm_sopt;
1124         struct spxpcb *cb;
1125         int mask, error;
1126         short soptval;
1127         u_short usoptval;
1128         int optval;
1129
1130         error = 0;
1131
1132         if (sopt->sopt_level != IPXPROTO_SPX) {
1133                 /* This will have to be changed when we do more general
1134                    stacking of protocols */
1135                 ipx_ctloutput(msg);
1136                 /* msg now invalid */
1137                 return;
1138         }
1139         if (ipxp == NULL) {
1140                 error = EINVAL;
1141                 goto out;
1142         }
1143         cb = ipxtospxpcb(ipxp);
1144
1145         switch (sopt->sopt_dir) {
1146         case SOPT_GET:
1147                 switch (sopt->sopt_name) {
1148                 case SO_HEADERS_ON_INPUT:
1149                         mask = SF_HI;
1150                         goto get_flags;
1151
1152                 case SO_HEADERS_ON_OUTPUT:
1153                         mask = SF_HO;
1154                 get_flags:
1155                         soptval = cb->s_flags & mask;
1156                         error = sooptcopyout(sopt, &soptval, sizeof soptval);
1157                         break;
1158
1159                 case SO_MTU:
1160                         usoptval = cb->s_mtu;
1161                         error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1162                         break;
1163
1164                 case SO_LAST_HEADER:
1165                         error = sooptcopyout(sopt, &cb->s_rhdr, 
1166                                              sizeof cb->s_rhdr);
1167                         break;
1168
1169                 case SO_DEFAULT_HEADERS:
1170                         error = sooptcopyout(sopt, &cb->s_shdr, 
1171                                              sizeof cb->s_shdr);
1172                         break;
1173
1174                 default:
1175                         error = ENOPROTOOPT;
1176                 }
1177                 break;
1178
1179         case SOPT_SET:
1180                 switch (sopt->sopt_name) {
1181                         /* XXX why are these shorts on get and ints on set?
1182                            that doesn't make any sense... */
1183                 case SO_HEADERS_ON_INPUT:
1184                         mask = SF_HI;
1185                         goto set_head;
1186
1187                 case SO_HEADERS_ON_OUTPUT:
1188                         mask = SF_HO;
1189                 set_head:
1190                         error = sooptcopyin(sopt, &optval, sizeof optval,
1191                                             sizeof optval);
1192                         if (error)
1193                                 break;
1194
1195                         if (cb->s_flags & SF_PI) {
1196                                 if (optval)
1197                                         cb->s_flags |= mask;
1198                                 else
1199                                         cb->s_flags &= ~mask;
1200                         } else error = EINVAL;
1201                         break;
1202
1203                 case SO_MTU:
1204                         error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1205                                             sizeof usoptval);
1206                         if (error)
1207                                 break;
1208                         cb->s_mtu = usoptval;
1209                         break;
1210
1211 #ifdef SF_NEWCALL
1212                 case SO_NEWCALL:
1213                         error = sooptcopyin(sopt, &optval, sizeof optval,
1214                                             sizeof optval);
1215                         if (error)
1216                                 break;
1217                         if (optval) {
1218                                 cb->s_flags2 |= SF_NEWCALL;
1219                                 spx_newchecks[5]++;
1220                         } else {
1221                                 cb->s_flags2 &= ~SF_NEWCALL;
1222                                 spx_newchecks[6]++;
1223                         }
1224                         break;
1225 #endif
1226
1227                 case SO_DEFAULT_HEADERS:
1228                         {
1229                                 struct spxhdr sp;
1230
1231                                 error = sooptcopyin(sopt, &sp, sizeof sp,
1232                                                     sizeof sp);
1233                                 if (error)
1234                                         break;
1235                                 cb->s_dt = sp.spx_dt;
1236                                 cb->s_cc = sp.spx_cc & SPX_EM;
1237                         }
1238                         break;
1239
1240                 default:
1241                         error = ENOPROTOOPT;
1242                 }
1243                 break;
1244         }
1245 out:
1246         lwkt_replymsg(&msg->lmsg, error);
1247 }
1248
1249 /*
1250  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
1251  *       will sofree() it when we return.
1252  */
1253 static void
1254 spx_usr_abort(netmsg_t msg)
1255 {
1256         struct socket *so = msg->base.nm_so;
1257         struct ipxpcb *ipxp;
1258         struct spxpcb *cb;
1259
1260         ipxp = sotoipxpcb(so);
1261         cb = ipxtospxpcb(ipxp);
1262
1263         spx_drop(cb, ECONNABORTED);
1264
1265         lwkt_replymsg(&msg->lmsg, 0);
1266 }
1267
1268 /*
1269  * Accept a connection.  Essentially all the work is
1270  * done at higher levels; just return the address
1271  * of the peer, storing through addr.
1272  */
1273 static void
1274 spx_accept(netmsg_t msg)
1275 {
1276         struct socket *so = msg->base.nm_so;
1277         struct sockaddr **nam = msg->accept.nm_nam;
1278         struct ipxpcb *ipxp;
1279         struct sockaddr_ipx *sipx, ssipx;
1280
1281         ipxp = sotoipxpcb(so);
1282         sipx = &ssipx;
1283         bzero(sipx, sizeof *sipx);
1284         sipx->sipx_len = sizeof *sipx;
1285         sipx->sipx_family = AF_IPX;
1286         sipx->sipx_addr = ipxp->ipxp_faddr;
1287         *nam = dup_sockaddr((struct sockaddr *)sipx);
1288
1289         lwkt_replymsg(&msg->lmsg, 0);
1290 }
1291
1292 static int
1293 spx_attach_oncpu(struct socket *so, int proto, struct pru_attach_info *ai)
1294 {
1295         struct ipxpcb *ipxp;
1296         struct spxpcb *cb;
1297         struct mbuf *mm;
1298         struct signalsockbuf *ssb;
1299         int error;
1300
1301         ipxp = sotoipxpcb(so);
1302         cb = ipxtospxpcb(ipxp);
1303
1304         crit_enter();
1305         if (ipxp != NULL) {
1306                 error = EISCONN;
1307                 goto spx_attach_end;
1308         }
1309         error = ipx_pcballoc(so, &ipxpcb_list);
1310         if (error)
1311                 goto spx_attach_end;
1312         if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
1313                 error = soreserve(so, (u_long) 3072, (u_long) 3072,
1314                                   ai->sb_rlimit);
1315                 if (error)
1316                         goto spx_attach_end;
1317         }
1318         ipxp = sotoipxpcb(so);
1319
1320         cb = kmalloc(sizeof *cb, M_PCB, M_INTWAIT | M_ZERO);
1321         ssb = &so->so_snd;
1322
1323         mm = m_getclr(MB_DONTWAIT, MT_HEADER);
1324         if (mm == NULL) {
1325                 kfree(cb, M_PCB);
1326                 error = ENOBUFS;
1327                 goto spx_attach_end;
1328         }
1329         cb->s_ipx_m = mm;
1330         cb->s_ipx = mtod(mm, struct ipx *);
1331         cb->s_state = TCPS_LISTEN;
1332         cb->s_smax = -1;
1333         cb->s_swl1 = -1;
1334         LIST_INIT(&cb->s_q);
1335         cb->s_ipxpcb = ipxp;
1336         cb->s_mtu = 576 - sizeof(struct spx);
1337         cb->s_cwnd = ssb_space(ssb) * CUNIT / cb->s_mtu;
1338         cb->s_ssthresh = cb->s_cwnd;
1339         cb->s_cwmx = ssb_space(ssb) * CUNIT / (2 * sizeof(struct spx));
1340         /* Above is recomputed when connecting to account
1341            for changed buffering or mtu's */
1342         cb->s_rtt = SPXTV_SRTTBASE;
1343         cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1344         SPXT_RANGESET(cb->s_rxtcur,
1345             ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1346             SPXTV_MIN, SPXTV_REXMTMAX);
1347         ipxp->ipxp_pcb = (caddr_t)cb; 
1348 spx_attach_end:
1349         crit_exit();
1350         return error;
1351 }
1352
1353 static void
1354 spx_attach(netmsg_t msg)
1355 {
1356         int error;
1357
1358         error = spx_attach_oncpu(msg->base.nm_so,
1359                                  msg->attach.nm_proto,
1360                                  msg->attach.nm_ai);
1361         lwkt_replymsg(&msg->lmsg, error);
1362 }
1363
1364
1365 static void
1366 spx_bind(netmsg_t msg)
1367 {  
1368         struct socket *so = msg->base.nm_so;
1369         struct ipxpcb *ipxp;
1370         int error;
1371
1372         ipxp = sotoipxpcb(so);
1373
1374         error = ipx_pcbbind(ipxp, msg->bind.nm_nam, msg->bind.nm_td);
1375         lwkt_replymsg(&msg->lmsg, error);
1376 }  
1377    
1378 /*
1379  * Initiate connection to peer.
1380  * Enter SYN_SENT state, and mark socket as connecting.
1381  * Start keep-alive timer, setup prototype header,
1382  * Send initial system packet requesting connection.
1383  */
1384 static void
1385 spx_connect(netmsg_t msg)
1386 {
1387         struct socket *so = msg->base.nm_so;
1388         struct sockaddr *nam = msg->connect.nm_nam;
1389         struct thread *td = msg->connect.nm_td;
1390         struct ipxpcb *ipxp;
1391         struct spxpcb *cb;
1392         int error;
1393
1394         ipxp = sotoipxpcb(so);
1395         cb = ipxtospxpcb(ipxp);
1396
1397         crit_enter();
1398         if (ipxp->ipxp_lport == 0) {
1399                 error = ipx_pcbbind(ipxp, NULL, td);
1400                 if (error)
1401                         goto spx_connect_end;
1402         }
1403         error = ipx_pcbconnect(ipxp, nam, td);
1404         if (error)
1405                 goto spx_connect_end;
1406         soisconnecting(so);
1407         spxstat.spxs_connattempt++;
1408         cb->s_state = TCPS_SYN_SENT;
1409         cb->s_did = 0;
1410         spx_template(cb);
1411         cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1412         cb->s_force = 1 + SPXTV_KEEP;
1413         /*
1414          * Other party is required to respond to
1415          * the port I send from, but he is not
1416          * required to answer from where I am sending to,
1417          * so allow wildcarding.
1418          * original port I am sending to is still saved in
1419          * cb->s_dport.
1420          */
1421         ipxp->ipxp_fport = 0;
1422         error = spx_output(cb, NULL);
1423 spx_connect_end:
1424         crit_exit();
1425         lwkt_replymsg(&msg->lmsg, error);
1426 }
1427
1428 static void
1429 spx_detach(netmsg_t msg)
1430 {
1431         struct socket *so = msg->base.nm_so;
1432         struct ipxpcb *ipxp;
1433         struct spxpcb *cb;
1434         int error;
1435
1436         ipxp = sotoipxpcb(so);
1437         cb = ipxtospxpcb(ipxp);
1438
1439         if (ipxp) {
1440                 crit_enter();
1441                 if (cb->s_state > TCPS_LISTEN)
1442                         spx_disconnect(cb);
1443                 else
1444                         spx_close(cb);
1445                 crit_exit();
1446                 error = 0;
1447         } else {
1448                 error = ENOTCONN;
1449         }
1450         lwkt_replymsg(&msg->lmsg, error);
1451 }
1452
1453 /*
1454  * We may decide later to implement connection closing
1455  * handshaking at the spx level optionally.
1456  * here is the hook to do it:
1457  */
1458 static void
1459 spx_usr_disconnect(netmsg_t msg)
1460 {
1461         struct socket *so = msg->base.nm_so;
1462         struct ipxpcb *ipxp;
1463         struct spxpcb *cb;
1464
1465         ipxp = sotoipxpcb(so);
1466         cb = ipxtospxpcb(ipxp);
1467
1468         crit_enter();
1469         spx_disconnect(cb);
1470         crit_exit();
1471
1472         lwkt_replymsg(&msg->lmsg, 0);
1473 }
1474
1475 static void
1476 spx_listen(netmsg_t msg)
1477 {
1478         struct socket *so = msg->base.nm_so;
1479         struct ipxpcb *ipxp;
1480         struct spxpcb *cb;
1481         int error;
1482
1483         error = 0;
1484         ipxp = sotoipxpcb(so);
1485         cb = ipxtospxpcb(ipxp);
1486
1487         if (ipxp->ipxp_lport == 0)
1488                 error = ipx_pcbbind(ipxp, NULL, msg->listen.nm_td);
1489         if (error == 0)
1490                 cb->s_state = TCPS_LISTEN;
1491         lwkt_replymsg(&msg->lmsg, error);
1492 }
1493
1494 /*
1495  * After a receive, possibly send acknowledgment
1496  * updating allocation.
1497  */
1498 static void
1499 spx_rcvd(netmsg_t msg)
1500 {
1501         struct socket *so = msg->base.nm_so;
1502         struct ipxpcb *ipxp;
1503         struct spxpcb *cb;
1504
1505         ipxp = sotoipxpcb(so);
1506         cb = ipxtospxpcb(ipxp);
1507
1508         crit_enter();
1509         cb->s_flags |= SF_RVD;
1510         spx_output(cb, NULL);
1511         cb->s_flags &= ~SF_RVD;
1512         crit_exit();
1513
1514         lwkt_replymsg(&msg->lmsg, 0);
1515 }
1516
1517 static void
1518 spx_rcvoob(netmsg_t msg)
1519 {
1520         struct mbuf *m = msg->rcvoob.nm_m;
1521         struct socket *so = msg->base.nm_so;
1522         struct ipxpcb *ipxp;
1523         struct spxpcb *cb;
1524         int error;
1525
1526         ipxp = sotoipxpcb(so);
1527         cb = ipxtospxpcb(ipxp);
1528
1529         if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1530             (so->so_state & SS_RCVATMARK)) {
1531                 m->m_len = 1;
1532                 *mtod(m, caddr_t) = cb->s_iobc;
1533                 error = 0;
1534         } else {
1535                 error = EINVAL;
1536         }
1537         lwkt_replymsg(&msg->lmsg, error);
1538 }
1539
1540 static void
1541 spx_send(netmsg_t msg)
1542 {
1543         struct socket *so = msg->base.nm_so;
1544         struct mbuf *m = msg->send.nm_m;
1545         struct mbuf *controlp = msg->send.nm_control;
1546         int flags = msg->send.nm_flags;
1547         struct ipxpcb *ipxp;
1548         struct spxpcb *cb;
1549         int error;
1550
1551         error = 0;
1552         ipxp = sotoipxpcb(so);
1553         cb = ipxtospxpcb(ipxp);
1554
1555         crit_enter();
1556         if (flags & PRUS_OOB) {
1557                 if (ssb_space(&so->so_snd) < -512) {
1558                         error = ENOBUFS;
1559                         goto spx_send_end;
1560                 }
1561                 cb->s_oobflags |= SF_SOOB;
1562         }
1563         if (controlp != NULL) {
1564                 u_short *p = mtod(controlp, u_short *);
1565                 spx_newchecks[2]++;
1566                 if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1567                         cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1568                         spx_newchecks[3]++;
1569                 }
1570                 m_freem(controlp);
1571         }
1572         controlp = NULL;
1573         error = spx_output(cb, m);
1574         m = NULL;
1575 spx_send_end:
1576         if (controlp != NULL)
1577                 m_freem(controlp);
1578         if (m != NULL)
1579                 m_freem(m);
1580         crit_exit();
1581         lwkt_replymsg(&msg->lmsg, error);
1582 }
1583
1584 static void
1585 spx_shutdown(netmsg_t msg)
1586 {
1587         struct socket *so = msg->base.nm_so;
1588         struct ipxpcb *ipxp;
1589         struct spxpcb *cb;
1590         int error;
1591
1592         error = 0;
1593         ipxp = sotoipxpcb(so);
1594         cb = ipxtospxpcb(ipxp);
1595
1596         crit_enter();
1597         socantsendmore(so);
1598         cb = spx_usrclosed(cb);
1599         if (cb != NULL)
1600                 error = spx_output(cb, NULL);
1601         crit_exit();
1602         lwkt_replymsg(&msg->lmsg, error);
1603 }
1604
1605 static void
1606 spx_sp_attach(netmsg_t msg)
1607 {
1608         struct socket *so = msg->base.nm_so;
1609         struct ipxpcb *ipxp;
1610         int error;
1611
1612         error = spx_attach_oncpu(so, msg->attach.nm_proto, msg->attach.nm_ai);
1613         if (error == 0) {
1614                 ipxp = sotoipxpcb(so);
1615                 ((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1616                                         (SF_HI | SF_HO | SF_PI);
1617         }
1618         lwkt_replymsg(&msg->lmsg, error);
1619 }
1620
1621 /*
1622  * Create template to be used to send spx packets on a connection.
1623  * Called after host entry created, fills
1624  * in a skeletal spx header (choosing connection id),
1625  * minimizing the amount of work necessary when the connection is used.
1626  */
1627 static void
1628 spx_template(struct spxpcb *cb)
1629 {
1630         struct ipxpcb *ipxp = cb->s_ipxpcb;
1631         struct ipx *ipx = cb->s_ipx;
1632         struct signalsockbuf *ssb = &(ipxp->ipxp_socket->so_snd);
1633
1634         ipx->ipx_pt = IPXPROTO_SPX;
1635         ipx->ipx_sna = ipxp->ipxp_laddr;
1636         ipx->ipx_dna = ipxp->ipxp_faddr;
1637         cb->s_sid = htons(spx_iss);
1638         spx_iss += SPX_ISSINCR/2;
1639         cb->s_alo = 1;
1640         cb->s_cwnd = (ssb_space(ssb) * CUNIT) / cb->s_mtu;
1641         cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1642                                         of large packets */
1643         cb->s_cwmx = (ssb_space(ssb) * CUNIT) / (2 * sizeof(struct spx));
1644         cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1645                 /* But allow for lots of little packets as well */
1646 }
1647
1648 /*
1649  * Close a SPIP control block:
1650  *      discard spx control block itself
1651  *      discard ipx protocol control block
1652  *      wake up any sleepers
1653  */
1654 static struct spxpcb *
1655 spx_close(struct spxpcb *cb)
1656 {
1657         struct spx_q *q;
1658         struct ipxpcb *ipxp = cb->s_ipxpcb;
1659         struct socket *so = ipxp->ipxp_socket;
1660
1661         while (!LIST_EMPTY(&cb->s_q)) {
1662                 q = LIST_FIRST(&cb->s_q);
1663                 LIST_REMOVE(q, sq_entry);
1664                 m_freem(q->si_mbuf);
1665                 kfree(q, M_SPX_Q);
1666         }
1667         m_free(cb->s_ipx_m);
1668         kfree(cb, M_PCB);
1669         ipxp->ipxp_pcb = 0;
1670         soisdisconnected(so);
1671         ipx_pcbdetach(ipxp);
1672         spxstat.spxs_closed++;
1673         return (NULL);
1674 }
1675
/*
 * The user has closed the connection.
 *
 * Someday we may do level 3 handshaking to close a connection or send
 * a xerox style error.  For now the control block is simply closed, and
 * whatever spx_close() returns is passed back to the caller.
 */
static struct spxpcb *
spx_usrclosed(struct spxpcb *cb)
{
        struct spxpcb *remaining;

        remaining = spx_close(cb);
        return (remaining);
}
1686
/*
 * Disconnect a connection.  At present this is nothing more than a
 * plain close; the result of spx_close() is passed back to the caller.
 */
static struct spxpcb *
spx_disconnect(struct spxpcb *cb)
{
        struct spxpcb *remaining;

        remaining = spx_close(cb);
        return (remaining);
}
1692
1693 /*
1694  * Drop connection, reporting
1695  * the specified error.
1696  */
1697 static struct spxpcb *
1698 spx_drop(struct spxpcb *cb, int error)
1699 {
1700         struct socket *so = cb->s_ipxpcb->ipxp_socket;
1701
1702         /*
1703          * someday, in the xerox world
1704          * we will generate error protocol packets
1705          * announcing that the socket has gone away.
1706          */
1707         if (TCPS_HAVERCVDSYN(cb->s_state)) {
1708                 spxstat.spxs_drops++;
1709                 cb->s_state = TCPS_CLOSED;
1710                 /*tcp_output(cb);*/
1711         } else
1712                 spxstat.spxs_conndrops++;
1713         so->so_error = error;
1714         return (spx_close(cb));
1715 }
1716
1717 /*
1718  * Fast timeout routine for processing delayed acks
1719  */
1720 void
1721 spx_fasttimo(void)
1722 {
1723         struct ipxpcb *ipxp;
1724         struct spxpcb *cb;
1725
1726         crit_enter();
1727         LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1728                 if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1729                     (cb->s_flags & SF_DELACK)) {
1730                         cb->s_flags &= ~SF_DELACK;
1731                         cb->s_flags |= SF_ACKNOW;
1732                         spxstat.spxs_delack++;
1733                         spx_output(cb, NULL);
1734                 }
1735         }
1736         crit_exit();
1737 }
1738
1739 /*
1740  * spx protocol timeout routine called every 500 ms.
1741  * Updates the timers in all active pcb's and
1742  * causes finite state machine actions if timers expire.
1743  */
1744 void
1745 spx_slowtimo(void)
1746 {
1747         struct ipxpcb *ip, *ip_temp;
1748         struct spxpcb *cb;
1749         int i;
1750
1751         /*
1752          * Search through tcb's and update active timers.
1753          */
1754         crit_enter();
1755         LIST_FOREACH_MUTABLE(ip, &ipxpcb_list, ipxp_list, ip_temp) {
1756                 cb = ipxtospxpcb(ip);
1757                 if (cb == NULL)
1758                         continue;
1759                 for (i = 0; i < SPXT_NTIMERS; i++) {
1760                         if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1761                                 if (spx_timers(cb, i) == NULL)
1762                                         continue;
1763                         }
1764                 }
1765                 cb->s_idle++;
1766                 if (cb->s_rtt)
1767                         cb->s_rtt++;
1768         }
1769         spx_iss += SPX_ISSINCR/PR_SLOWHZ;               /* increment iss */
1770         crit_exit();
1771 }
1772
1773 /*
1774  * SPX timer processing.
1775  */
1776 static struct spxpcb *
1777 spx_timers(struct spxpcb *cb, int timer)
1778 {
1779         long rexmt;
1780         int win;
1781
1782         cb->s_force = 1 + timer;
1783         switch (timer) {
1784
1785         /*
1786          * 2 MSL timeout in shutdown went off.  TCP deletes connection
1787          * control block.
1788          */
1789         case SPXT_2MSL:
1790                 kprintf("spx: SPXT_2MSL went off for no reason\n");
1791                 cb->s_timer[timer] = 0;
1792                 break;
1793
1794         /*
1795          * Retransmission timer went off.  Message has not
1796          * been acked within retransmit interval.  Back off
1797          * to a longer retransmit interval and retransmit one packet.
1798          */
1799         case SPXT_REXMT:
1800                 if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1801                         cb->s_rxtshift = SPX_MAXRXTSHIFT;
1802                         spxstat.spxs_timeoutdrop++;
1803                         cb = spx_drop(cb, ETIMEDOUT);
1804                         break;
1805                 }
1806                 spxstat.spxs_rexmttimeo++;
1807                 rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1808                 rexmt *= spx_backoff[cb->s_rxtshift];
1809                 SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1810                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1811                 /*
1812                  * If we have backed off fairly far, our srtt
1813                  * estimate is probably bogus.  Clobber it
1814                  * so we'll take the next rtt measurement as our srtt;
1815                  * move the current srtt into rttvar to keep the current
1816                  * retransmit times until then.
1817                  */
1818                 if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1819                         cb->s_rttvar += (cb->s_srtt >> 2);
1820                         cb->s_srtt = 0;
1821                 }
1822                 cb->s_snxt = cb->s_rack;
1823                 /*
1824                  * If timing a packet, stop the timer.
1825                  */
1826                 cb->s_rtt = 0;
1827                 /*
1828                  * See very long discussion in tcp_timer.c about congestion
1829                  * window and sstrhesh
1830                  */
1831                 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1832                 if (win < 2)
1833                         win = 2;
1834                 cb->s_cwnd = CUNIT;
1835                 cb->s_ssthresh = win * CUNIT;
1836                 spx_output(cb, NULL);
1837                 break;
1838
1839         /*
1840          * Persistance timer into zero window.
1841          * Force a probe to be sent.
1842          */
1843         case SPXT_PERSIST:
1844                 spxstat.spxs_persisttimeo++;
1845                 spx_setpersist(cb);
1846                 spx_output(cb, NULL);
1847                 break;
1848
1849         /*
1850          * Keep-alive timer went off; send something
1851          * or drop connection if idle for too long.
1852          */
1853         case SPXT_KEEP:
1854                 spxstat.spxs_keeptimeo++;
1855                 if (cb->s_state < TCPS_ESTABLISHED)
1856                         goto dropit;
1857                 if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1858                         if (cb->s_idle >= SPXTV_MAXIDLE)
1859                                 goto dropit;
1860                         spxstat.spxs_keepprobe++;
1861                         spx_output(cb, NULL);
1862                 } else
1863                         cb->s_idle = 0;
1864                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1865                 break;
1866         dropit:
1867                 spxstat.spxs_keepdrops++;
1868                 cb = spx_drop(cb, ETIMEDOUT);
1869                 break;
1870         }
1871         return (cb);
1872 }