ce57bbd265de7e2c52f4c1cf958591e64ea11609
[dragonfly.git] / sys / netproto / ipx / spx_usrreq.c
1 /*
2  * Copyright (c) 1995, Mike Mitchell
3  * Copyright (c) 1984, 1985, 1986, 1987, 1993
4  *      The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *      This product includes software developed by the University of
17  *      California, Berkeley and its contributors.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)spx_usrreq.h
35  *
36  * $FreeBSD: src/sys/netipx/spx_usrreq.c,v 1.27.2.1 2001/02/22 09:44:18 bp Exp $
37  * $DragonFly: src/sys/netproto/ipx/spx_usrreq.c,v 1.20 2007/04/22 01:13:15 dillon Exp $
38  */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/proc.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/socketvar2.h>
50
51 #include <sys/thread2.h>
52 #include <sys/msgport2.h>
53
54 #include <net/route.h>
55 #include <netinet/tcp_fsm.h>
56
57 #include "ipx.h"
58 #include "ipx_pcb.h"
59 #include "ipx_var.h"
60 #include "spx.h"
61 #include "spx_timer.h"
62 #include "spx_var.h"
63 #include "spx_debug.h"
64
65 /*
66  * SPX protocol implementation.
67  */
68 static u_short  spx_iss;
69 static u_short  spx_newchecks[50];
70 static int      spx_hardnosed;
71 static int      spx_use_delack = 0;
72 static int      traceallspxs = 0;
73 static struct   spx     spx_savesi;
74 static struct   spx_istat spx_istat;
75
/*
 * This used to be a standalone "struct spxstat spxstat".  The counters are
 * now the newstats member of spx_istat; the alias keeps the old name usable.
 */
#if !defined(spxstat)
#define spxstat spx_istat.newstats
#endif
80
81 static int spx_backoff[SPX_MAXRXTSHIFT+1] =
82     { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
83
/* Internal helpers that hand back a (possibly changed) control block. */
static struct spxpcb *spx_close(struct spxpcb *cb);
static struct spxpcb *spx_disconnect(struct spxpcb *cb);
static struct spxpcb *spx_drop(struct spxpcb *cb, int errno);
static struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
static struct spxpcb *spx_usrclosed(struct spxpcb *cb);

/* Remaining internal helpers. */
static int spx_output(struct spxpcb *cb, struct mbuf *m0);
static int spx_reass(struct spxpcb *cb, struct spx *si, struct mbuf *si_m);
static void spx_setpersist(struct spxpcb *cb);
static void spx_template(struct spxpcb *cb);

/* Message-driven handlers referenced from the pr_usrreqs tables below. */
static void spx_usr_abort(netmsg_t);
static void spx_accept(netmsg_t);
static void spx_attach(netmsg_t);
static void spx_bind(netmsg_t);
static void spx_connect(netmsg_t);
static void spx_detach(netmsg_t);
static void spx_usr_disconnect(netmsg_t);
static void spx_listen(netmsg_t);
static void spx_rcvd(netmsg_t);
static void spx_rcvoob(netmsg_t);
static void spx_send(netmsg_t);
static void spx_shutdown(netmsg_t);
static void spx_sp_attach(netmsg_t);
107
108 struct  pr_usrreqs spx_usrreqs = {
109         .pru_abort = spx_usr_abort,
110         .pru_accept = spx_accept,
111         .pru_attach = spx_attach,
112         .pru_bind = spx_bind,
113         .pru_connect = spx_connect,
114         .pru_connect2 = pr_generic_notsupp,
115         .pru_control = ipx_control,
116         .pru_detach = spx_detach,
117         .pru_disconnect = spx_usr_disconnect,
118         .pru_listen = spx_listen,
119         .pru_peeraddr = ipx_peeraddr,
120         .pru_rcvd = spx_rcvd,
121         .pru_rcvoob = spx_rcvoob,
122         .pru_send = spx_send,
123         .pru_sense = pru_sense_null,
124         .pru_shutdown = spx_shutdown,
125         .pru_sockaddr = ipx_sockaddr,
126         .pru_sosend = sosend,
127         .pru_soreceive = soreceive
128 };
129
130 struct  pr_usrreqs spx_usrreq_sps = {
131         .pru_abort = spx_usr_abort,
132         .pru_accept = spx_accept,
133         .pru_attach = spx_sp_attach,
134         .pru_bind = spx_bind,
135         .pru_connect = spx_connect,
136         .pru_connect2 = pr_generic_notsupp,
137         .pru_control = ipx_control,
138         .pru_detach = spx_detach,
139         .pru_disconnect = spx_usr_disconnect,
140         .pru_listen = spx_listen,
141         .pru_peeraddr = ipx_peeraddr,
142         .pru_rcvd = spx_rcvd,
143         .pru_rcvoob = spx_rcvoob,
144         .pru_send = spx_send,
145         .pru_sense = pru_sense_null,
146         .pru_shutdown = spx_shutdown,
147         .pru_sockaddr = ipx_sockaddr,
148         .pru_sosend = sosend,
149         .pru_soreceive = soreceive
150 };
151
/*
 * Malloc type for the struct spx_q entries that spx_reass() allocates
 * with kmalloc() and releases with kfree().
 */
152 static MALLOC_DEFINE(M_SPX_Q, "ipx_spx_q", "IPX Packet Management");
153
154 void
155 spx_init(void)
156 {
157
158         spx_iss = 1; /* WRONG !! should fish it out of TODR */
159 }
160
161 void
162 spx_input(struct mbuf *m, struct ipxpcb *ipxp)
163 {
164         struct spxpcb *cb;
165         struct spx *si;
166         struct socket *so;
167         int dropsocket = 0;
168         short ostate = 0;
169
170         spxstat.spxs_rcvtotal++;
171         if (ipxp == NULL) {
172                 panic("No ipxpcb in spx_input\n");
173                 return;
174         }
175
176         cb = ipxtospxpcb(ipxp);
177         if (cb == NULL)
178                 goto bad;
179
180         if (m->m_len < sizeof(struct spx)) {
181                 if ((m = m_pullup(m, sizeof(*si))) == NULL) {
182                         spxstat.spxs_rcvshort++;
183                         return;
184                 }
185         }
186         si = mtod(m, struct spx *);
187         si->si_seq = ntohs(si->si_seq);
188         si->si_ack = ntohs(si->si_ack);
189         si->si_alo = ntohs(si->si_alo);
190
191         so = ipxp->ipxp_socket;
192
193         if (so->so_options & SO_DEBUG || traceallspxs) {
194                 ostate = cb->s_state;
195                 spx_savesi = *si;
196         }
197         if (so->so_options & SO_ACCEPTCONN) {
198                 struct spxpcb *ocb = cb;
199
200                 so = sonewconn(so, 0);
201                 if (so == NULL) {
202                         goto drop;
203                 }
204                 /*
205                  * This is ugly, but ....
206                  *
207                  * Mark socket as temporary until we're
208                  * committed to keeping it.  The code at
209                  * ``drop'' and ``dropwithreset'' check the
210                  * flag dropsocket to see if the temporary
211                  * socket created here should be discarded.
212                  * We mark the socket as discardable until
213                  * we're committed to it below in TCPS_LISTEN.
214                  */
215                 dropsocket++;
216                 ipxp = (struct ipxpcb *)so->so_pcb;
217                 ipxp->ipxp_laddr = si->si_dna;
218                 cb = ipxtospxpcb(ipxp);
219                 cb->s_mtu = ocb->s_mtu;         /* preserve sockopts */
220                 cb->s_flags = ocb->s_flags;     /* preserve sockopts */
221                 cb->s_flags2 = ocb->s_flags2;   /* preserve sockopts */
222                 cb->s_state = TCPS_LISTEN;
223         }
224
225         /*
226          * Packet received on connection.
227          * reset idle time and keep-alive timer;
228          */
229         cb->s_idle = 0;
230         cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
231
232         switch (cb->s_state) {
233
234         case TCPS_LISTEN:{
235                 struct sockaddr_ipx *sipx, ssipx;
236                 struct ipx_addr laddr;
237
238                 /*
239                  * If somebody here was carying on a conversation
240                  * and went away, and his pen pal thinks he can
241                  * still talk, we get the misdirected packet.
242                  */
243                 if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
244                         spx_istat.gonawy++;
245                         goto dropwithreset;
246                 }
247                 sipx = &ssipx;
248                 bzero(sipx, sizeof *sipx);
249                 sipx->sipx_len = sizeof(*sipx);
250                 sipx->sipx_family = AF_IPX;
251                 sipx->sipx_addr = si->si_sna;
252                 laddr = ipxp->ipxp_laddr;
253                 if (ipx_nullhost(laddr))
254                         ipxp->ipxp_laddr = si->si_dna;
255                 if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
256                         ipxp->ipxp_laddr = laddr;
257                         spx_istat.noconn++;
258                         goto drop;
259                 }
260                 spx_template(cb);
261                 dropsocket = 0;         /* committed to socket */
262                 cb->s_did = si->si_sid;
263                 cb->s_rack = si->si_ack;
264                 cb->s_ralo = si->si_alo;
265 #define THREEWAYSHAKE
266 #ifdef THREEWAYSHAKE
267                 cb->s_state = TCPS_SYN_RECEIVED;
268                 cb->s_force = 1 + SPXT_KEEP;
269                 spxstat.spxs_accepts++;
270                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
271                 }
272                 break;
273         /*
274          * This state means that we have heard a response
275          * to our acceptance of their connection
276          * It is probably logically unnecessary in this
277          * implementation.
278          */
279          case TCPS_SYN_RECEIVED: {
280                 if (si->si_did != cb->s_sid) {
281                         spx_istat.wrncon++;
282                         goto drop;
283                 }
284 #endif
285                 ipxp->ipxp_fport =  si->si_sport;
286                 cb->s_timer[SPXT_REXMT] = 0;
287                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
288                 soisconnected(so);
289                 cb->s_state = TCPS_ESTABLISHED;
290                 spxstat.spxs_accepts++;
291                 }
292                 break;
293
294         /*
295          * This state means that we have gotten a response
296          * to our attempt to establish a connection.
297          * We fill in the data from the other side,
298          * telling us which port to respond to, instead of the well-
299          * known one we might have sent to in the first place.
300          * We also require that this is a response to our
301          * connection id.
302          */
303         case TCPS_SYN_SENT:
304                 if (si->si_did != cb->s_sid) {
305                         spx_istat.notme++;
306                         goto drop;
307                 }
308                 spxstat.spxs_connects++;
309                 cb->s_did = si->si_sid;
310                 cb->s_rack = si->si_ack;
311                 cb->s_ralo = si->si_alo;
312                 cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
313                 cb->s_timer[SPXT_REXMT] = 0;
314                 cb->s_flags |= SF_ACKNOW;
315                 soisconnected(so);
316                 cb->s_state = TCPS_ESTABLISHED;
317                 /* Use roundtrip time of connection request for initial rtt */
318                 if (cb->s_rtt) {
319                         cb->s_srtt = cb->s_rtt << 3;
320                         cb->s_rttvar = cb->s_rtt << 1;
321                         SPXT_RANGESET(cb->s_rxtcur,
322                             ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
323                             SPXTV_MIN, SPXTV_REXMTMAX);
324                             cb->s_rtt = 0;
325                 }
326         }
327         if (so->so_options & SO_DEBUG || traceallspxs)
328                 spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
329
330         m->m_len -= sizeof(struct ipx);
331         m->m_pkthdr.len -= sizeof(struct ipx);
332         m->m_data += sizeof(struct ipx);
333
334         if (spx_reass(cb, si, m)) {
335                 m_freem(m);
336         }
337         if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
338                 spx_output(cb, NULL);
339         cb->s_flags &= ~(SF_WIN|SF_RXT);
340         return;
341
342 dropwithreset:
343         if (dropsocket)
344                 soabort(so);
345         si->si_seq = ntohs(si->si_seq);
346         si->si_ack = ntohs(si->si_ack);
347         si->si_alo = ntohs(si->si_alo);
348         m_freem(m);
349         if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
350                 spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
351         return;
352
353 drop:
354 bad:
355         if (cb == 0 || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
356             traceallspxs)
357                 spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
358         m_freem(m);
359 }
360
/*
 * Number of consecutive fully-duplicate acks after which spx_reass()
 * retransmits without waiting for the retransmit timer.
 */
361 static int spxrexmtthresh = 3;
362
363 /*
364  * This is structurally similar to the tcp reassembly routine
365  * but its function is somewhat different:  It merely queues
366  * packets up, and suppresses duplicates.
 *
 * cb   - control block of the connection.
 * si   - header of the arriving packet; spx_input() passes it with the
 *        seq/ack/alo fields already in host order.  NULL skips all
 *        per-packet processing and only delivers already queued data.
 * si_m - mbuf chain holding the packet.
 *
 * Returns non-zero when si_m was neither queued nor freed here, so the
 * caller still has to dispose of it; returns 0 when si_m was queued on
 * cb->s_q or freed, or when si was NULL.
367  */
368 static int
369 spx_reass(struct spxpcb *cb, struct spx *si, struct mbuf *si_m)
370 {
371         struct spx_q *q;
372         struct spx_q *nq;
373         struct mbuf *m;
374         struct socket *so = cb->s_ipxpcb->ipxp_socket;
        /* Non-zero: deliver packets to the socket with the header intact. */
375         char packetp = cb->s_flags & SF_HI;
376         int incr;
        /* Set once at least one packet was appended to the receive buffer. */
377         char wakeup = 0;
378
        /* No new packet: just try to deliver what is already queued. */
379         if (si == NULL)
380                 goto present;
381         /*
382          * Update our news from them.
383          */
384         if (si->si_cc & SPX_SA)
385                 cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
386         if (SSEQ_GT(si->si_alo, cb->s_ralo))
387                 cb->s_flags |= SF_WIN;
        /* Ack does not advance past what we already recorded. */
388         if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
389                 if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
390                         spxstat.spxs_rcvdupack++;
391                         /*
392                          * If this is a completely duplicate ack
393                          * and other conditions hold, we assume
394                          * a packet has been dropped and retransmit
395                          * it exactly as in tcp_input().
396                          */
397                         if (si->si_ack != cb->s_rack ||
398                             si->si_alo != cb->s_ralo)
399                                 cb->s_dupacks = 0;
400                         else if (++cb->s_dupacks == spxrexmtthresh) {
                                /*
                                 * Rewind s_snxt to the acked sequence and
                                 * force output with the congestion window
                                 * set to one CUNIT; afterwards halve the
                                 * saved window if it was at least 4 CUNITs
                                 * and never leave s_snxt below its old value.
                                 */
401                                 u_short onxt = cb->s_snxt;
402                                 int cwnd = cb->s_cwnd;
403
404                                 cb->s_snxt = si->si_ack;
405                                 cb->s_cwnd = CUNIT;
406                                 cb->s_force = 1 + SPXT_REXMT;
407                                 spx_output(cb, NULL);
408                                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
409                                 cb->s_rtt = 0;
410                                 if (cwnd >= 4 * CUNIT)
411                                         cb->s_cwnd = cwnd / 2;
412                                 if (SSEQ_GT(onxt, cb->s_snxt))
413                                         cb->s_snxt = onxt;
414                                 return (1);
415                         }
416                 } else
417                         cb->s_dupacks = 0;
418                 goto update_window;
419         }
420         cb->s_dupacks = 0;
421         /*
422          * If our correspondent acknowledges data we haven't sent
423          * TCP would drop the packet after acking.  We'll be a little
424          * more permissive
425          */
426         if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
427                 spxstat.spxs_rcvacktoomuch++;
428                 si->si_ack = cb->s_smax + 1;
429         }
430         spxstat.spxs_rcvackpack++;
431         /*
432          * If transmit timer is running and timed sequence
433          * number was acked, update smoothed round trip time.
434          * See discussion of algorithm in tcp_input.c
435          */
436         if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
437                 spxstat.spxs_rttupdated++;
438                 if (cb->s_srtt != 0) {
439                         short delta;
440                         delta = cb->s_rtt - (cb->s_srtt >> 3);
441                         if ((cb->s_srtt += delta) <= 0)
442                                 cb->s_srtt = 1;
443                         if (delta < 0)
444                                 delta = -delta;
445                         delta -= (cb->s_rttvar >> 2);
446                         if ((cb->s_rttvar += delta) <= 0)
447                                 cb->s_rttvar = 1;
448                 } else {
449                         /*
450                          * No rtt measurement yet
451                          */
452                         cb->s_srtt = cb->s_rtt << 3;
453                         cb->s_rttvar = cb->s_rtt << 1;
454                 }
455                 cb->s_rtt = 0;
456                 cb->s_rxtshift = 0;
457                 SPXT_RANGESET(cb->s_rxtcur,
458                         ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
459                         SPXTV_MIN, SPXTV_REXMTMAX);
460         }
461         /*
462          * If all outstanding data is acked, stop retransmit
463          * timer and remember to restart (more output or persist).
464          * If there is more data to be acked, restart retransmit
465          * timer, using current (possibly backed-off) value;
466          */
467         if (si->si_ack == cb->s_smax + 1) {
468                 cb->s_timer[SPXT_REXMT] = 0;
469                 cb->s_flags |= SF_RXT;
470         } else if (cb->s_timer[SPXT_PERSIST] == 0)
471                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
472         /*
473          * When new data is acked, open the congestion window.
474          * If the window gives us less than ssthresh packets
475          * in flight, open exponentially (maxseg at a time).
476          * Otherwise open linearly (maxseg^2 / cwnd at a time).
477          */
478         incr = CUNIT;
479         if (cb->s_cwnd > cb->s_ssthresh)
480                 incr = max(incr * incr / cb->s_cwnd, 1);
481         cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
482         /*
483          * Trim Acked data from output queue.
484          */
485         while ((m = so->so_snd.ssb_mb) != NULL) {
486                 if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
487                         sbdroprecord(&so->so_snd.sb);
488                 else
489                         break;
490         }
491         sowwakeup(so);
492         cb->s_rack = si->si_ack;
493 update_window:
494         if (SSEQ_LT(cb->s_snxt, cb->s_rack))
495                 cb->s_snxt = cb->s_rack;
        /*
         * Take over the advertised window from this packet.
         * NOTE(review): as parenthesized, the last clause (same ack, larger
         * allocation) does not depend on s_swl1 == si_seq -- confirm that
         * this grouping is intended.
         */
496         if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
497             (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
498              (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
499                 /* keep track of pure window updates */
500                 if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
501                     && SSEQ_LT(cb->s_ralo, si->si_alo)) {
502                         spxstat.spxs_rcvwinupd++;
503                         spxstat.spxs_rcvdupack--;
504                 }
505                 cb->s_ralo = si->si_alo;
506                 cb->s_swl1 = si->si_seq;
507                 cb->s_swl2 = si->si_ack;
508                 cb->s_swnd = (1 + si->si_alo - si->si_ack);
509                 if (cb->s_swnd > cb->s_smxw)
510                         cb->s_smxw = cb->s_swnd;
511                 cb->s_flags |= SF_WIN;
512         }
513         /*
514          * If this packet number is higher than that which
515          * we have allocated refuse it, unless urgent
516          */
517         if (SSEQ_GT(si->si_seq, cb->s_alo)) {
518                 if (si->si_cc & SPX_SP) {
519                         spxstat.spxs_rcvwinprobe++;
520                         return (1);
521                 } else
522                         spxstat.spxs_rcvpackafterwin++;
523                 if (si->si_cc & SPX_OB) {
                        /* Out-of-band data is tolerated up to 60 past s_alo. */
524                         if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
525                                 m_freem(si_m);
526                                 return (0);
527                         } /* else queue this packet; */
528                 } else {
529                         /*register struct socket *so = cb->s_ipxpcb->ipxp_socket;
530                         if (so->so_state && SS_NOFDREF) {
531                                 spx_close(cb);
532                         } else
533                                        would crash system*/
534                         spx_istat.notyet++;
535                         m_freem(si_m);
536                         return (0);
537                 }
538         }
539         /*
540          * If this is a system packet, we don't need to
541          * queue it up, and won't update acknowledge #
542          */
543         if (si->si_cc & SPX_SP) {
544                 return (1);
545         }
546         /*
547          * We have already seen this packet, so drop.
548          */
549         if (SSEQ_LT(si->si_seq, cb->s_ack)) {
550                 spx_istat.bdreas++;
551                 spxstat.spxs_rcvduppack++;
552                 if (si->si_seq == cb->s_ack - 1)
553                         spx_istat.lstdup++;
554                 return (1);
555         }
556         /*
557          * Loop through all packets queued up to insert in
558          * appropriate sequence.
559          */
560         for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
561                 if (si->si_seq == SI(q)->si_seq) {
562                         spxstat.spxs_rcvduppack++;
563                         return (1);
564                 }
565                 if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
566                         spxstat.spxs_rcvoopack++;
567                         break;
568                 }
569         }
        /* Queue entry for the packet; if none can be had the packet is freed. */
570         nq = kmalloc(sizeof(struct spx_q), M_SPX_Q, M_INTNOWAIT);
571         if (nq == NULL) {
572                 m_freem(si_m);
573                 return (0);
574         }
        /*
         * q is the first queued entry with a larger sequence number, or the
         * list head if there is none: link the new entry just in front of it.
         */
575         insque(nq, q->si_prev);
576         nq->si_mbuf = si_m;
577         /*
578          * If this packet is urgent, inform process
579          */
580         if (si->si_cc & SPX_OB) {
                /*
                 * NOTE(review): this reads the byte at offset
                 * 1 + sizeof(*si) from the start of the header -- confirm
                 * that this is the intended out-of-band byte.
                 */
581                 cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
582                 sohasoutofband(so);
583                 cb->s_oobflags |= SF_IOOB;
584         }
585 present:
586 #define SPINC sizeof(struct spxhdr)
587         /*
588          * Loop through all packets queued up to update acknowledge
589          * number, and present all acknowledged data to user;
590          * If in packet interface mode, show packet headers.
591          */
592         for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
593                   if (SI(q)->si_seq == cb->s_ack) {
594                         cb->s_ack++;
595                         m = q->si_mbuf;
596                         if (SI(q)->si_cc & SPX_OB) {
597                                 cb->s_oobflags &= ~SF_IOOB;
598                                 if (so->so_rcv.ssb_cc)
599                                         so->so_oobmark = so->so_rcv.ssb_cc;
600                                 else
601                                         sosetstate(so, SS_RCVATMARK);
602                         }
                        /*
                         * Unlink and free this entry.  q steps back first so
                         * that the loop's q = q->si_next lands on the entry
                         * that followed the removed one.
                         */
603                         nq = q;
604                         q = q->si_prev;
605                         remque(nq);
606                         kfree(nq, M_SPX_Q);
607                         wakeup = 1;
608                         spxstat.spxs_rcvpack++;
609 #ifdef SF_NEWCALL
610                         if (cb->s_flags2 & SF_NEWCALL) {
611                                 struct spxhdr *sp = mtod(m, struct spxhdr *);
612                                 u_char dt = sp->spx_dt;
613                                 spx_newchecks[4]++;
                                /*
                                 * Datastream type differs from the one last
                                 * recorded: append a 5 byte MT_CONTROL mbuf
                                 * (u_short 5, u_short 1, then the new type).
                                 */
614                                 if (dt != cb->s_rhdr.spx_dt) {
615                                         struct mbuf *mm =
616                                            m_getclr(MB_DONTWAIT, MT_CONTROL);
617                                         spx_newchecks[0]++;
618                                         if (mm != NULL) {
619                                                 u_short *s =
620                                                         mtod(mm, u_short *);
621                                                 cb->s_rhdr.spx_dt = dt;
622                                                 mm->m_len = 5; /*XXX*/
623                                                 s[0] = 5;
624                                                 s[1] = 1;
625                                                 *(u_char *)(&s[2]) = dt;
626                                                 sbappend(&so->so_rcv.sb, mm);
627                                         }
628                                 }
629                                 if (sp->spx_cc & SPX_OB) {
630                                         m_chtype(m, MT_OOBDATA);
631                                         spx_newchecks[1]++;
632                                         so->so_oobmark = 0;
633                                         soclrstate(so, SS_RCVATMARK);
634                                 }
                                /* Strip the SPX header unless SF_HI is set. */
635                                 if (packetp == 0) {
636                                         m->m_data += SPINC;
637                                         m->m_len -= SPINC;
638                                         m->m_pkthdr.len -= SPINC;
639                                 }
640                                 if ((sp->spx_cc & SPX_EM) || packetp) {
641                                         sbappendrecord(&so->so_rcv.sb, m);
642                                         spx_newchecks[9]++;
643                                 } else
644                                         sbappend(&so->so_rcv.sb, m);
645                         } else
646 #endif
647                         if (packetp) {
648                                 sbappendrecord(&so->so_rcv.sb, m);
649                         } else {
                                /* Remember the header, then strip it. */
650                                 cb->s_rhdr = *mtod(m, struct spxhdr *);
651                                 m->m_data += SPINC;
652                                 m->m_len -= SPINC;
653                                 m->m_pkthdr.len -= SPINC;
654                                 sbappend(&so->so_rcv.sb, m);
655                         }
656                   } else
657                         break;
658         }
659         if (wakeup)
660                 sorwakeup(so);
661         return (0);
662 }
663
664 void
665 spx_ctlinput(netmsg_t msg)
666 {
667         /*struct socket *so = msg->base.nm_so;*/
668         int cmd = msg->ctlinput.nm_cmd;
669         struct sockaddr *arg_as_sa = msg->ctlinput.nm_arg;
670         caddr_t arg = (/* XXX */ caddr_t)arg_as_sa;
671         struct ipx_addr *na;
672         struct sockaddr_ipx *sipx;
673
674         if (cmd < 0 || cmd > PRC_NCMDS)
675                 goto out;
676
677         switch (cmd) {
678         case PRC_ROUTEDEAD:
679                 break;
680         case PRC_IFDOWN:
681         case PRC_HOSTDEAD:
682         case PRC_HOSTUNREACH:
683                 sipx = (struct sockaddr_ipx *)arg;
684                 if (sipx->sipx_family != AF_IPX)
685                         break;
686                 na = &sipx->sipx_addr;
687                 break;
688         default:
689                 break;
690         }
691 out:
692         lwkt_replymsg(&msg->lmsg, 0);
693 }
694
695 static int
696 spx_output(struct spxpcb *cb, struct mbuf *m0)
697 {
698         struct socket *so = cb->s_ipxpcb->ipxp_socket;
699         struct mbuf *m = NULL;
700         struct spx *si = NULL;
701         struct signalsockbuf *ssb = &so->so_snd;
702         int len = 0, win, rcv_win;
703         short span, off, recordp = 0;
704         u_short alo;
705         int error = 0, sendalot;
706 #ifdef notdef
707         int idle;
708 #endif
709         struct mbuf *mprev;
710
711         if (m0 != NULL) {
712                 int mtu = cb->s_mtu;
713                 int datalen;
714                 /*
715                  * Make sure that packet isn't too big.
716                  */
717                 for (m = m0; m != NULL; m = m->m_next) {
718                         mprev = m;
719                         len += m->m_len;
720                         if (m->m_flags & M_EOR)
721                                 recordp = 1;
722                 }
723                 datalen = (cb->s_flags & SF_HO) ?
724                                 len - sizeof(struct spxhdr) : len;
725                 if (datalen > mtu) {
726                         if (cb->s_flags & SF_PI) {
727                                 m_freem(m0);
728                                 return (EMSGSIZE);
729                         } else {
730                                 int oldEM = cb->s_cc & SPX_EM;
731
732                                 cb->s_cc &= ~SPX_EM;
733                                 while (len > mtu) {
734                                         /*
735                                          * Here we are only being called
736                                          * from usrreq(), so it is OK to
737                                          * block.
738                                          */
739                                         m = m_copym(m0, 0, mtu, MB_WAIT);
740                                         if (cb->s_flags & SF_NEWCALL) {
741                                             struct mbuf *mm = m;
742                                             spx_newchecks[7]++;
743                                             while (mm != NULL) {
744                                                 mm->m_flags &= ~M_EOR;
745                                                 mm = mm->m_next;
746                                             }
747                                         }
748                                         error = spx_output(cb, m);
749                                         if (error) {
750                                                 cb->s_cc |= oldEM;
751                                                 m_freem(m0);
752                                                 return (error);
753                                         }
754                                         m_adj(m0, mtu);
755                                         len -= mtu;
756                                 }
757                                 cb->s_cc |= oldEM;
758                         }
759                 }
760                 /*
761                  * Force length even, by adding a "garbage byte" if
762                  * necessary.
763                  */
764                 if (len & 1) {
765                         m = mprev;
766                         if (M_TRAILINGSPACE(m) >= 1)
767                                 m->m_len++;
768                         else {
769                                 struct mbuf *m1 = m_get(MB_DONTWAIT, MT_DATA);
770
771                                 if (m1 == NULL) {
772                                         m_freem(m0);
773                                         return (ENOBUFS);
774                                 }
775                                 m1->m_len = 1;
776                                 *(mtod(m1, u_char *)) = 0;
777                                 m->m_next = m1;
778                         }
779                 }
780                 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
781                 if (m == NULL) {
782                         m_freem(m0);
783                         return (ENOBUFS);
784                 }
785                 /*
786                  * Fill in mbuf with extended SP header
787                  * and addresses and length put into network format.
788                  */
789                 MH_ALIGN(m, sizeof(struct spx));
790                 m->m_len = sizeof(struct spx);
791                 m->m_next = m0;
792                 si = mtod(m, struct spx *);
793                 si->si_i = *cb->s_ipx;
794                 si->si_s = cb->s_shdr;
795                 if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
796                         struct spxhdr *sh;
797                         if (m0->m_len < sizeof(*sh)) {
798                                 if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
799                                         m_free(m);
800                                         m_freem(m0);
801                                         return (EINVAL);
802                                 }
803                                 m->m_next = m0;
804                         }
805                         sh = mtod(m0, struct spxhdr *);
806                         si->si_dt = sh->spx_dt;
807                         si->si_cc |= sh->spx_cc & SPX_EM;
808                         m0->m_len -= sizeof(*sh);
809                         m0->m_data += sizeof(*sh);
810                         len -= sizeof(*sh);
811                 }
812                 len += sizeof(*si);
813                 if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
814                         si->si_cc |= SPX_EM;
815                         spx_newchecks[8]++;
816                 }
817                 if (cb->s_oobflags & SF_SOOB) {
818                         /*
819                          * Per jqj@cornell:
820                          * make sure OB packets convey exactly 1 byte.
821                          * If the packet is 1 byte or larger, we
822                          * have already guaranteed there to be at least
823                          * one garbage byte for the checksum, and
824                          * extra bytes shouldn't hurt!
825                          */
826                         if (len > sizeof(*si)) {
827                                 si->si_cc |= SPX_OB;
828                                 len = (1 + sizeof(*si));
829                         }
830                 }
831                 si->si_len = htons((u_short)len);
832                 m->m_pkthdr.len = ((len - 1) | 1) + 1;
833                 /*
834                  * queue stuff up for output
835                  */
836                 sbappendrecord(&ssb->sb, m);
837                 cb->s_seq++;
838         }
839 #ifdef notdef
840         idle = (cb->s_smax == (cb->s_rack - 1));
841 #endif
842 again:
843         sendalot = 0;
844         off = cb->s_snxt - cb->s_rack;
845         win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
846
847         /*
848          * If in persist timeout with window of 0, send a probe.
849          * Otherwise, if window is small but nonzero
850          * and timer expired, send what we can and go into
851          * transmit state.
852          */
853         if (cb->s_force == 1 + SPXT_PERSIST) {
854                 if (win != 0) {
855                         cb->s_timer[SPXT_PERSIST] = 0;
856                         cb->s_rxtshift = 0;
857                 }
858         }
859         span = cb->s_seq - cb->s_rack;
860         len = min(span, win) - off;
861
862         if (len < 0) {
863                 /*
864                  * Window shrank after we went into it.
865                  * If window shrank to 0, cancel pending
866                  * retransmission and pull s_snxt back
867                  * to (closed) window.  We will enter persist
868                  * state below.  If the window didn't close completely,
869                  * just wait for an ACK.
870                  */
871                 len = 0;
872                 if (win == 0) {
873                         cb->s_timer[SPXT_REXMT] = 0;
874                         cb->s_snxt = cb->s_rack;
875                 }
876         }
877         if (len > 1)
878                 sendalot = 1;
879         rcv_win = ssb_space(&so->so_rcv);
880
881         /*
882          * Send if we owe peer an ACK.
883          */
884         if (cb->s_oobflags & SF_SOOB) {
885                 /*
886                  * must transmit this out of band packet
887                  */
888                 cb->s_oobflags &= ~ SF_SOOB;
889                 sendalot = 1;
890                 spxstat.spxs_sndurg++;
891                 goto found;
892         }
893         if (cb->s_flags & SF_ACKNOW)
894                 goto send;
895         if (cb->s_state < TCPS_ESTABLISHED)
896                 goto send;
897         /*
898          * Silly window can't happen in spx.
899          * Code from tcp deleted.
900          */
901         if (len)
902                 goto send;
903         /*
904          * Compare available window to amount of window
905          * known to peer (as advertised window less
906          * next expected input.)  If the difference is at least two
907          * packets or at least 35% of the maximum possible window,
908          * then want to send a window update to peer.
909          */
910         if (rcv_win > 0) {
911                 u_short delta =  1 + cb->s_alo - cb->s_ack;
912                 int adv = rcv_win - (delta * cb->s_mtu);
913                 
914                 if ((so->so_rcv.ssb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
915                     (100 * adv / so->so_rcv.ssb_hiwat >= 35)) {
916                         spxstat.spxs_sndwinup++;
917                         cb->s_flags |= SF_ACKNOW;
918                         goto send;
919                 }
920
921         }
922         /*
923          * Many comments from tcp_output.c are appropriate here
924          * including . . .
925          * If send window is too small, there is data to transmit, and no
926          * retransmit or persist is pending, then go to persist state.
927          * If nothing happens soon, send when timer expires:
928          * if window is nonzero, transmit what we can,
929          * otherwise send a probe.
930          */
931         if (so->so_snd.ssb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
932                 cb->s_timer[SPXT_PERSIST] == 0) {
933                         cb->s_rxtshift = 0;
934                         spx_setpersist(cb);
935         }
936         /*
937          * No reason to send a packet, just return.
938          */
939         cb->s_outx = 1;
940         return (0);
941
942 send:
943         /*
944          * Find requested packet.
945          */
946         si = NULL;
947         if (len > 0) {
948                 cb->s_want = cb->s_snxt;
949                 for (m = ssb->ssb_mb; m != NULL; m = m->m_nextpkt) {
950                         si = mtod(m, struct spx *);
951                         if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
952                                 break;
953                 }
954         found:
955                 if (si != NULL) {
956                         if (si->si_seq == cb->s_snxt)
957                                         cb->s_snxt++;
958                                 else
959                                         spxstat.spxs_sndvoid++, si = 0;
960                 }
961         }
962         /*
963          * update window
964          */
965         if (rcv_win < 0)
966                 rcv_win = 0;
967         alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
968         if (SSEQ_LT(alo, cb->s_alo)) 
969                 alo = cb->s_alo;
970
971         if (si != NULL) {
972                 /*
973                  * must make a copy of this packet for
974                  * ipx_output to monkey with
975                  */
976                 m = m_copy(m, 0, (int)M_COPYALL);
977                 if (m == NULL) {
978                         return (ENOBUFS);
979                 }
980                 si = mtod(m, struct spx *);
981                 if (SSEQ_LT(si->si_seq, cb->s_smax))
982                         spxstat.spxs_sndrexmitpack++;
983                 else
984                         spxstat.spxs_sndpack++;
985         } else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
986                 /*
987                  * Must send an acknowledgement or a probe
988                  */
989                 if (cb->s_force)
990                         spxstat.spxs_sndprobe++;
991                 if (cb->s_flags & SF_ACKNOW)
992                         spxstat.spxs_sndacks++;
993                 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
994                 if (m == NULL)
995                         return (ENOBUFS);
996                 /*
997                  * Fill in mbuf with extended SP header
998                  * and addresses and length put into network format.
999                  */
1000                 MH_ALIGN(m, sizeof(struct spx));
1001                 m->m_len = sizeof(*si);
1002                 m->m_pkthdr.len = sizeof(*si);
1003                 si = mtod(m, struct spx *);
1004                 si->si_i = *cb->s_ipx;
1005                 si->si_s = cb->s_shdr;
1006                 si->si_seq = cb->s_smax + 1;
1007                 si->si_len = htons(sizeof(*si));
1008                 si->si_cc |= SPX_SP;
1009         } else {
1010                 cb->s_outx = 3;
1011                 if (so->so_options & SO_DEBUG || traceallspxs)
1012                         spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1013                 return (0);
1014         }
1015         /*
1016          * Stuff checksum and output datagram.
1017          */
1018         if ((si->si_cc & SPX_SP) == 0) {
1019                 if (cb->s_force != (1 + SPXT_PERSIST) ||
1020                     cb->s_timer[SPXT_PERSIST] == 0) {
1021                         /*
1022                          * If this is a new packet and we are not currently 
1023                          * timing anything, time this one.
1024                          */
1025                         if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1026                                 cb->s_smax = si->si_seq;
1027                                 if (cb->s_rtt == 0) {
1028                                         spxstat.spxs_segstimed++;
1029                                         cb->s_rtseq = si->si_seq;
1030                                         cb->s_rtt = 1;
1031                                 }
1032                         }
1033                         /*
1034                          * Set rexmt timer if not currently set,
1035                          * Initial value for retransmit timer is smoothed
1036                          * round-trip time + 2 * round-trip time variance.
1037                          * Initialize shift counter which is used for backoff
1038                          * of retransmit time.
1039                          */
1040                         if (cb->s_timer[SPXT_REXMT] == 0 &&
1041                             cb->s_snxt != cb->s_rack) {
1042                                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1043                                 if (cb->s_timer[SPXT_PERSIST]) {
1044                                         cb->s_timer[SPXT_PERSIST] = 0;
1045                                         cb->s_rxtshift = 0;
1046                                 }
1047                         }
1048                 } else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1049                         cb->s_smax = si->si_seq;
1050                 }
1051         } else if (cb->s_state < TCPS_ESTABLISHED) {
1052                 if (cb->s_rtt == 0)
1053                         cb->s_rtt = 1; /* Time initial handshake */
1054                 if (cb->s_timer[SPXT_REXMT] == 0)
1055                         cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1056         }
1057         {
1058                 /*
1059                  * Do not request acks when we ack their data packets or
1060                  * when we do a gratuitous window update.
1061                  */
1062                 if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1063                                 si->si_cc |= SPX_SA;
1064                 si->si_seq = htons(si->si_seq);
1065                 si->si_alo = htons(alo);
1066                 si->si_ack = htons(cb->s_ack);
1067
1068                 if (ipxcksum) {
1069                         si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1070                 } else
1071                         si->si_sum = 0xffff;
1072
1073                 cb->s_outx = 4;
1074                 if (so->so_options & SO_DEBUG || traceallspxs)
1075                         spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1076
1077                 if (so->so_options & SO_DONTROUTE)
1078                         error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1079                 else
1080                         error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1081         }
1082         if (error) {
1083                 return (error);
1084         }
1085         spxstat.spxs_sndtotal++;
1086         /*
1087          * Data sent (as far as we can tell).
1088          * If this advertises a larger window than any other segment,
1089          * then remember the size of the advertized window.
1090          * Any pending ACK has now been sent.
1091          */
1092         cb->s_force = 0;
1093         cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1094         if (SSEQ_GT(alo, cb->s_alo))
1095                 cb->s_alo = alo;
1096         if (sendalot)
1097                 goto again;
1098         cb->s_outx = 5;
1099         return (0);
1100 }
1101
1102 static int spx_do_persist_panics = 0;
1103
1104 static void
1105 spx_setpersist(struct spxpcb *cb)
1106 {
1107         int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1108
1109         if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1110                 panic("spx_output REXMT");
1111         /*
1112          * Start/restart persistance timer.
1113          */
1114         SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1115             t*spx_backoff[cb->s_rxtshift],
1116             SPXTV_PERSMIN, SPXTV_PERSMAX);
1117         if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1118                 cb->s_rxtshift++;
1119 }
1120
1121 void
1122 spx_ctloutput(netmsg_t msg)
1123 {
1124         struct socket *so = msg->base.nm_so;
1125         struct ipxpcb *ipxp = sotoipxpcb(so);
1126         struct sockopt *sopt = msg->ctloutput.nm_sopt;
1127         struct spxpcb *cb;
1128         int mask, error;
1129         short soptval;
1130         u_short usoptval;
1131         int optval;
1132
1133         error = 0;
1134
1135         if (sopt->sopt_level != IPXPROTO_SPX) {
1136                 /* This will have to be changed when we do more general
1137                    stacking of protocols */
1138                 ipx_ctloutput(msg);
1139                 /* msg now invalid */
1140                 return;
1141         }
1142         if (ipxp == NULL) {
1143                 error = EINVAL;
1144                 goto out;
1145         }
1146         cb = ipxtospxpcb(ipxp);
1147
1148         switch (sopt->sopt_dir) {
1149         case SOPT_GET:
1150                 switch (sopt->sopt_name) {
1151                 case SO_HEADERS_ON_INPUT:
1152                         mask = SF_HI;
1153                         goto get_flags;
1154
1155                 case SO_HEADERS_ON_OUTPUT:
1156                         mask = SF_HO;
1157                 get_flags:
1158                         soptval = cb->s_flags & mask;
1159                         error = sooptcopyout(sopt, &soptval, sizeof soptval);
1160                         break;
1161
1162                 case SO_MTU:
1163                         usoptval = cb->s_mtu;
1164                         error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1165                         break;
1166
1167                 case SO_LAST_HEADER:
1168                         error = sooptcopyout(sopt, &cb->s_rhdr, 
1169                                              sizeof cb->s_rhdr);
1170                         break;
1171
1172                 case SO_DEFAULT_HEADERS:
1173                         error = sooptcopyout(sopt, &cb->s_shdr, 
1174                                              sizeof cb->s_shdr);
1175                         break;
1176
1177                 default:
1178                         error = ENOPROTOOPT;
1179                 }
1180                 break;
1181
1182         case SOPT_SET:
1183                 switch (sopt->sopt_name) {
1184                         /* XXX why are these shorts on get and ints on set?
1185                            that doesn't make any sense... */
1186                 case SO_HEADERS_ON_INPUT:
1187                         mask = SF_HI;
1188                         goto set_head;
1189
1190                 case SO_HEADERS_ON_OUTPUT:
1191                         mask = SF_HO;
1192                 set_head:
1193                         error = sooptcopyin(sopt, &optval, sizeof optval,
1194                                             sizeof optval);
1195                         if (error)
1196                                 break;
1197
1198                         if (cb->s_flags & SF_PI) {
1199                                 if (optval)
1200                                         cb->s_flags |= mask;
1201                                 else
1202                                         cb->s_flags &= ~mask;
1203                         } else error = EINVAL;
1204                         break;
1205
1206                 case SO_MTU:
1207                         error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1208                                             sizeof usoptval);
1209                         if (error)
1210                                 break;
1211                         cb->s_mtu = usoptval;
1212                         break;
1213
1214 #ifdef SF_NEWCALL
1215                 case SO_NEWCALL:
1216                         error = sooptcopyin(sopt, &optval, sizeof optval,
1217                                             sizeof optval);
1218                         if (error)
1219                                 break;
1220                         if (optval) {
1221                                 cb->s_flags2 |= SF_NEWCALL;
1222                                 spx_newchecks[5]++;
1223                         } else {
1224                                 cb->s_flags2 &= ~SF_NEWCALL;
1225                                 spx_newchecks[6]++;
1226                         }
1227                         break;
1228 #endif
1229
1230                 case SO_DEFAULT_HEADERS:
1231                         {
1232                                 struct spxhdr sp;
1233
1234                                 error = sooptcopyin(sopt, &sp, sizeof sp,
1235                                                     sizeof sp);
1236                                 if (error)
1237                                         break;
1238                                 cb->s_dt = sp.spx_dt;
1239                                 cb->s_cc = sp.spx_cc & SPX_EM;
1240                         }
1241                         break;
1242
1243                 default:
1244                         error = ENOPROTOOPT;
1245                 }
1246                 break;
1247         }
1248 out:
1249         lwkt_replymsg(&msg->lmsg, error);
1250 }
1251
1252 /*
1253  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
1254  *       will sofree() it when we return.
1255  */
1256 static void
1257 spx_usr_abort(netmsg_t msg)
1258 {
1259         struct socket *so = msg->base.nm_so;
1260         struct ipxpcb *ipxp;
1261         struct spxpcb *cb;
1262
1263         ipxp = sotoipxpcb(so);
1264         cb = ipxtospxpcb(ipxp);
1265
1266         spx_drop(cb, ECONNABORTED);
1267
1268         lwkt_replymsg(&msg->lmsg, 0);
1269 }
1270
1271 /*
1272  * Accept a connection.  Essentially all the work is
1273  * done at higher levels; just return the address
1274  * of the peer, storing through addr.
1275  */
1276 static void
1277 spx_accept(netmsg_t msg)
1278 {
1279         struct socket *so = msg->base.nm_so;
1280         struct sockaddr **nam = msg->accept.nm_nam;
1281         struct ipxpcb *ipxp;
1282         struct sockaddr_ipx *sipx, ssipx;
1283
1284         ipxp = sotoipxpcb(so);
1285         sipx = &ssipx;
1286         bzero(sipx, sizeof *sipx);
1287         sipx->sipx_len = sizeof *sipx;
1288         sipx->sipx_family = AF_IPX;
1289         sipx->sipx_addr = ipxp->ipxp_faddr;
1290         *nam = dup_sockaddr((struct sockaddr *)sipx);
1291
1292         lwkt_replymsg(&msg->lmsg, 0);
1293 }
1294
1295 static int
1296 spx_attach_oncpu(struct socket *so, int proto, struct pru_attach_info *ai)
1297 {
1298         struct ipxpcb *ipxp;
1299         struct spxpcb *cb;
1300         struct mbuf *mm;
1301         struct signalsockbuf *ssb;
1302         int error;
1303
1304         ipxp = sotoipxpcb(so);
1305         cb = ipxtospxpcb(ipxp);
1306
1307         crit_enter();
1308         if (ipxp != NULL) {
1309                 error = EISCONN;
1310                 goto spx_attach_end;
1311         }
1312         error = ipx_pcballoc(so, &ipxpcb);
1313         if (error)
1314                 goto spx_attach_end;
1315         if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
1316                 error = soreserve(so, (u_long) 3072, (u_long) 3072,
1317                                   ai->sb_rlimit);
1318                 if (error)
1319                         goto spx_attach_end;
1320         }
1321         ipxp = sotoipxpcb(so);
1322
1323         MALLOC(cb, struct spxpcb *, sizeof *cb, M_PCB, M_INTWAIT | M_ZERO);
1324         ssb = &so->so_snd;
1325
1326         mm = m_getclr(MB_DONTWAIT, MT_HEADER);
1327         if (mm == NULL) {
1328                 FREE(cb, M_PCB);
1329                 error = ENOBUFS;
1330                 goto spx_attach_end;
1331         }
1332         cb->s_ipx_m = mm;
1333         cb->s_ipx = mtod(mm, struct ipx *);
1334         cb->s_state = TCPS_LISTEN;
1335         cb->s_smax = -1;
1336         cb->s_swl1 = -1;
1337         cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1338         cb->s_ipxpcb = ipxp;
1339         cb->s_mtu = 576 - sizeof(struct spx);
1340         cb->s_cwnd = ssb_space(ssb) * CUNIT / cb->s_mtu;
1341         cb->s_ssthresh = cb->s_cwnd;
1342         cb->s_cwmx = ssb_space(ssb) * CUNIT / (2 * sizeof(struct spx));
1343         /* Above is recomputed when connecting to account
1344            for changed buffering or mtu's */
1345         cb->s_rtt = SPXTV_SRTTBASE;
1346         cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1347         SPXT_RANGESET(cb->s_rxtcur,
1348             ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1349             SPXTV_MIN, SPXTV_REXMTMAX);
1350         ipxp->ipxp_pcb = (caddr_t)cb; 
1351 spx_attach_end:
1352         crit_exit();
1353         return error;
1354 }
1355
1356 static void
1357 spx_attach(netmsg_t msg)
1358 {
1359         int error;
1360
1361         error = spx_attach_oncpu(msg->base.nm_so,
1362                                  msg->attach.nm_proto,
1363                                  msg->attach.nm_ai);
1364         lwkt_replymsg(&msg->lmsg, error);
1365 }
1366
1367
1368 static void
1369 spx_bind(netmsg_t msg)
1370 {  
1371         struct socket *so = msg->base.nm_so;
1372         struct ipxpcb *ipxp;
1373         int error;
1374
1375         ipxp = sotoipxpcb(so);
1376
1377         error = ipx_pcbbind(ipxp, msg->bind.nm_nam, msg->bind.nm_td);
1378         lwkt_replymsg(&msg->lmsg, error);
1379 }  
1380    
1381 /*
1382  * Initiate connection to peer.
1383  * Enter SYN_SENT state, and mark socket as connecting.
1384  * Start keep-alive timer, setup prototype header,
1385  * Send initial system packet requesting connection.
1386  */
1387 static void
1388 spx_connect(netmsg_t msg)
1389 {
1390         struct socket *so = msg->base.nm_so;
1391         struct sockaddr *nam = msg->connect.nm_nam;
1392         struct thread *td = msg->connect.nm_td;
1393         struct ipxpcb *ipxp;
1394         struct spxpcb *cb;
1395         int error;
1396
1397         ipxp = sotoipxpcb(so);
1398         cb = ipxtospxpcb(ipxp);
1399
1400         crit_enter();
1401         if (ipxp->ipxp_lport == 0) {
1402                 error = ipx_pcbbind(ipxp, NULL, td);
1403                 if (error)
1404                         goto spx_connect_end;
1405         }
1406         error = ipx_pcbconnect(ipxp, nam, td);
1407         if (error)
1408                 goto spx_connect_end;
1409         soisconnecting(so);
1410         spxstat.spxs_connattempt++;
1411         cb->s_state = TCPS_SYN_SENT;
1412         cb->s_did = 0;
1413         spx_template(cb);
1414         cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1415         cb->s_force = 1 + SPXTV_KEEP;
1416         /*
1417          * Other party is required to respond to
1418          * the port I send from, but he is not
1419          * required to answer from where I am sending to,
1420          * so allow wildcarding.
1421          * original port I am sending to is still saved in
1422          * cb->s_dport.
1423          */
1424         ipxp->ipxp_fport = 0;
1425         error = spx_output(cb, NULL);
1426 spx_connect_end:
1427         crit_exit();
1428         lwkt_replymsg(&msg->lmsg, error);
1429 }
1430
1431 static void
1432 spx_detach(netmsg_t msg)
1433 {
1434         struct socket *so = msg->base.nm_so;
1435         struct ipxpcb *ipxp;
1436         struct spxpcb *cb;
1437         int error;
1438
1439         ipxp = sotoipxpcb(so);
1440         cb = ipxtospxpcb(ipxp);
1441
1442         if (ipxp) {
1443                 crit_enter();
1444                 if (cb->s_state > TCPS_LISTEN)
1445                         spx_disconnect(cb);
1446                 else
1447                         spx_close(cb);
1448                 crit_exit();
1449                 error = 0;
1450         } else {
1451                 error = ENOTCONN;
1452         }
1453         lwkt_replymsg(&msg->lmsg, error);
1454 }
1455
1456 /*
1457  * We may decide later to implement connection closing
1458  * handshaking at the spx level optionally.
1459  * here is the hook to do it:
1460  */
1461 static void
1462 spx_usr_disconnect(netmsg_t msg)
1463 {
1464         struct socket *so = msg->base.nm_so;
1465         struct ipxpcb *ipxp;
1466         struct spxpcb *cb;
1467
1468         ipxp = sotoipxpcb(so);
1469         cb = ipxtospxpcb(ipxp);
1470
1471         crit_enter();
1472         spx_disconnect(cb);
1473         crit_exit();
1474
1475         lwkt_replymsg(&msg->lmsg, 0);
1476 }
1477
1478 static void
1479 spx_listen(netmsg_t msg)
1480 {
1481         struct socket *so = msg->base.nm_so;
1482         struct ipxpcb *ipxp;
1483         struct spxpcb *cb;
1484         int error;
1485
1486         error = 0;
1487         ipxp = sotoipxpcb(so);
1488         cb = ipxtospxpcb(ipxp);
1489
1490         if (ipxp->ipxp_lport == 0)
1491                 error = ipx_pcbbind(ipxp, NULL, msg->listen.nm_td);
1492         if (error == 0)
1493                 cb->s_state = TCPS_LISTEN;
1494         lwkt_replymsg(&msg->lmsg, error);
1495 }
1496
1497 /*
1498  * After a receive, possibly send acknowledgment
1499  * updating allocation.
1500  */
1501 static void
1502 spx_rcvd(netmsg_t msg)
1503 {
1504         struct socket *so = msg->base.nm_so;
1505         struct ipxpcb *ipxp;
1506         struct spxpcb *cb;
1507
1508         ipxp = sotoipxpcb(so);
1509         cb = ipxtospxpcb(ipxp);
1510
1511         crit_enter();
1512         cb->s_flags |= SF_RVD;
1513         spx_output(cb, NULL);
1514         cb->s_flags &= ~SF_RVD;
1515         crit_exit();
1516
1517         lwkt_replymsg(&msg->lmsg, 0);
1518 }
1519
1520 static void
1521 spx_rcvoob(netmsg_t msg)
1522 {
1523         struct mbuf *m = msg->rcvoob.nm_m;
1524         struct socket *so = msg->base.nm_so;
1525         struct ipxpcb *ipxp;
1526         struct spxpcb *cb;
1527         int error;
1528
1529         ipxp = sotoipxpcb(so);
1530         cb = ipxtospxpcb(ipxp);
1531
1532         if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1533             (so->so_state & SS_RCVATMARK)) {
1534                 m->m_len = 1;
1535                 *mtod(m, caddr_t) = cb->s_iobc;
1536                 error = 0;
1537         } else {
1538                 error = EINVAL;
1539         }
1540         lwkt_replymsg(&msg->lmsg, error);
1541 }
1542
1543 static void
1544 spx_send(netmsg_t msg)
1545 {
1546         struct socket *so = msg->base.nm_so;
1547         struct mbuf *m = msg->send.nm_m;
1548         struct mbuf *controlp = msg->send.nm_control;
1549         int flags = msg->send.nm_flags;
1550         struct ipxpcb *ipxp;
1551         struct spxpcb *cb;
1552         int error;
1553
1554         error = 0;
1555         ipxp = sotoipxpcb(so);
1556         cb = ipxtospxpcb(ipxp);
1557
1558         crit_enter();
1559         if (flags & PRUS_OOB) {
1560                 if (ssb_space(&so->so_snd) < -512) {
1561                         error = ENOBUFS;
1562                         goto spx_send_end;
1563                 }
1564                 cb->s_oobflags |= SF_SOOB;
1565         }
1566         if (controlp != NULL) {
1567                 u_short *p = mtod(controlp, u_short *);
1568                 spx_newchecks[2]++;
1569                 if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1570                         cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1571                         spx_newchecks[3]++;
1572                 }
1573                 m_freem(controlp);
1574         }
1575         controlp = NULL;
1576         error = spx_output(cb, m);
1577         m = NULL;
1578 spx_send_end:
1579         if (controlp != NULL)
1580                 m_freem(controlp);
1581         if (m != NULL)
1582                 m_freem(m);
1583         crit_exit();
1584         lwkt_replymsg(&msg->lmsg, error);
1585 }
1586
1587 static void
1588 spx_shutdown(netmsg_t msg)
1589 {
1590         struct socket *so = msg->base.nm_so;
1591         struct ipxpcb *ipxp;
1592         struct spxpcb *cb;
1593         int error;
1594
1595         error = 0;
1596         ipxp = sotoipxpcb(so);
1597         cb = ipxtospxpcb(ipxp);
1598
1599         crit_enter();
1600         socantsendmore(so);
1601         cb = spx_usrclosed(cb);
1602         if (cb != NULL)
1603                 error = spx_output(cb, NULL);
1604         crit_exit();
1605         lwkt_replymsg(&msg->lmsg, error);
1606 }
1607
1608 static void
1609 spx_sp_attach(netmsg_t msg)
1610 {
1611         struct socket *so = msg->base.nm_so;
1612         struct ipxpcb *ipxp;
1613         int error;
1614
1615         error = spx_attach_oncpu(so, msg->attach.nm_proto, msg->attach.nm_ai);
1616         if (error == 0) {
1617                 ipxp = sotoipxpcb(so);
1618                 ((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1619                                         (SF_HI | SF_HO | SF_PI);
1620         }
1621         lwkt_replymsg(&msg->lmsg, error);
1622 }
1623
1624 /*
1625  * Create template to be used to send spx packets on a connection.
1626  * Called after host entry created, fills
1627  * in a skeletal spx header (choosing connection id),
1628  * minimizing the amount of work necessary when the connection is used.
1629  */
1630 static void
1631 spx_template(struct spxpcb *cb)
1632 {
1633         struct ipxpcb *ipxp = cb->s_ipxpcb;
1634         struct ipx *ipx = cb->s_ipx;
1635         struct signalsockbuf *ssb = &(ipxp->ipxp_socket->so_snd);
1636
1637         ipx->ipx_pt = IPXPROTO_SPX;
1638         ipx->ipx_sna = ipxp->ipxp_laddr;
1639         ipx->ipx_dna = ipxp->ipxp_faddr;
1640         cb->s_sid = htons(spx_iss);
1641         spx_iss += SPX_ISSINCR/2;
1642         cb->s_alo = 1;
1643         cb->s_cwnd = (ssb_space(ssb) * CUNIT) / cb->s_mtu;
1644         cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1645                                         of large packets */
1646         cb->s_cwmx = (ssb_space(ssb) * CUNIT) / (2 * sizeof(struct spx));
1647         cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1648                 /* But allow for lots of little packets as well */
1649 }
1650
1651 /*
1652  * Close a SPIP control block:
1653  *      discard spx control block itself
1654  *      discard ipx protocol control block
1655  *      wake up any sleepers
1656  */
1657 static struct spxpcb *
1658 spx_close(struct spxpcb *cb)
1659 {
1660         struct spx_q *q;
1661         struct spx_q *oq;
1662         struct ipxpcb *ipxp = cb->s_ipxpcb;
1663         struct socket *so = ipxp->ipxp_socket;
1664         struct mbuf *m;
1665
1666         q = cb->s_q.si_next;
1667         while (q != &(cb->s_q)) {
1668                 oq = q;
1669                 q = q->si_next;
1670                 m = oq->si_mbuf;
1671                 remque(oq);
1672                 m_freem(m);
1673                 kfree(oq, M_SPX_Q);
1674         }
1675         m_free(cb->s_ipx_m);
1676         FREE(cb, M_PCB);
1677         ipxp->ipxp_pcb = 0;
1678         soisdisconnected(so);
1679         ipx_pcbdetach(ipxp);
1680         spxstat.spxs_closed++;
1681         return (NULL);
1682 }
1683
/*
 * User has closed the connection.
 *
 * A level 3 closing handshake (or a xerox style error packet) may be
 * implemented here someday; for the moment the control block is simply
 * closed and the result of spx_close() is passed back.
 */
static struct spxpcb *
spx_usrclosed(struct spxpcb *cb)
{
	cb = spx_close(cb);
	return (cb);
}
1694
/*
 * Disconnect: implemented as a plain spx_close() of the control block,
 * whose result is passed back to the caller.
 */
static struct spxpcb *
spx_disconnect(struct spxpcb *cb)
{
	cb = spx_close(cb);
	return (cb);
}
1700
1701 /*
1702  * Drop connection, reporting
1703  * the specified error.
1704  */
1705 static struct spxpcb *
1706 spx_drop(struct spxpcb *cb, int errno)
1707 {
1708         struct socket *so = cb->s_ipxpcb->ipxp_socket;
1709
1710         /*
1711          * someday, in the xerox world
1712          * we will generate error protocol packets
1713          * announcing that the socket has gone away.
1714          */
1715         if (TCPS_HAVERCVDSYN(cb->s_state)) {
1716                 spxstat.spxs_drops++;
1717                 cb->s_state = TCPS_CLOSED;
1718                 /*tcp_output(cb);*/
1719         } else
1720                 spxstat.spxs_conndrops++;
1721         so->so_error = errno;
1722         return (spx_close(cb));
1723 }
1724
1725 /*
1726  * Fast timeout routine for processing delayed acks
1727  */
1728 void
1729 spx_fasttimo(void)
1730 {
1731         struct ipxpcb *ipxp;
1732         struct spxpcb *cb;
1733
1734         crit_enter();
1735         ipxp = ipxpcb.ipxp_next;
1736         if (ipxp != NULL) {
1737             for (; ipxp != &ipxpcb; ipxp = ipxp->ipxp_next) {
1738                 if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1739                     (cb->s_flags & SF_DELACK)) {
1740                         cb->s_flags &= ~SF_DELACK;
1741                         cb->s_flags |= SF_ACKNOW;
1742                         spxstat.spxs_delack++;
1743                         spx_output(cb, NULL);
1744                 }
1745             }
1746         }
1747         crit_exit();
1748 }
1749
1750 /*
1751  * spx protocol timeout routine called every 500 ms.
1752  * Updates the timers in all active pcb's and
1753  * causes finite state machine actions if timers expire.
1754  */
1755 void
1756 spx_slowtimo(void)
1757 {
1758         struct ipxpcb *ip, *ipnxt;
1759         struct spxpcb *cb;
1760         int i;
1761
1762         /*
1763          * Search through tcb's and update active timers.
1764          */
1765         crit_enter();
1766         ip = ipxpcb.ipxp_next;
1767         if (ip == NULL) {
1768                 crit_exit();
1769                 return;
1770         }
1771         while (ip != &ipxpcb) {
1772                 cb = ipxtospxpcb(ip);
1773                 ipnxt = ip->ipxp_next;
1774                 if (cb == NULL)
1775                         goto tpgone;
1776                 for (i = 0; i < SPXT_NTIMERS; i++) {
1777                         if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1778                                 spx_timers(cb, i);
1779                                 if (ipnxt->ipxp_prev != ip)
1780                                         goto tpgone;
1781                         }
1782                 }
1783                 cb->s_idle++;
1784                 if (cb->s_rtt)
1785                         cb->s_rtt++;
1786 tpgone:
1787                 ip = ipnxt;
1788         }
1789         spx_iss += SPX_ISSINCR/PR_SLOWHZ;               /* increment iss */
1790         crit_exit();
1791 }
1792
1793 /*
1794  * SPX timer processing.
1795  */
1796 static struct spxpcb *
1797 spx_timers(struct spxpcb *cb, int timer)
1798 {
1799         long rexmt;
1800         int win;
1801
1802         cb->s_force = 1 + timer;
1803         switch (timer) {
1804
1805         /*
1806          * 2 MSL timeout in shutdown went off.  TCP deletes connection
1807          * control block.
1808          */
1809         case SPXT_2MSL:
1810                 kprintf("spx: SPXT_2MSL went off for no reason\n");
1811                 cb->s_timer[timer] = 0;
1812                 break;
1813
1814         /*
1815          * Retransmission timer went off.  Message has not
1816          * been acked within retransmit interval.  Back off
1817          * to a longer retransmit interval and retransmit one packet.
1818          */
1819         case SPXT_REXMT:
1820                 if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1821                         cb->s_rxtshift = SPX_MAXRXTSHIFT;
1822                         spxstat.spxs_timeoutdrop++;
1823                         cb = spx_drop(cb, ETIMEDOUT);
1824                         break;
1825                 }
1826                 spxstat.spxs_rexmttimeo++;
1827                 rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1828                 rexmt *= spx_backoff[cb->s_rxtshift];
1829                 SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1830                 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1831                 /*
1832                  * If we have backed off fairly far, our srtt
1833                  * estimate is probably bogus.  Clobber it
1834                  * so we'll take the next rtt measurement as our srtt;
1835                  * move the current srtt into rttvar to keep the current
1836                  * retransmit times until then.
1837                  */
1838                 if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1839                         cb->s_rttvar += (cb->s_srtt >> 2);
1840                         cb->s_srtt = 0;
1841                 }
1842                 cb->s_snxt = cb->s_rack;
1843                 /*
1844                  * If timing a packet, stop the timer.
1845                  */
1846                 cb->s_rtt = 0;
1847                 /*
1848                  * See very long discussion in tcp_timer.c about congestion
1849                  * window and sstrhesh
1850                  */
1851                 win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1852                 if (win < 2)
1853                         win = 2;
1854                 cb->s_cwnd = CUNIT;
1855                 cb->s_ssthresh = win * CUNIT;
1856                 spx_output(cb, NULL);
1857                 break;
1858
1859         /*
1860          * Persistance timer into zero window.
1861          * Force a probe to be sent.
1862          */
1863         case SPXT_PERSIST:
1864                 spxstat.spxs_persisttimeo++;
1865                 spx_setpersist(cb);
1866                 spx_output(cb, NULL);
1867                 break;
1868
1869         /*
1870          * Keep-alive timer went off; send something
1871          * or drop connection if idle for too long.
1872          */
1873         case SPXT_KEEP:
1874                 spxstat.spxs_keeptimeo++;
1875                 if (cb->s_state < TCPS_ESTABLISHED)
1876                         goto dropit;
1877                 if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1878                         if (cb->s_idle >= SPXTV_MAXIDLE)
1879                                 goto dropit;
1880                         spxstat.spxs_keepprobe++;
1881                         spx_output(cb, NULL);
1882                 } else
1883                         cb->s_idle = 0;
1884                 cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1885                 break;
1886         dropit:
1887                 spxstat.spxs_keepdrops++;
1888                 cb = spx_drop(cb, ETIMEDOUT);
1889                 break;
1890         }
1891         return (cb);
1892 }