2 * Copyright (c) 2006-2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/kern/kern_syslink.c,v 1.10 2007/04/26 02:10:59 dillon Exp $
37 * This module implements the syslink() system call and protocol which
38 * is used to glue clusters together as well as to interface userland
39 * devices and filesystems to the kernel.
41 * We implement the management node concept in this module. A management
42 * node is basically a router node with additional features that take much
43 * of the protocol burden away from connecting terminal nodes.
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/alist.h>
55 #include <sys/thread.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysproto.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/socketops.h>
63 #include <sys/syslink.h>
64 #include <sys/syslink_msg.h>
65 #include <netinet/in.h>
67 #include <sys/thread2.h>
69 #include "opt_syslink.h"
72 * Red-Black trees organizing the syslink 'router' nodes and connections
/*
 * Declare the two tree types used by this module: a tree of routers and
 * a tree of connections (one such tree is embedded in each router).
 * The comparators named here are the rb_*_compare functions defined
 * later in this file.
 */
78 RB_HEAD(slrouter_rb_tree, slrouter);
79 RB_HEAD(sldata_rb_tree, sldata);
80 RB_PROTOTYPE2(slrouter_rb_tree, slrouter, rbnode,
81 rb_slrouter_compare, sysid_t);
82 RB_PROTOTYPE2(sldata_rb_tree, sldata, rbnode,
83 rb_sldata_compare, int);
86 * Fifo used to buffer broadcast packets
/*
 * Visible members of the FIFO descriptor (the struct header and the
 * remaining members are not shown in this extract).  Code elsewhere in
 * this file turns an index into a buffer offset with (index & bufmask).
 */
90 int bufsize; /* must be a power of 2 */
91 int bufmask; /* (bufsize - 1) */
92 int rindex; /* tail-chasing FIFO indices */
97 * Syslink Router abstraction
/*
 * Per-router state.  Routers are inserted into slrouter_rb_root by
 * SYSLINK_CMD_CREATE and each one carries its own tree of sldata
 * connections.  Members on lines dropped from this extract (for example
 * the reference count and the bitmap used by other functions) are not
 * shown.
 */
100 RB_ENTRY(slrouter) rbnode; /* list of routers */
101 struct sldata_rb_tree sldata_rb_root; /* connections to router */
102 sysid_t sysid; /* logical sysid of router */
103 int flags; /* flags passed on create */
104 int bits; /* accommodate connections (bitmap has 1 << bits entries) */
105 int count; /* number of connections */
108 struct slbuf bbuf; /* broadcast buffer */
109 char label[SYSLINK_LABEL_SIZE]; /* copied out by SYSLINK_CMD_LOCATE */
113 * Syslink Connection abstraction
/*
 * Per-connection state.  Members on lines dropped from this extract
 * (linkid, bits and refs are used by other functions) are not shown.
 */
116 RB_ENTRY(sldata) rbnode;
117 struct slrouter *router; /* organizing router; set last by syslink_add() */
118 struct file *xfp; /* external file pointer (from holdfp()) */
119 struct lock rlock; /* synchronizing lock, taken by syslink_write() */
120 struct lock wlock; /* synchronizing lock, taken by syslink_read() */
121 struct thread *rthread; /* helper thread; address doubles as wakeup channel */
122 struct thread *wthread; /* helper thread; address doubles as wakeup channel */
123 struct sockbuf sior; /* accumulate incoming mbufs */
124 struct sockbuf siow; /* accumulate outgoing mbufs */
125 struct sockaddr sa; /* used w/SLIF_SUBNET mode; copied from info->u.sa */
126 int bindex; /* broadcast index, starts at the router bbuf.windex */
127 int flags; /* connection flags */
131 char label[SYSLINK_LABEL_SIZE]; /* copied out by SYSLINK_CMD_FIND */
/*
 * SYSLINK_BBUFSIZE is the size handed to slbuf_alloc() for a router
 * broadcast buffer; slbuf_alloc() derives bufmask as (bytes - 1), so the
 * value has to stay a power of 2.  SYSLINK_SIOBUFSIZE is the size passed
 * to sbinit() for the per-connection sior/siow sockbufs.
 */
134 #define SYSLINK_BBUFSIZE (32*1024)
135 #define SYSLINK_SIOBUFSIZE (128*1024)
/* Forward declarations: tree comparators. */
137 static int rb_slrouter_compare(struct slrouter *r1, struct slrouter *r2);
138 static int rb_sldata_compare(struct sldata *d1, struct sldata *d2);
/* Forward declarations: router management helpers used by sys_syslink(). */
140 static int syslink_destroy(struct slrouter *slrouter);
141 static int syslink_add(struct slrouter *slrouter,
142 struct syslink_info *info, int *result);
143 static int syslink_rem(struct slrouter *slrouter, struct sldata *sldata,
144 struct syslink_info *info);
/* Forward declarations: entries of the syslinkops fileops vector. */
146 static int syslink_read(struct file *fp, struct uio *uio,
147 struct ucred *cred, int flags);
148 static int syslink_write(struct file *fp, struct uio *uio,
149 struct ucred *cred, int flags);
150 static int syslink_close(struct file *fp);
151 static int syslink_stat(struct file *fp, struct stat *sb, struct ucred *cred);
152 static int syslink_shutdown(struct file *fp, int how);
153 static int syslink_ioctl(struct file *fp, u_long cmd, caddr_t data,
155 static int syslink_poll(struct file *fp, int events, struct ucred *cred);
156 static int syslink_kqfilter(struct file *fp, struct knote *kn);
/* Forward declarations: helper threads and internal utilities. */
158 static void syslink_rthread_so(void *arg);
159 static void syslink_rthread_fp(void *arg);
160 static void syslink_wthread_so(void *arg);
161 static void syslink_wthread_fp(void *arg);
162 static int syslink_getsubnet(struct sockaddr *sa);
163 static struct mbuf *syslink_parse_stream(struct sockbuf *sio);
164 static void syslink_route(struct slrouter *slrouter, int linkid, struct mbuf *m);
165 static void slbuf_alloc(struct slbuf *buf, int bytes);
166 static void slbuf_free(struct slbuf *buf);
167 static void sldata_rels(struct sldata *sldata);
168 static void slrouter_rels(struct slrouter *slrouter);
169 static int process_syslink_msg(struct sldata *sldata, struct syslink_msg *head);
170 static int syslink_validate(struct syslink_msg *head, int bytes);
/*
 * Generate the tree implementations.  The trailing argument names the
 * member (sysid / linkid); the generated *_RB_LOOKUP functions are
 * called elsewhere in this file with a value of that member.
 */
172 RB_GENERATE2(slrouter_rb_tree, slrouter, rbnode,
173 rb_slrouter_compare, sysid_t, sysid);
174 RB_GENERATE2(sldata_rb_tree, sldata, rbnode,
175 rb_sldata_compare, int, linkid);
/*
 * File operations vector.  syslink_add() installs it as fp->f_ops on
 * the descriptor it allocates with falloc().
 */
177 static struct fileops syslinkops = {
178 .fo_read = syslink_read,
179 .fo_write = syslink_write,
180 .fo_ioctl = syslink_ioctl,
181 .fo_poll = syslink_poll,
182 .fo_kqfilter = syslink_kqfilter,
183 .fo_stat = syslink_stat,
184 .fo_close = syslink_close,
185 .fo_shutdown = syslink_shutdown
/* Malloc type used by the kmalloc()/kfree() calls visible in this file. */
188 MALLOC_DEFINE(M_SYSLINK, "syslink", "syslink manager");
/*
 * Read-write integer sysctl registered under _kern.  sys_syslink()
 * tests this variable for zero before doing any work.
 */
190 static int syslink_enabled;
191 SYSCTL_INT(_kern, OID_AUTO, syslink_enabled,
192 CTLFLAG_RW, &syslink_enabled, 0, "Enable SYSLINK");
195 * Support declarations and compare function for our RB trees
/* Root of the tree holding every router created by SYSLINK_CMD_CREATE. */
197 static struct slrouter_rb_tree slrouter_rb_root;
/*
 * Comparator for the router tree: orders two routers by their sysid
 * member.  The return statements are not visible in this extract.
 */
200 rb_slrouter_compare(struct slrouter *r1, struct slrouter *r2)
202 if (r1->sysid < r2->sysid)
204 if (r1->sysid > r2->sysid)
/*
 * Comparator for a connection tree: orders two connections by their
 * linkid member.  The return statements are not visible in this extract.
 */
210 rb_sldata_compare(struct sldata *d1, struct sldata *d2)
212 if (d1->linkid < d2->linkid)
214 if (d1->linkid > d2->linkid)
220 * Compare and callback functions for first-sysid and first-linkid searches.
/*
 * Scan comparator passed to slrouter_rb_tree_RB_SCAN() by the
 * SYSLINK_CMD_LOCATE case.  data is the struct syslink_info being
 * serviced; the router sysid is compared against info->sysid.  The
 * values returned for each outcome are not visible in this extract.
 */
223 syslink_cmd_locate_cmp(struct slrouter *slrouter, void *data)
225 struct syslink_info *info = data;
227 if (slrouter->sysid < info->sysid)
229 if (slrouter->sysid > info->sysid)
/*
 * Scan callback for SYSLINK_CMD_LOCATE.  Copies the router flags and
 * label into the struct syslink_info passed as data.  The caller sets
 * SLIF_ERROR in info->flags before the scan and checks it afterwards,
 * so overwriting flags here is how a hit is signalled.  Any other
 * assignments and the return value are not visible in this extract.
 */
235 syslink_cmd_locate_callback(struct slrouter *slrouter, void *data)
237 struct syslink_info *info = data;
239 info->flags = slrouter->flags; /* also clears SLIF_ERROR */
240 bcopy(slrouter->label, info->label, SYSLINK_LABEL_SIZE);
/*
 * Scan comparator passed to sldata_rb_tree_RB_SCAN() by the
 * SYSLINK_CMD_FIND case.  data is the struct syslink_info being
 * serviced; the connection linkid is compared against info->linkid.
 * The values returned for each outcome are not visible in this extract.
 */
246 syslink_cmd_find_cmp(struct sldata *sldata, void *data)
248 struct syslink_info *info = data;
250 if (sldata->linkid < info->linkid)
252 if (sldata->linkid > info->linkid)
/*
 * Scan callback for SYSLINK_CMD_FIND.  Copies the linkid, flags and
 * label of the matched connection into the struct syslink_info passed
 * as data.  The caller sets SLIF_ERROR in info->flags before the scan
 * and checks it afterwards, so overwriting flags here is how a hit is
 * signalled.  The return value is not visible in this extract.
 */
258 syslink_cmd_find_callback(struct sldata *sldata, void *data)
260 struct syslink_info *info = data;
262 info->linkid = sldata->linkid;
263 info->flags = sldata->flags; /* also clears SLIF_ERROR */
264 bcopy(sldata->label, info->label, SYSLINK_LABEL_SIZE);
270 * Primary system call interface - associate a full-duplex stream
271 * (typically a pipe or a connected socket) with a sysid namespace,
272 * or create a direct link.
274 * syslink(int cmd, struct syslink_info *info, size_t bytes)
/*
 * System call entry point.  Checks syslink_enabled and suser(), copies a
 * struct syslink_info in from uap->info and then services one command;
 * the visible cases are CREATE, DESTROY, LOCATE, ADD, REM and FIND.  The
 * switch expression, the error values and the copy-out of results are
 * on lines not visible in this extract.
 */
277 sys_syslink(struct syslink_args *uap)
279 struct syslink_info info;
280 struct slrouter *slrouter = NULL;
281 struct sldata *sldata = NULL;
286 * System call is under construction and disabled by default.
287 * Superuser access is also required.
289 if (syslink_enabled == 0)
291 error = suser(curthread);
296 * Load and validate the info structure. Unloaded bytes are zerod
297 * out. The label field must always be 0-filled, even if not used
300 bzero(&info, sizeof(info));
/* Copy in only when the caller-supplied length fits inside info. */
301 if ((unsigned)uap->bytes <= sizeof(info)) {
303 error = copyin(uap->info, &info, uap->bytes);
/* Tests the final label byte; the action taken is not visible here. */
310 if (info.label[sizeof(info.label)-1] != 0)
317 case SYSLINK_CMD_CREATE:
319 * Create a new syslink router node. Set refs to prevent the
320 * router node from being destroyed. One ref is our temporary
321 * reference while the other is the SLIF_DESTROYED-interlocked
324 if (info.bits < 2 || info.bits > SYSLINK_ROUTER_MAXBITS)
326 slrouter = kmalloc(sizeof(struct slrouter), M_SYSLINK,
/* A router with this sysid already exists: discard the new allocation. */
328 if (slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root, info.sysid)) {
329 kfree(slrouter, M_SYSLINK);
333 slrouter->sysid = info.sysid;
335 slrouter->bits = info.bits;
336 slrouter->flags = info.flags & SLIF_USERFLAGS;
337 slrouter->bitmap = alist_create(1 << info.bits, M_SYSLINK);
338 slbuf_alloc(&slrouter->bbuf, SYSLINK_BBUFSIZE);
339 RB_INIT(&slrouter->sldata_rb_root);
340 RB_INSERT(slrouter_rb_tree, &slrouter_rb_root, slrouter);
342 case SYSLINK_CMD_DESTROY:
344 * Destroy a syslink router node. The physical node is
345 * not freed until our temporary reference is removed.
347 slrouter = slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root,
/* Only the first destroy request flags the router and drops a reference. */
351 if ((slrouter->flags & SLIF_DESTROYED) == 0) {
352 slrouter->flags |= SLIF_DESTROYED;
353 /* SLIF_DESTROYED interlock */
354 slrouter_rels(slrouter);
355 error = syslink_destroy(slrouter);
356 /* still holding our private interlock */
360 case SYSLINK_CMD_LOCATE:
362 * Locate the first syslink router node >= info.sysid
/* SLIF_ERROR is a sentinel: the scan callback overwrites flags on a hit. */
364 info.flags |= SLIF_ERROR;
365 n = slrouter_rb_tree_RB_SCAN(
367 syslink_cmd_locate_cmp, syslink_cmd_locate_callback,
369 if (info.flags & SLIF_ERROR)
/* Attach a new connection to an existing router that is not being destroyed. */
372 case SYSLINK_CMD_ADD:
373 slrouter = slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root, info.sysid);
375 (info.bits < 2 || info.bits > SYSLINK_ROUTER_MAXBITS)) {
377 } else if (slrouter && (slrouter->flags & SLIF_DESTROYED)) {
379 * Someone is trying to destroy this route node,
380 * no new adds please!
383 } else if (slrouter) {
385 error = syslink_add(slrouter, &info,
386 &uap->sysmsg_result);
/* Look up the router, then the connection by linkid, and ask it to quit. */
391 case SYSLINK_CMD_REM:
392 slrouter = slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root,
396 sldata = sldata_rb_tree_RB_LOOKUP(&slrouter->sldata_rb_root, info.linkid);
399 error = syslink_rem(slrouter, sldata, &info);
/* Scan the connection tree of the router with the find cmp/callback pair. */
407 case SYSLINK_CMD_FIND:
408 slrouter = slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root, info.sysid);
409 info.flags |= SLIF_ERROR;
412 n = sldata_rb_tree_RB_SCAN(
413 &slrouter->sldata_rb_root,
414 syslink_cmd_find_cmp, syslink_cmd_find_callback,
416 if (info.flags & SLIF_ERROR)
/* Drops a router reference; the guarding condition, if any, is not visible. */
433 slrouter_rels(slrouter);
/*
 * RB_SCAN callback used by syslink_destroy().  For one connection, sets
 * SLIF_RQUIT and SLIF_WQUIT if they are not already set and wakes the
 * matching helper-thread channel each time a flag is newly set.  The
 * data argument is unused.
 */
439 syslink_destroy_callback(struct sldata *sldata, void *data __unused)
442 if ((sldata->flags & SLIF_RQUIT) == 0) {
443 sldata->flags |= SLIF_RQUIT;
444 wakeup(&sldata->rthread);
446 if ((sldata->flags & SLIF_WQUIT) == 0) {
447 sldata->flags |= SLIF_WQUIT;
448 wakeup(&sldata->wthread);
455 * Shutdown all the connections going into this syslink.
457 * Try to wait for completion, but return after 1 second
/*
 * Ask every connection of the router to quit and wait for the
 * connection tree to drain.  Each pass runs syslink_destroy_callback()
 * over the tree and then sleeps.  The initial value and decrement of
 * retries, and the values returned, are not visible in this extract.
 */
462 syslink_destroy(struct slrouter *slrouter)
466 while (!RB_EMPTY(&slrouter->sldata_rb_root) && retries) {
467 RB_SCAN(sldata_rb_tree, &slrouter->sldata_rb_root, NULL,
468 syslink_destroy_callback, slrouter);
/* Pause for hz / 10 ticks between passes. */
470 tsleep(&retries, 0, "syslnk", hz / 10);
/* Final check: did every connection go away? */
472 if (RB_EMPTY(&slrouter->sldata_rb_root))
/*
 * Create a connection (sldata) on the given router.
 *
 * Visible steps:
 *  - numphys is 1 << info->bits, or 1 when info->bits is zero; that many
 *    ids are requested from the router bitmap with alist_alloc().  If
 *    the allocator returns ALIST_BLOCK_NONE the new sldata is freed.
 *  - the sldata is inserted into the router connection tree; a linkid
 *    that is already present there is treated as a fatal inconsistency
 *    (panic).
 *  - both sockbufs are initialised to SYSLINK_SIOBUFSIZE, bindex starts
 *    at the current broadcast write index, flags are masked with
 *    SLIF_USERFLAGS, both locks are initialised and info->u.sa is copied.
 *  - one path allocates a new descriptor with falloc(), gives it
 *    syslinkops and marks the connection RQUIT/RDONE/WQUIT/WDONE, which
 *    matches the existing comment that no helper threads are used there.
 *  - the other path takes a hold on the caller-supplied descriptor
 *    info->fd and creates a reader and a writer thread, using the _so
 *    variants for DTYPE_SOCKET and the _fp variants otherwise.
 *  - sldata->router is assigned last.
 *
 * The condition selecting between the two descriptor paths, the
 * reference counting, the error values and the use of the result
 * argument are on lines not visible in this extract.
 */
480 syslink_add(struct slrouter *slrouter, struct syslink_info *info,
483 struct sldata *sldata;
491 maxphys = 1 << slrouter->bits;
492 numphys = info->bits ? (1 << info->bits) : 1;
495 * Create a connection to the route node and allocate a physical ID.
496 * Physical ID 0 is reserved for the route node itself, and an all-1's
497 * ID is reserved as a broadcast address.
499 sldata = kmalloc(sizeof(struct sldata), M_SYSLINK, M_WAITOK|M_ZERO);
501 linkid = alist_alloc(slrouter->bitmap, numphys);
502 if (linkid == ALIST_BLOCK_NONE) {
503 kfree(sldata, M_SYSLINK);
508 * Insert the node, initializing enough fields to prevent things from
509 * being ripped out from under us before we have a chance to complete
512 sldata->linkid = linkid;
515 if (sldata_rb_tree_RB_LOOKUP(&slrouter->sldata_rb_root, linkid))
516 panic("syslink_add: free linkid wasn't free!");
517 RB_INSERT(sldata_rb_tree, &slrouter->sldata_rb_root, sldata);
520 * Complete initialization of the physical route node. Setting
521 * sldata->router activates the node.
523 sbinit(&sldata->sior, SYSLINK_SIOBUFSIZE);
524 sbinit(&sldata->siow, SYSLINK_SIOBUFSIZE);
525 sldata->bindex = slrouter->bbuf.windex;
526 sldata->flags = info->flags & SLIF_USERFLAGS;
527 lockinit(&sldata->rlock, "slread", 0, 0);
528 lockinit(&sldata->wlock, "slwrite", 0, 0);
529 bcopy(&info->u.sa, &sldata->sa, sizeof(sldata->sa));
533 * We create a direct syslink descriptor. No helper threads
536 error = falloc(curproc, &fp, &info->fd);
538 fp->f_type = DTYPE_SYSLINK;
539 fp->f_flag = FREAD | FWRITE;
540 fp->f_ops = &syslinkops;
542 /* one ref: the fp descriptor */
544 sldata->flags |= SLIF_WQUIT | SLIF_WDONE;
545 sldata->flags |= SLIF_RQUIT | SLIF_RDONE;
546 fsetfd(curproc, fp, info->fd);
551 sldata->xfp = holdfp(curproc->p_fd, info->fd, -1);
552 if (sldata->xfp != NULL) {
553 /* two refs: reader thread and writer thread */
555 if (sldata->xfp->f_type == DTYPE_SOCKET) {
556 lwkt_create(syslink_rthread_so, sldata,
557 &sldata->rthread, NULL,
559 lwkt_create(syslink_wthread_so, sldata,
560 &sldata->wthread, NULL,
563 lwkt_create(syslink_rthread_fp, sldata,
564 &sldata->rthread, NULL,
566 lwkt_create(syslink_wthread_fp, sldata,
567 &sldata->wthread, NULL,
574 sldata->router = slrouter;
/*
 * Ask one connection to shut down: set SLIF_RQUIT and SLIF_WQUIT if not
 * already set and wake the matching helper-thread channel.  error is
 * initialised to EINPROGRESS; whether it changes before the return is
 * not visible in this extract.  The slrouter and info arguments are not
 * used in the visible lines.
 */
581 syslink_rem(struct slrouter *slrouter, struct sldata *sldata,
582 struct syslink_info *info)
584 int error = EINPROGRESS;
586 if ((sldata->flags & SLIF_RQUIT) == 0) {
587 sldata->flags |= SLIF_RQUIT;
588 wakeup(&sldata->rthread);
591 if ((sldata->flags & SLIF_WQUIT) == 0) {
592 sldata->flags |= SLIF_WQUIT;
593 wakeup(&sldata->wthread);
600 * Read syslink messages from an external socket and route them.
/*
 * Reader helper thread for socket-backed connections.  arg is the
 * sldata.  Loops until SLIF_RQUIT is set, receiving from the socket and
 * handing mbuf chains to syslink_route(); on exit it sets SLIF_RDONE,
 * flushes both sockbufs and tells the writer thread to quit.
 *
 * NOTE(review): the linkid adjustment and the packet/stream routing
 * logic appear twice in the visible lines; one copy may be conditionally
 * compiled in the full file - confirm before relying on either.
 */
604 syslink_rthread_so(void *arg)
606 struct sldata *sldata = arg;
615 so = (void *)sldata->xfp->f_data;
619 * Calculate whether we need to get the peer address or not.
620 * We need to obtain the peer address for packet-mode sockets
621 * representing subnets (rather then single connections).
623 needsa = (sldata->bits && (sldata->flags & SLIF_PACKET));
625 while ((sldata->flags & SLIF_RQUIT) == 0) {
627 * Read some data. This is easy if the data is packetized,
628 * otherwise we can still obtain an mbuf chain but we have
629 * to parse out the syslink messages.
/* The peer address is requested only when needsa is set. */
632 error = so_pru_soreceive(so,
633 (needsa ? &sa : NULL),
638 * The target is responsible for adjusting the src address
639 * field in the syslink_msg. We may need subnet information
640 * from the sockaddr to accomplish this.
642 * For streams representing subnets the originator is
643 * responsible for tagging its subnet bits in the src
644 * address but we have to renormalize
/* Start from the base link id of this connection. */
646 linkid = sldata->linkid;
647 if (sldata->flags & SLIF_PACKET) {
/* Add the low sldata->bits bits of the subnet id derived from the peer. */
649 linkid += syslink_getsubnet(sa) &
650 ((1 << sldata->bits) - 1);
/* Detach the accumulated chain, reset the sockbuf, then route the chain. */
652 if ((m = sldata->sior.sb_mb) != NULL) {
653 sbinit(&sldata->sior, SYSLINK_SIOBUFSIZE);
654 syslink_route(sldata->router, linkid, m);
657 while ((m = syslink_parse_stream(&sldata->sior)) != NULL) {
658 syslink_route(sldata->router, linkid, m);
667 if ((sldata->flags & SLIF_SUBNET) && sldata->bits && sa) {
668 linkid += syslink_getsubnet(sa) &
669 ((1 << sldata->bits) - 1);
676 * Note: Incoming syslink messages must have their headers
677 * adjusted to reflect the origination address. This will
678 * be handled by syslink_route.
680 if (sldata->flags & SLIF_PACKET) {
682 * Packetized data can just be directly routed.
684 if ((m = sldata->sior.sb_mb) != NULL) {
685 sbinit(&sldata->sior, SYSLINK_SIOBUFSIZE);
686 syslink_route(sldata->router, linkid, m);
690 * Stream data has to be parsed out.
692 while ((m = syslink_parse_stream(&sldata->sior)) != NULL) {
693 syslink_route(sldata->router, linkid, m);
699 * Mark us as done and deref sldata. Tell the writer to terminate as
/* Shutdown path: flag completion, discard both sockbufs, stop the writer. */
702 sldata->flags |= SLIF_RDONE;
703 sbflush(&sldata->sior);
704 sbflush(&sldata->siow);
705 if ((sldata->flags & SLIF_WDONE) == 0) {
706 sldata->flags |= SLIF_WQUIT;
707 wakeup(&sldata->wthread);
/* Wake anything sleeping on either thread channel. */
709 wakeup(&sldata->rthread);
710 wakeup(&sldata->wthread);
715 * Read syslink messages from an external descriptor and route them. Used
716 * when no socket interface is available.
/*
 * Reader helper thread for connections backed by a non-socket
 * descriptor.  arg is the sldata.  Loops until SLIF_RQUIT is set; on
 * exit it sets SLIF_RDONE, flushes both sockbufs and tells the writer
 * thread to quit.
 *
 * NOTE(review): the fp_read() call refers to slink and slbuf, neither
 * of which is declared in the visible lines; the call may be
 * conditionally compiled out in the full file - confirm.
 */
720 syslink_rthread_fp(void *arg)
722 struct sldata *sldata = arg;
726 * Loop until told otherwise
728 while ((sldata->flags & SLIF_RQUIT) == 0) {
729 error = fp_read(slink->xfp,
731 (slbuf->windex & slbuf->bufmask
733 count, &count, 0, UIO_SYSSPACE);
738 * Mark us as done and deref sldata. Tell the writer to terminate as
/* Shutdown path: flag completion, discard both sockbufs, stop the writer. */
741 sldata->flags |= SLIF_RDONE;
742 sbflush(&sldata->sior);
743 sbflush(&sldata->siow);
744 if ((sldata->flags & SLIF_WDONE) == 0) {
745 sldata->flags |= SLIF_WQUIT;
746 wakeup(&sldata->wthread);
/* Wake anything sleeping on either thread channel. */
748 wakeup(&sldata->rthread);
749 wakeup(&sldata->wthread);
/*
 * Extract the next message from a stream sockbuf.  Callers loop on this
 * function until it returns NULL and route each returned mbuf chain.
 * The body is not visible in this extract.
 */
755 syslink_parse_stream(struct sockbuf *sio)
/*
 * Route one message (mbuf chain m) that arrived on linkid through the
 * given router.
 *
 * NOTE(review): the visible lines that follow the signature refer to
 * sldata, slbuf, used, count and min_msg_size, none of which are
 * parameters of this function.  They read into a FIFO with fp_read(),
 * advance windex, then walk complete records (minimum size check,
 * SLMSG_ALIGN, wrap check, short-data check) and pass each one to
 * process_syslink_msg() before advancing rindex.  They probably belong
 * to older, conditionally compiled FIFO reader code rather than to this
 * function - confirm against the full file.
 */
762 syslink_route(struct slrouter *slrouter, int linkid, struct mbuf *m)
775 * Calculate contiguous space available to read and read as
778 * If the entire buffer is used there's probably a format
779 * error of some sort and we terminate the link.
781 used = slbuf->windex - slbuf->rindex;
785 * Read some data, terminate the link if an error occurs or
786 * if EOF is encountered. xfp can be NULL, indicating that
787 * the data was injected by other means.
790 count = slbuf->bufsize -
791 (slbuf->windex & slbuf->bufmask);
792 if (count > slbuf->bufsize - used)
793 count = slbuf->bufsize - used;
796 error = fp_read(sldata->xfp,
798 (slbuf->windex & slbuf->bufmask),
799 count, &count, 0, UIO_SYSSPACE);
804 slbuf->windex += count;
807 tsleep(slbuf, 0, "fiford", 0);
811 * Process as many syslink messages as we can. The record
812 * length must be at least a minimal PAD record (8 bytes).
814 while (slbuf->windex - slbuf->rindex >= min_msg_size) {
817 head = (void *)(slbuf->buf +
818 (slbuf->rindex & slbuf->bufmask));
819 if (head->sm_bytes < min_msg_size) {
823 aligned_reclen = SLMSG_ALIGN(head->sm_bytes);
828 if ((slbuf->rindex & slbuf->bufmask) >
829 ((slbuf->rindex + aligned_reclen) & slbuf->bufmask)
836 * Insufficient data read
838 if (slbuf->windex - slbuf->rindex < aligned_reclen)
842 * Process non-pad messages. Non-pad messages have
843 * to be at least the size of the syslink_msg
846 * A PAD message's sm_cmd field contains 0.
849 if (head->sm_bytes < sizeof(*head)) {
853 error = process_syslink_msg(sldata, head);
858 slbuf->rindex += aligned_reclen;
869 * This thread takes outgoing syslink messages queued to wbuf and writes them
870 * to the descriptor. PAD is stripped. PAD is also added as required to
871 * conform to the outgoing descriptor's buffering requirements.
/*
 * Writer helper thread for socket-backed connections.  arg is the
 * sldata.  Loops until SLIF_WQUIT is set, then sets SLIF_WDONE and
 * drops a reference with sldata_rels().
 *
 * Visible per-iteration work:
 *  - if the router broadcast read index has moved past this connection
 *    broadcast index, the connection index is pulled forward to it;
 *  - when broadcast data is available, a single-iovec uio is built over
 *    the contiguous part of the broadcast buffer and sent with
 *    so_pru_sosend(); in SLIF_PACKET mode the leading message header is
 *    first checked against SL_MIN_MESSAGE_SIZE and the contiguous size;
 *  - pending mbufs in siow are checked for;
 *  - otherwise the thread sleeps on &sldata->wthread.
 *
 * NOTE(review): in the visible lines iov_len and uio_resid are set from
 * bytes before the SLIF_PACKET path recomputes bytes with SLMSG_ALIGN(),
 * and the result of so_pru_sosend() is not captured - confirm against
 * the full file.
 * NOTE(review): the trailing lines that use slbuf, used, aligned_reclen
 * and fp_write() reference names not declared in the visible lines and
 * may be conditionally compiled FIFO code - confirm.
 */
875 syslink_wthread_so(void *arg)
877 struct sldata *sldata = arg;
878 struct slrouter *slrouter;
879 struct syslink_msg *head;
889 so = (void *)sldata->xfp->f_data;
890 slrouter = sldata->router;
892 while ((sldata->flags & SLIF_WQUIT) == 0) {
894 * Deal with any broadcast data sitting in the route node's
895 * broadcast buffer. If we have fallen too far behind the
896 * data may no longer be valid.
898 * avail -- available data in broadcast buffer and
899 * bytes -- available contiguous data in broadcast buffer
901 if (slrouter->bbuf.rindex - sldata->bindex > 0)
902 sldata->bindex = slrouter->bbuf.rindex;
903 if ((avail = slrouter->bbuf.windex - sldata->bindex) > 0) {
904 bytes = slrouter->bbuf.bufsize -
905 (sldata->bindex & slrouter->bbuf.bufmask);
908 head = (void *)(slrouter->bbuf.buf +
909 (sldata->bindex & slrouter->bbuf.bufmask));
911 * Break into packets if necessary, else just write
912 * it all in one fell swoop.
914 aiov.iov_base = (void *)head;
915 aiov.iov_len = bytes;
916 auio.uio_iov = &aiov;
919 auio.uio_resid = bytes;
920 auio.uio_segflg = UIO_SYSSPACE;
921 auio.uio_rw = UIO_WRITE;
922 auio.uio_td = curthread;
923 if (sldata->flags & SLIF_PACKET) {
924 if (head->sm_bytes < SL_MIN_MESSAGE_SIZE) {
925 kprintf("syslink_msg too small, terminating\n");
928 if (head->sm_bytes > bytes) {
929 kprintf("syslink_msg not FIFO aligned, terminating\n");
932 bytes = SLMSG_ALIGN(head->sm_bytes);
933 so_pru_sosend(so, sa, &auio, NULL, NULL, 0, curthread);
935 so_pru_sosend(so, sa, &auio, NULL, NULL, 0, curthread);
941 * Deal with mbuf records waiting to be output
943 if (sldata->siow.sb_mb != NULL) {
948 * Block waiting for something to do.
950 tsleep(&sldata->wthread, 0, "wait", 0);
960 used = slbuf->windex - slbuf->rindex;
961 if (used < SL_MIN_MESSAGE_SIZE)
964 head = (void *)(slbuf->buf +
965 (slbuf->rindex & slbuf->bufmask));
966 if (head->sm_bytes < SL_MIN_MESSAGE_SIZE) {
970 aligned_reclen = SLMSG_ALIGN(head->sm_bytes);
975 if ((slbuf->rindex & slbuf->bufmask) >
976 ((slbuf->rindex + aligned_reclen) & slbuf->bufmask)
983 * Insufficient data read
985 if (used < aligned_reclen)
989 * Write it out whether it is PAD or not.
990 * XXX re-PAD for output here.
992 error = fp_write(sldata->xfp, head,
996 if (error && error != ENOBUFS)
998 if (count != aligned_reclen) {
1002 slbuf->rindex += aligned_reclen;
1006 tsleep(slbuf, 0, "fifowt", 0);
1009 sldata->flags |= SLIF_WDONE;
1010 sldata_rels(sldata);
/*
 * Writer helper thread for connections backed by a non-socket
 * descriptor.  In the visible lines it only marks the writer as done
 * (SLIF_WDONE) and drops a reference on the sldata.
 */
1015 syslink_wthread_fp(void *arg)
1017 struct sldata *sldata = arg;
1019 sldata->flags |= SLIF_WDONE;
1020 sldata_rels(sldata);
/*
 * Initialise a FIFO descriptor: zero the structure (which resets the
 * indices), allocate bytes of storage with M_WAITOK and record the
 * size.  bufmask is computed as bytes - 1, so bytes is expected to be a
 * power of 2 (see the bufsize member comment); nothing here checks it.
 */
1025 slbuf_alloc(struct slbuf *slbuf, int bytes)
1027 bzero(slbuf, sizeof(*slbuf));
1028 slbuf->buf = kmalloc(bytes, M_SYSLINK, M_WAITOK);
1029 slbuf->bufsize = bytes;
1030 slbuf->bufmask = bytes - 1;
/*
 * Release the storage allocated by slbuf_alloc().  Whether the
 * descriptor fields are reset afterwards is not visible in this extract.
 */
1035 slbuf_free(struct slbuf *slbuf)
1037 kfree(slbuf->buf, M_SYSLINK);
/*
 * Drop one reference on a connection.  When the count reaches zero the
 * connection is removed from the connection tree of its router, freed,
 * and the router reference is released in turn.  The router pointer is
 * saved first because the sldata is freed before slrouter_rels() runs.
 */
1043 sldata_rels(struct sldata *sldata)
1045 struct slrouter *slrouter;
1047 if (--sldata->refs == 0) {
1048 slrouter = sldata->router;
/* A connection being torn down must still be attached to a router. */
1049 KKASSERT(slrouter != NULL);
1051 RB_REMOVE(sldata_rb_tree,
1052 &sldata->router->sldata_rb_root, sldata);
1053 sldata->router = NULL;
1054 kfree(sldata, M_SYSLINK);
1055 slrouter_rels(slrouter);
/*
 * Drop one reference on a router.  The router is torn down only when
 * the count reaches zero and it has no remaining connections: it is
 * removed from the global tree and its bitmap, broadcast buffer and
 * structure are freed.  The assertion requires that the router was
 * flagged SLIF_DESTROYED before the last reference went away.
 */
1061 slrouter_rels(struct slrouter *slrouter)
1063 if (--slrouter->refs == 0 && RB_EMPTY(&slrouter->sldata_rb_root)) {
1064 KKASSERT(slrouter->flags & SLIF_DESTROYED);
1065 RB_REMOVE(slrouter_rb_tree, &slrouter_rb_root, slrouter);
1066 alist_destroy(slrouter->bitmap, M_SYSLINK);
1067 slrouter->bitmap = NULL;
1068 slbuf_free(&slrouter->bbuf);
1069 kfree(slrouter, M_SYSLINK);
1074 * A switched ethernet socket connected to a syslink router node may
1075 * represent an entire subnet. We need to generate a subnet id from
1076 * the originating IP address which the caller can then incorporate into
1077 * the base linkid assigned to the connection to form the actual linkid
1078 * originating the message.
/*
 * Derive an integer id from a socket address, switching on
 * sa->sa_family.  Two branches are visible: one treats sa as a
 * sockaddr_in and uses the whole IPv4 address in host byte order, the
 * other treats it as a sockaddr_in6 and uses only the first 32-bit word
 * of the address (marked XXX by the original author).  The case labels,
 * the default handling and the return are not visible in this extract.
 * Callers mask the result down to the number of bits they need.
 */
1082 syslink_getsubnet(struct sockaddr *sa)
1085 struct in6_addr *i6;
1088 switch(sa->sa_family) {
1090 i4 = &((struct sockaddr_in *)sa)->sin_addr;
1091 linkid = (int)ntohl(i4->s_addr);
1094 i6 = &((struct sockaddr_in6 *)sa)->sin6_addr;
1095 linkid = (int)ntohl(i6->s6_addr32[0]); /* XXX */
1105 * fileops for an established syslink when the kernel is asked to create a
1106 * descriptor (versus one being handed to it). No threads are created in
1111 * Transfer zero or more messages from the kernel to userland. Only complete
1112 * messages are returned. If the uio has insufficient space then EMSGSIZE
1113 * is returned. The kernel feeds messages to wbuf so we use wlock (structures
1114 * are relative to the kernel).
/*
 * fo_read handler for syslink descriptors.  The whole operation runs
 * with sldata->wlock held exclusively.  It works out how many bytes are
 * pending in a FIFO (windex - rindex) and how many are contiguous,
 * sleeps on the FIFO when it has to wait, checks that the uio is large
 * enough, and then copies the data out with uiomove() and advances
 * rindex.
 *
 * The blocking-mode selection from flags / fp->f_flag, the error values
 * and the declaration of slbuf are on lines not visible in this extract.
 */
1118 syslink_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
1120 struct sldata *sldata = fp->f_data;
1122 struct syslink_msg *head;
/* Per-call flags are tested before the descriptor O_NONBLOCK setting. */
1129 if (flags & O_FBLOCKING)
1131 else if (flags & O_FNONBLOCKING)
1133 else if (fp->f_flag & O_NONBLOCK)
1138 lockmgr(&sldata->wlock, LK_EXCLUSIVE | LK_RETRY);
1143 * Calculate the number of bytes we can transfer in one shot. Transfers
1144 * do not wrap the FIFO.
1146 contig = slbuf->bufsize - (slbuf->rindex & slbuf->bufmask);
1148 bytes = slbuf->windex - slbuf->rindex;
1151 if (sldata->flags & SLIF_RDONE) {
1159 tsleep(slbuf, 0, "fiford", 0);
1165 * The uio must be able to accomodate the transfer.
1167 if (uio->uio_resid < bytes) {
1173 * Copy the data to userland and update rindex.
1175 head = (void *)(slbuf->buf + (slbuf->rindex & slbuf->bufmask));
1176 error = uiomove((caddr_t)head, bytes, uio);
1178 slbuf->rindex += bytes;
1185 lockmgr(&sldata->wlock, LK_RELEASE);
1190 * Transfer zero or more messages from userland to the kernel. Only complete
1191 * messages may be written. The kernel processes from rbuf so that is where
1192 * we have to copy the messages.
/*
 * fo_write handler for syslink descriptors.  The whole operation runs
 * with sldata->rlock held exclusively.  The write must fit in the
 * contiguous space up to the end of the FIFO; the code then waits
 * (sleeping on the FIFO) until enough free space exists, copies the
 * user data in with uiomove(), validates it with syslink_validate()
 * and advances windex.
 *
 * The blocking-mode selection, the error values and the conditions
 * guarding the validate / windex steps are on lines not visible in this
 * extract.
 * NOTE(review): slbuf is taken from sldata->rbuf, a member that is not
 * among the visible struct sldata members - confirm against the full
 * file.
 */
1196 syslink_write (struct file *fp, struct uio *uio, struct ucred *cred, int flags)
1198 struct sldata *sldata = fp->f_data;
1200 struct slbuf *slbuf = &sldata->rbuf;
1201 struct syslink_msg *head;
/* Per-call flags are tested before the descriptor O_NONBLOCK setting. */
1208 if (flags & O_FBLOCKING)
1210 else if (flags & O_FNONBLOCKING)
1212 else if (fp->f_flag & O_NONBLOCK)
1217 lockmgr(&sldata->rlock, LK_EXCLUSIVE | LK_RETRY);
1222 * Calculate the maximum number of contiguous bytes that may be
1223 * available. Caller is required to not wrap our FIFO.
1225 contig = slbuf->bufsize - (slbuf->windex & slbuf->bufmask);
1226 if (uio->uio_resid > contig) {
1232 * Truncate based on actual unused space available in the FIFO. If
1233 * the uio does not fit, block and loop.
1236 bytes = slbuf->bufsize - (slbuf->windex - slbuf->rindex);
1239 if (uio->uio_resid <= bytes)
1241 if (sldata->flags & SLIF_RDONE) {
1249 tsleep(slbuf, 0, "fifowr", 0);
/* Remember the transfer size and destination before uiomove() consumes the uio. */
1251 bytes = uio->uio_resid;
1252 head = (void *)(slbuf->buf + (slbuf->windex & slbuf->bufmask));
1253 error = uiomove((caddr_t)head, bytes, uio);
1255 error = syslink_validate(head, bytes);
1257 slbuf->windex += bytes;
1262 lockmgr(&sldata->rlock, LK_RELEASE);
/*
 * fo_close handler for syslink descriptors.  Sets SLIF_RQUIT and
 * SLIF_WQUIT on the connection if not already set, waking the matching
 * thread channel each time, and then drops a reference on the sldata.
 * The return value is not visible in this extract.
 */
1268 syslink_close (struct file *fp)
1270 struct sldata *sldata;
1272 sldata = fp->f_data;
1273 if ((sldata->flags & SLIF_RQUIT) == 0) {
1274 sldata->flags |= SLIF_RQUIT;
1275 wakeup(&sldata->rthread);
1277 if ((sldata->flags & SLIF_WQUIT) == 0) {
1278 sldata->flags |= SLIF_WQUIT;
1279 wakeup(&sldata->wthread);
1282 sldata_rels(sldata);
/*
 * Remaining fileops entries referenced by syslinkops.  Only the
 * signatures survive in this extract; the bodies (and therefore the
 * values returned) are not visible.
 */
1288 syslink_stat (struct file *fp, struct stat *sb, struct ucred *cred)
1295 syslink_shutdown (struct file *fp, int how)
1302 syslink_ioctl (struct file *fp, u_long cmd, caddr_t data, struct ucred *cred)
1309 syslink_poll (struct file *fp, int events, struct ucred *cred)
1316 syslink_kqfilter(struct file *fp, struct knote *kn)
1322 * This routine is called from a route node's reader thread to process a
1323 * syslink message once it has been completely read and its size validated.
/*
 * Handle one syslink message.  The only visible action is to log the
 * sm_cmd field of the message with kprintf(); sldata is not used in the
 * visible lines and the return value is not visible in this extract.
 */
1327 process_syslink_msg(struct sldata *sldata, struct syslink_msg *head)
1329 kprintf("process syslink msg %08x\n", head->sm_cmd);
1334 * Validate that the syslink message header(s) are correctly sized.
1338 syslink_validate(struct syslink_msg *head, int bytes)
1340 const int min_msg_size = SL_MIN_MESSAGE_SIZE;
1345 * Message size and alignment
1347 if (bytes < min_msg_size)
1349 if (bytes & SL_ALIGNMASK)
1351 if (head->sm_cmd && bytes < sizeof(struct syslink_msg))
1355 * Buffer must contain entire record
1357 aligned_reclen = SLMSG_ALIGN(head->sm_bytes);
1358 if (bytes < aligned_reclen)
1360 bytes -= aligned_reclen;
1361 head = (void *)((char *)head + aligned_reclen);