2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/kern/kern_syslink.c,v 1.1 2006/08/06 18:56:44 dillon Exp $
37 * This module implements the syslink() system call and protocol which
38 * is used to glue clusters together as well as to interface userland
39 * devices and filesystems to the kernel.
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
50 #include <sys/thread.h>
51 #include <sys/sysctl.h>
52 #include <sys/sysproto.h>
53 #include <sys/syslink.h>
54 #include <sys/syslink_msg.h>
56 #include <sys/thread2.h>
59 * fileops interface. slbuf and sldata are also used in conjunction with a
60 * normal file descriptor.
/*
 * FIFO bookkeeping fields. Presumably members of struct slbuf; the struct
 * header is not visible here -- TODO confirm. The code in this file lets
 * rindex/windex grow without wrapping and reduces them with "& bufmask"
 * whenever they are used to index buf, which only works when bufsize is a
 * power of 2.
 */
65 int bufsize; /* must be a power of 2 */
66 int bufmask; /* (bufsize - 1) */
67 int rindex; /* tail-chasing FIFO indices */
/*
 * Per-link state. Presumably members of struct sldata; the struct header
 * is not visible here -- TODO confirm. syslink_write() holds rlock and
 * syslink_read() holds wlock around their FIFO accesses.
 */
74 struct file *xfp; /* external file pointer */
75 struct lock rlock; /* synchronizing lock */
76 struct lock wlock; /* synchronizing lock */
77 struct thread *rthread; /* xfp -> rbuf & process */
78 struct thread *wthread; /* wbuf -> xfp */
/*
 * Bits kept in sldata->flags.
 *
 * SLF_RQUIT - tested by the syslink_rthread() main loop; the loop exits
 *	       once it is set.
 * SLF_WQUIT - tested by the syslink_wthread() main loop; set by
 *	       syslink_rthread() on exit when the writer is not done yet.
 * SLF_RDONE - set by syslink_rthread() after its main loop has ended.
 * SLF_WDONE - set by syslink_wthread() after its main loop has ended.
 */
83 #define SLF_RQUIT 0x0001
84 #define SLF_WQUIT 0x0002
85 #define SLF_RDONE 0x0004
86 #define SLF_WDONE 0x0008
/*
 * Size passed to slbuf_alloc() for both rbuf and wbuf. 128*1024 is a power
 * of 2, as the bufmask arithmetic requires.
 */
88 #define SYSLINK_BUFSIZE (128*1024)
/*
 * Forward declarations.
 *
 * The first group are the entry points installed in the syslinkops
 * file operations vector.
 */
90 static int syslink_read (struct file *fp, struct uio *uio,
91 struct ucred *cred, int flags);
92 static int syslink_write (struct file *fp, struct uio *uio,
93 struct ucred *cred, int flags);
94 static int syslink_close (struct file *fp);
95 static int syslink_stat (struct file *fp, struct stat *sb, struct ucred *cred);
96 static int syslink_shutdown (struct file *fp, int how);
97 static int syslink_ioctl (struct file *fp, u_long cmd, caddr_t data,
99 static int syslink_poll (struct file *fp, int events, struct ucred *cred);
100 static int syslink_kqfilter(struct file *fp, struct knote *kn);
/*
 * Worker thread bodies handed to lwkt_create() by sys_syslink(), followed
 * by the FIFO/sldata helpers and the message processing and validation
 * routines.
 */
102 static void syslink_rthread(void *arg);
103 static void syslink_wthread(void *arg);
104 static void slbuf_alloc(struct slbuf *buf, int bytes);
105 static void slbuf_free(struct slbuf *buf);
106 static void sldata_rels(struct sldata *sldata);
107 static int process_syslink_msg(struct sldata *sldata, struct syslink_msg *head);
108 static int syslink_validate(struct syslink_msg *head, int bytes);
/*
 * File operations vector for syslink descriptors. sys_syslink() stores a
 * pointer to it in fp->f_ops for the descriptor it allocates with falloc().
 */
110 static struct fileops syslinkops = {
111 .fo_read = syslink_read,
112 .fo_write = syslink_write,
113 .fo_ioctl = syslink_ioctl,
114 .fo_poll = syslink_poll,
115 .fo_kqfilter = syslink_kqfilter,
116 .fo_stat = syslink_stat,
117 .fo_close = syslink_close,
118 .fo_shutdown = syslink_shutdown
/* Malloc type used by every malloc()/free() call in this file. */
121 MALLOC_DEFINE(M_SYSLINK, "syslink", "syslink manager");
/*
 * Run-time switch tested at the top of sys_syslink(). It has static storage
 * and no initializer, so it starts out as 0. It is registered as a
 * read-write (CTLFLAG_RW) integer sysctl under the _kern node.
 */
123 static int syslink_enabled;
124 SYSCTL_INT(_kern, OID_AUTO, syslink_enabled,
125 CTLFLAG_RW, &syslink_enabled, 0, "Enable SYSLINK");
128 * Kernel mask and match bits. These may be modified during early boot,
129 * before any syslink services have been established, but must remain fixed
130 * after that. Note that the match value is only used if a message leaves
131 * the machine's domain. '0' is used for unmasked match bits to indicate
132 * transport within the machine.
/*
 * sl_mask has the low 32 bits set; sl_match has only bit 32 set. Both are
 * copied out to the caller by the SYSLINK_GETSYSMASK case of sys_syslink().
 */
134 static sysid_t sl_mask = 0x00000000FFFFFFFFLL;
135 static sysid_t sl_match = 0x0000000100000000LL;
138 * Primary system call interface - associate a full-duplex stream
139 * (typically a pipe or a connected socket) with a sysid namespace,
140 * or create a direct link.
142 * syslink(int fd, int cmd, sysid_t *mask, sysid_t *match)
/*
 * sys_syslink() - entry point for the syslink() system call.
 *
 * uap carries the user arguments; the visible code uses uap->fd, uap->mask
 * and uap->match. Two commands are visible:
 *
 *   SYSLINK_ESTABLISH  - allocate an sldata and start worker thread(s),
 *			  either on a freshly allocated syslink descriptor
 *			  or on an existing descriptor looked up from
 *			  uap->fd.
 *   SYSLINK_GETSYSMASK - copy sl_mask and sl_match out to the caller.
 *
 * NOTE(review): several control-flow lines (switch header, branch
 * conditions, error checks, return) are not visible in this listing.
 */
146 sys_syslink(struct syslink_args *uap)
150 struct sldata *sldata;
153 * System call is under construction and disabled by default
155 if (syslink_enabled == 0)
159 case SYSLINK_ESTABLISH:
/* Privilege check on the calling thread; its result lands in error. */
160 error = suser(curthread);
/* M_ZERO: every sldata field starts out as 0/NULL. */
163 sldata = malloc(sizeof(struct sldata), M_SYSLINK, M_WAITOK|M_ZERO);
164 lockinit(&sldata->rlock, "slread", 0, 0);
165 lockinit(&sldata->wlock, "slwrite", 0, 0);
169 * We create a direct syslink descriptor. Only the reader thread
/* falloc() hands back the new file in fp and its number in uap->fd. */
172 error = falloc(curproc, &fp, &uap->fd);
174 fp->f_type = DTYPE_SYSLINK;
175 fp->f_flag = FREAD | FWRITE;
176 fp->f_ops = &syslinkops;
178 slbuf_alloc(&sldata->rbuf, SYSLINK_BUFSIZE);
179 slbuf_alloc(&sldata->wbuf, SYSLINK_BUFSIZE);
/*
 * No writer thread is started on this path, so the writer side is marked
 * as already told to quit and already done. syslink_rthread() checks
 * SLF_WDONE on exit before trying to stop the writer.
 */
181 sldata->flags = SLF_WQUIT | SLF_WDONE;
182 lwkt_create(syslink_rthread, sldata,
183 &sldata->rthread, NULL,
/*
 * Existing-descriptor path (the branch condition is not visible here):
 * both worker threads are started only when holdfp() returns non-NULL.
 */
187 sldata->xfp = holdfp(curproc->p_fd, uap->fd, -1);
188 if (sldata->xfp != NULL) {
189 slbuf_alloc(&sldata->rbuf, SYSLINK_BUFSIZE);
190 slbuf_alloc(&sldata->wbuf, SYSLINK_BUFSIZE);
192 lwkt_create(syslink_rthread, sldata,
193 &sldata->rthread, NULL,
195 lwkt_create(syslink_wthread, sldata,
196 &sldata->wthread, NULL,
/*
 * NOTE(review): the condition guarding this free is not visible here;
 * presumably it is the error path -- confirm.
 */
203 free(sldata, M_SYSLINK);
205 case SYSLINK_GETSYSMASK:
/*
 * NOTE(review): both copyouts assign error; whether the second one is
 * skipped when the first fails depends on a line not visible here.
 */
206 error = copyout(&sl_mask, uap->mask, sizeof(sl_mask));
208 error = copyout(&sl_match, uap->match, sizeof(sl_match));
218 * This thread reads from an external descriptor into rbuf, then parses and
219 * dispatches syslink messages from rbuf.
/*
 * syslink_rthread() - reader thread body.
 *
 * arg is the struct sldata for the link. The thread works on sldata->rbuf:
 * it reads from sldata->xfp into the FIFO at windex, then walks the records
 * between rindex and windex and passes them to process_syslink_msg(). The
 * outer loop runs until SLF_RQUIT is set in sldata->flags. On the way out
 * it sets SLF_RDONE and, when the writer has not finished, asks it to quit.
 */
223 syslink_rthread(void *arg)
225 struct sldata *sldata = arg;
226 struct slbuf *slbuf = &sldata->rbuf;
227 struct syslink_msg *head;
/* Smallest acceptable record: the header up to, not including, src_sysid. */
228 const int min_msg_size = offsetof(struct syslink_msg, src_sysid);
230 while ((sldata->flags & SLF_RQUIT) == 0) {
236 * Calculate contiguous space available to read and read as much
239 * If the entire buffer is used there's probably a format error
240 * of some sort and we terminate the link.
/* Bytes currently queued; the indices are not wrapped when advanced. */
242 used = slbuf->windex - slbuf->rindex;
246 * Read some data, terminate the link if an error occurs or if EOF
247 * is encountered. xfp can be NULL, indicating that the data was
248 * injected by other means.
/*
 * count starts as the distance from the write offset to the physical end
 * of the buffer and is then clamped to the free space (bufsize - used), so
 * a single read neither wraps nor overruns unread data.
 */
251 count = slbuf->bufsize - (slbuf->windex & slbuf->bufmask);
252 if (count > slbuf->bufsize - used)
253 count = slbuf->bufsize - used;
256 error = fp_read(sldata->xfp,
257 slbuf->buf + (slbuf->windex & slbuf->bufmask), count,
263 slbuf->windex += count;
266 tsleep(slbuf, 0, "fiford", 0);
270 * Process as many syslink messages as we can. The record length
271 * must be at least a minimal PAD record (8 bytes). A msgid of 0
274 while (slbuf->windex - slbuf->rindex >= min_msg_size) {
277 head = (void *)(slbuf->buf + (slbuf->rindex & slbuf->bufmask));
278 if (head->reclen < min_msg_size) {
282 aligned_reclen = SLMSG_ALIGN(head->reclen);
/*
 * Wrap test: compares the masked start offset of the record with the
 * masked offset just past its end.
 * NOTE(review): for a record that starts at a non-zero offset and ends
 * exactly at the end of the buffer the right-hand side masks to 0, so this
 * test is true for it as well -- confirm that is intended.
 */
287 if ((slbuf->rindex & slbuf->bufmask) >
288 ((slbuf->rindex + aligned_reclen) & slbuf->bufmask)
295 * Insufficient data read
297 if (slbuf->windex - slbuf->rindex < aligned_reclen)
301 * Process non-pad messages. Non-pad messages have to be at
302 * least the size of the syslink_msg structure.
305 if (head->reclen < sizeof(struct syslink_msg)) {
309 error = process_syslink_msg(sldata, head);
/* Consume the record, including its alignment padding. */
314 slbuf->rindex += aligned_reclen;
321 * Mark us as done and deref sldata. Tell the writer to terminate as
324 sldata->flags |= SLF_RDONE;
/*
 * The writer has not flagged completion yet: request it to quit and wake
 * it. syslink_wthread() sleeps on &sldata->wbuf, the address used here.
 */
325 if ((sldata->flags & SLF_WDONE) == 0) {
326 sldata->flags |= SLF_WQUIT;
327 wakeup(&sldata->wbuf);
333 * This thread takes outgoing syslink messages queued to wbuf and writes them
334 * to the descriptor. PAD is stripped. PAD is also added as required to
335 * conform to the outgoing descriptor's buffering requirements.
/*
 * syslink_wthread() - writer thread body.
 *
 * arg is the struct sldata for the link. The thread works on sldata->wbuf:
 * it takes the record at rindex, writes it to sldata->xfp with fp_write()
 * and advances rindex by the aligned record length. The loop runs until
 * SLF_WQUIT is set in sldata->flags; SLF_WDONE is set afterwards.
 */
339 syslink_wthread(void *arg)
341 struct sldata *sldata = arg;
342 struct slbuf *slbuf = &sldata->wbuf;
343 struct syslink_msg *head;
346 while ((sldata->flags & SLF_WQUIT) == 0) {
/* Bytes currently queued in the FIFO. */
353 used = slbuf->windex - slbuf->rindex;
/* Same minimum record size that syslink_rthread() uses. */
354 if (used < offsetof(struct syslink_msg, src_sysid))
357 head = (void *)(slbuf->buf + (slbuf->rindex & slbuf->bufmask));
358 if (head->reclen < offsetof(struct syslink_msg, src_sysid)) {
362 aligned_reclen = SLMSG_ALIGN(head->reclen);
/*
 * Wrap test, same expression as in syslink_rthread().
 * NOTE(review): a record that starts at a non-zero offset and ends exactly
 * at the end of the buffer also satisfies it -- confirm that is intended.
 */
367 if ((slbuf->rindex & slbuf->bufmask) >
368 ((slbuf->rindex + aligned_reclen) & slbuf->bufmask)
375 * Insufficient data read
377 if (used < aligned_reclen)
381 * Write it out whether it is PAD or not. XXX re-PAD for output
384 error = fp_write(sldata->xfp, head, aligned_reclen, &count);
/* A short write is tested for separately from the fp_write() error code. */
387 if (count != aligned_reclen) {
391 slbuf->rindex += aligned_reclen;
395 tsleep(slbuf, 0, "fifowt", 0);
/* Tested by syslink_rthread() when it exits. */
397 sldata->flags |= SLF_WDONE;
/*
 * slbuf_alloc() - initialize a FIFO descriptor and allocate its storage.
 *
 * slbuf is cleared first, so the indices start at 0. bytes becomes bufsize
 * and bytes - 1 becomes bufmask; that is only a valid index mask when bytes
 * is a power of 2, which is not checked in the visible lines. The callers
 * in this file pass SYSLINK_BUFSIZE.
 */
403 slbuf_alloc(struct slbuf *slbuf, int bytes)
405 bzero(slbuf, sizeof(*slbuf));
406 slbuf->buf = malloc(bytes, M_SYSLINK, M_WAITOK);
407 slbuf->bufsize = bytes;
408 slbuf->bufmask = bytes - 1;
/*
 * slbuf_free() - release the storage allocated by slbuf_alloc().
 *
 * NOTE(review): whether buf or the size fields are reset afterwards depends
 * on lines not visible here.
 */
413 slbuf_free(struct slbuf *slbuf)
415 free(slbuf->buf, M_SYSLINK);
/*
 * sldata_rels() - drop one reference on sldata.
 *
 * When the count reaches 0 both FIFOs are released and the structure itself
 * is freed; the caller must not touch sldata after that.
 */
421 sldata_rels(struct sldata *sldata)
/* Plain pre-decrement; no locking is visible around it. */
423 if (--sldata->refs == 0) {
424 slbuf_free(&sldata->rbuf);
425 slbuf_free(&sldata->wbuf);
426 free(sldata, M_SYSLINK);
431 * fileops for an established syslink when the kernel is asked to create a
432 * descriptor (versus one being handed to it). No threads are created in
437 * Transfer zero or more messages from the kernel to userland. Only complete
438 * messages are returned. If the uio has insufficient space then EMSGSIZE
439 * is returned. The kernel feeds messages to wbuf so we use wlock (structures
440 * are relative to the kernel).
/*
 * syslink_read() - fo_read handler for a syslink descriptor.
 *
 * fp->f_data is the struct sldata. Data is taken from sldata->wbuf at
 * rindex and copied to the caller's uio with uiomove(); rindex is then
 * advanced by the number of bytes transferred. The whole operation runs
 * with sldata->wlock held exclusively.
 */
444 syslink_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
446 struct sldata *sldata = fp->f_data;
447 struct slbuf *slbuf = &sldata->wbuf;
448 struct syslink_msg *head;
/*
 * The per-call O_FBLOCKING / O_FNONBLOCKING flags are tested before the
 * descriptor's own O_NONBLOCK setting. What each branch assigns is not
 * visible here.
 */
454 if (flags & O_FBLOCKING)
456 else if (flags & O_FNONBLOCKING)
458 else if (fp->f_flag & O_NONBLOCK)
463 lockmgr(&sldata->wlock, LK_EXCLUSIVE | LK_RETRY);
466 * Calculate the number of bytes we can transfer in one shot. Transfers
467 * do not wrap the FIFO.
/* contig: bytes from the read offset to the physical end of the buffer. */
469 contig = slbuf->bufsize - (slbuf->rindex & slbuf->bufmask);
/* bytes: everything currently queued. */
471 bytes = slbuf->windex - slbuf->rindex;
478 tsleep(slbuf, 0, "fiford", 0);
484 * The uio must be able to accomodate the transfer.
486 if (uio->uio_resid < bytes) {
492 * Copy the data to userland and update rindex.
494 head = (void *)(slbuf->buf + (slbuf->rindex & slbuf->bufmask));
495 error = uiomove((caddr_t)head, bytes, uio);
497 slbuf->rindex += bytes;
503 lockmgr(&sldata->wlock, LK_RELEASE);
508 * Transfer zero or more messages from userland to the kernel. Only complete
509 * messages may be written. The kernel processes from rbuf so that is where
510 * we have to copy the messages.
/*
 * syslink_write() - fo_write handler for a syslink descriptor.
 *
 * fp->f_data is the struct sldata. The caller's data is copied with
 * uiomove() into sldata->rbuf at windex, checked with syslink_validate(),
 * and windex is advanced. The whole operation runs with sldata->rlock held
 * exclusively.
 */
514 syslink_write (struct file *fp, struct uio *uio, struct ucred *cred, int flags)
516 struct sldata *sldata = fp->f_data;
517 struct slbuf *slbuf = &sldata->rbuf;
518 struct syslink_msg *head;
/*
 * The per-call O_FBLOCKING / O_FNONBLOCKING flags are tested before the
 * descriptor's own O_NONBLOCK setting. What each branch assigns is not
 * visible here.
 */
524 if (flags & O_FBLOCKING)
526 else if (flags & O_FNONBLOCKING)
528 else if (fp->f_flag & O_NONBLOCK)
533 lockmgr(&sldata->rlock, LK_EXCLUSIVE | LK_RETRY);
536 * Calculate the maximum number of contiguous bytes that may be available.
537 * Caller is required to not wrap our FIFO.
/* contig: bytes from the write offset to the physical end of the buffer. */
539 contig = slbuf->bufsize - (slbuf->windex & slbuf->bufmask);
540 if (uio->uio_resid > contig) {
546 * Truncate based on actual unused space available in the FIFO. If
547 * the uio does not fit, block and loop.
/* bytes: free space, i.e. bufsize minus what is currently queued. */
550 bytes = slbuf->bufsize - (slbuf->windex - slbuf->rindex);
553 if (uio->uio_resid <= bytes)
559 tsleep(slbuf, 0, "fifowr", 0);
561 bytes = uio->uio_resid;
562 head = (void *)(slbuf->buf + (slbuf->windex & slbuf->bufmask));
/*
 * The data is copied into the FIFO first and validated in place. windex is
 * moved afterwards; whether that is skipped when validation fails depends
 * on a line not visible here.
 */
563 error = uiomove((caddr_t)head, bytes, uio);
565 error = syslink_validate(head, bytes);
567 slbuf->windex += bytes;
569 lockmgr(&sldata->rlock, LK_RELEASE);
/*
 * Remaining handlers installed in syslinkops. Only their signatures (and
 * one local declaration) are visible in this listing; the bodies are not,
 * so their behavior is not described here.
 */
/* fo_close handler. */
575 syslink_close (struct file *fp)
577 struct sldata *sldata;
/* fo_stat handler. */
587 syslink_stat (struct file *fp, struct stat *sb, struct ucred *cred)
/* fo_shutdown handler. */
594 syslink_shutdown (struct file *fp, int how)
/* fo_ioctl handler. */
601 syslink_ioctl (struct file *fp, u_long cmd, caddr_t data, struct ucred *cred)
/* fo_poll handler. */
608 syslink_poll (struct file *fp, int events, struct ucred *cred)
/* fo_kqfilter handler. */
615 syslink_kqfilter(struct file *fp, struct knote *kn)
621 * Process a syslink message
/*
 * process_syslink_msg() - handle one record taken from the read FIFO.
 *
 * Called by syslink_rthread() with the link state and a pointer to the
 * record header inside the FIFO. The only visible statement logs the
 * record's msgid and cid with printf(); the return statement is not
 * visible here.
 */
625 process_syslink_msg(struct sldata *sldata, struct syslink_msg *head)
627 printf("process syslink msg %08x %04x\n", head->msgid, head->cid);
632 * Validate that the syslink message header(s) are correctly sized.
/*
 * syslink_validate() - check the framing of one or more records.
 *
 * head points at the first record and bytes is the length of the data to
 * check; syslink_write() calls it on data it has just copied into the
 * FIFO. Each pass checks the remaining length, then steps head and bytes
 * past one aligned record. The loop header and the return statements are
 * not visible in this listing.
 */
636 syslink_validate(struct syslink_msg *head, int bytes)
/* Smallest acceptable record: the header up to, not including, src_sysid. */
638 const int min_msg_size = offsetof(struct syslink_msg, src_sysid);
643 * Message size and alignment
645 if (bytes < min_msg_size)
/* The remaining length must be a multiple of the record alignment. */
647 if (bytes & SL_ALIGNMASK)
/*
 * NOTE(review): this compares the remaining buffer length (bytes), not
 * head->reclen, with sizeof(struct syslink_msg). syslink_rthread() applies
 * the corresponding check to head->reclen -- confirm which is intended.
 */
649 if (head->msgid && bytes < sizeof(struct syslink_msg))
653 * Buffer must contain entire record
/*
 * NOTE(review): no lower bound on head->reclen is visible before it is
 * used to advance. With a reclen of 0 the two updates below would leave
 * bytes and head unchanged -- confirm a check exists in the lines not
 * shown.
 */
655 aligned_reclen = SLMSG_ALIGN(head->reclen);
656 if (bytes < aligned_reclen)
658 bytes -= aligned_reclen;
659 head = (void *)((char *)head + aligned_reclen);