2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/kern/kern_syslink.c,v 1.4 2006/12/23 00:35:04 swildner Exp $
37 * This module implements the syslink() system call and protocol which
38 * is used to glue clusters together as well as to interface userland
39 * devices and filesystems to the kernel.
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
50 #include <sys/thread.h>
51 #include <sys/sysctl.h>
52 #include <sys/sysproto.h>
53 #include <sys/syslink.h>
54 #include <sys/syslink_msg.h>
56 #include <sys/thread2.h>
59 * fileops interface. slbuf and sldata are also used in conjunction with a
60 * normal file descriptor.
65 int bufsize; /* must be a power of 2 */
66 int bufmask; /* (bufsize - 1) */
67 int rindex; /* tail-chasing FIFO indices */
74 struct file *xfp; /* external file pointer */
75 struct lock rlock; /* synchronizing lock */
76 struct lock wlock; /* synchronizing lock */
77 struct thread *rthread; /* xfp -> rbuf & process */
78 struct thread *wthread; /* wbuf -> xfp */
83 #define SLF_RQUIT 0x0001 /* request: syslink_rthread loop ends once set */
84 #define SLF_WQUIT 0x0002 /* request: syslink_wthread loop ends once set */
85 #define SLF_RDONE 0x0004 /* set by syslink_rthread after its loop ends */
86 #define SLF_WDONE 0x0008 /* set by syslink_wthread after its loop ends */
88 #define SYSLINK_BUFSIZE (128*1024) /* bytes per FIFO (rbuf and wbuf) */
90 static int syslink_read (struct file *fp, struct uio *uio,
91 struct ucred *cred, int flags);
92 static int syslink_write (struct file *fp, struct uio *uio,
93 struct ucred *cred, int flags);
94 static int syslink_close (struct file *fp);
95 static int syslink_stat (struct file *fp, struct stat *sb, struct ucred *cred);
96 static int syslink_shutdown (struct file *fp, int how);
97 static int syslink_ioctl (struct file *fp, u_long cmd, caddr_t data,
99 static int syslink_poll (struct file *fp, int events, struct ucred *cred);
100 static int syslink_kqfilter(struct file *fp, struct knote *kn);
102 static void syslink_rthread(void *arg);
103 static void syslink_wthread(void *arg);
104 static void slbuf_alloc(struct slbuf *buf, int bytes);
105 static void slbuf_free(struct slbuf *buf);
106 static void sldata_rels(struct sldata *sldata);
107 static int process_syslink_msg(struct sldata *sldata, struct syslink_msg *head);
108 static int syslink_validate(struct syslink_msg *head, int bytes);
/*
 * File operations vector for DTYPE_SYSLINK descriptors.  sys_syslink
 * installs it through fp->f_ops on the descriptor it allocates.
 */
110 static struct fileops syslinkops = {
111 .fo_read = syslink_read,
112 .fo_write = syslink_write,
113 .fo_ioctl = syslink_ioctl,
114 .fo_poll = syslink_poll,
115 .fo_kqfilter = syslink_kqfilter,
116 .fo_stat = syslink_stat,
117 .fo_close = syslink_close,
118 .fo_shutdown = syslink_shutdown
/* malloc type passed to every kmalloc and kfree call visible in this file */
121 MALLOC_DEFINE(M_SYSLINK, "syslink", "syslink manager");
/*
 * Run-time switch exported read-write (CTLFLAG_RW) through SYSCTL_INT under
 * the _kern node.  It is a zero-initialized static, and sys_syslink tests
 * it for 0 before doing any other work.
 */
123 static int syslink_enabled;
124 SYSCTL_INT(_kern, OID_AUTO, syslink_enabled,
125 CTLFLAG_RW, &syslink_enabled, 0, "Enable SYSLINK");
128 * Kernel mask and match bits. These may be modified during early boot,
129 * before any syslink services have been established, but must remain fixed
130 * after that. Note that the match value is only used if a message leaves
131 * the machine's domain. '0' is used for unmasked match bits to indicate
132 * transport within the machine.
134 static sysid_t sl_mask = 0x00000000FFFFFFFFLL;
135 static sysid_t sl_match = 0x0000000100000000LL;
138 * Primary system call interface - associate a full-duplex stream
139 * (typically a pipe or a connected socket) with a sysid namespace,
140 * or create a direct link.
142 * syslink(int fd, int cmd, sysid_t *mask, sysid_t *match)
/*
 * syslink() system call handler.
 *
 * uap supplies fd, mask and match (see the uses below) and receives the
 * result in sysmsg_result.  Two commands are visible: SYSLINK_ESTABLISH
 * and SYSLINK_GETSYSMASK.
 *
 * NOTE(review): this listing omits lines (the switch header, error checks
 * and returns), so the conditions that select between the two ESTABLISH
 * paths below cannot be confirmed from here.
 */
146 sys_syslink(struct syslink_args *uap)
150 struct sldata *sldata;
153 * System call is under construction and disabled by default
155 if (syslink_enabled == 0)
159 case SYSLINK_ESTABLISH:
/* suser() result lands in error; presumably a privilege check - confirm */
160 error = suser(curthread);
/* M_ZERO: every sldata field starts out as zero */
163 sldata = kmalloc(sizeof(struct sldata), M_SYSLINK, M_WAITOK|M_ZERO);
/* one lock per direction: syslink_write takes rlock, syslink_read wlock */
164 lockinit(&sldata->rlock, "slread", 0, 0);
165 lockinit(&sldata->wlock, "slwrite", 0, 0);
169 * We create a direct syslink descriptor. Only the reader thread
/* new file plus descriptor slot; falloc is handed &uap->fd to fill in */
172 error = falloc(curproc, &fp, &uap->fd);
174 fp->f_type = DTYPE_SYSLINK;
175 fp->f_flag = FREAD | FWRITE;
176 fp->f_ops = &syslinkops;
178 slbuf_alloc(&sldata->rbuf, SYSLINK_BUFSIZE);
179 slbuf_alloc(&sldata->wbuf, SYSLINK_BUFSIZE);
/* no writer thread is started on this path: flag it as quit and done */
181 sldata->flags = SLF_WQUIT | SLF_WDONE;
182 lwkt_create(syslink_rthread, sldata,
183 &sldata->rthread, NULL,
185 fsetfd(curproc, fp, uap->fd);
/* the descriptor number is the system call result */
187 uap->sysmsg_result = uap->fd;
/* other ESTABLISH path: attach to the existing descriptor uap->fd */
190 sldata->xfp = holdfp(curproc->p_fd, uap->fd, -1);
191 if (sldata->xfp != NULL) {
192 slbuf_alloc(&sldata->rbuf, SYSLINK_BUFSIZE);
193 slbuf_alloc(&sldata->wbuf, SYSLINK_BUFSIZE);
/* both directions get a service thread on this path */
195 lwkt_create(syslink_rthread, sldata,
196 &sldata->rthread, NULL,
198 lwkt_create(syslink_wthread, sldata,
199 &sldata->wthread, NULL,
/* NOTE(review): the condition guarding this free is not visible here */
206 kfree(sldata, M_SYSLINK);
208 case SYSLINK_GETSYSMASK:
/* copy the kernel mask and match values out to the caller pointers */
209 error = copyout(&sl_mask, uap->mask, sizeof(sl_mask));
211 error = copyout(&sl_match, uap->match, sizeof(sl_match));
221 * This thread reads from an external descriptor into rbuf, then parses and
222 * dispatches syslink messages from rbuf.
/*
 * Reader service thread; arg is the struct sldata for the link.
 *
 * Loops until SLF_RQUIT is set.  Each pass adds data to rbuf (fp_read
 * from xfp) or sleeps on rbuf, then walks the buffered records and hands
 * records to process_syslink_msg.  After the loop it sets SLF_RDONE, asks
 * the writer to quit if the writer is not done yet, and wakes sleepers on
 * both FIFOs.
 *
 * NOTE(review): lines are missing from this listing (local declarations,
 * branch bodies, breaks), so the action taken when one of the checks below
 * fails is not visible and is not described here.
 */
226 syslink_rthread(void *arg)
228 struct sldata *sldata = arg;
229 struct slbuf *slbuf = &sldata->rbuf;
230 struct syslink_msg *head;
/* smallest record accepted: the header bytes that precede src_sysid */
231 const int min_msg_size = offsetof(struct syslink_msg, src_sysid);
233 while ((sldata->flags & SLF_RQUIT) == 0) {
239 * Calculate contiguous space available to read and read as much
242 * If the entire buffer is used there's probably a format error
243 * of some sort and we terminate the link.
/* bytes currently buffered; the indices are masked only on access */
245 used = slbuf->windex - slbuf->rindex;
249 * Read some data, terminate the link if an error occurs or if EOF
250 * is encountered. xfp can be NULL, indicating that the data was
251 * injected by other means.
/* room up to the physical end of the buffer, capped by the free space */
254 count = slbuf->bufsize - (slbuf->windex & slbuf->bufmask);
255 if (count > slbuf->bufsize - used)
256 count = slbuf->bufsize - used;
259 error = fp_read(sldata->xfp,
260 slbuf->buf + (slbuf->windex & slbuf->bufmask), count,
/* publish the newly buffered bytes to the parser below */
266 slbuf->windex += count;
/* NOTE(review): the condition choosing this sleep over fp_read is not visible */
269 tsleep(slbuf, 0, "fiford", 0);
273 * Process as many syslink messages as we can. The record length
274 * must be at least a minimal PAD record (8 bytes). A msgid of 0
/* keep parsing while at least a minimal header is buffered */
277 while (slbuf->windex - slbuf->rindex >= min_msg_size) {
280 head = (void *)(slbuf->buf + (slbuf->rindex & slbuf->bufmask));
281 if (head->reclen < min_msg_size) {
285 aligned_reclen = SLMSG_ALIGN(head->reclen);
/*
 * Compares the masked start offset with the masked end offset of the
 * record; start greater than end means the record crosses the physical
 * end of the buffer.  The rest of this condition is not visible.
 */
290 if ((slbuf->rindex & slbuf->bufmask) >
291 ((slbuf->rindex + aligned_reclen) & slbuf->bufmask)
298 * Insufficient data read
300 if (slbuf->windex - slbuf->rindex < aligned_reclen)
304 * Process non-pad messages. Non-pad messages have to be at
305 * least the size of the syslink_msg structure.
308 if (head->reclen < sizeof(struct syslink_msg)) {
312 error = process_syslink_msg(sldata, head);
/* consume the record, alignment padding included */
317 slbuf->rindex += aligned_reclen;
324 * Mark us as done and deref sldata. Tell the writer to terminate as
327 sldata->flags |= SLF_RDONE;
328 if ((sldata->flags & SLF_WDONE) == 0) {
329 sldata->flags |= SLF_WQUIT;
330 wakeup(&sldata->wbuf);
332 wakeup(&sldata->rbuf);
333 wakeup(&sldata->wbuf);
338 * This thread takes outgoing syslink messages queued to wbuf and writes them
339 * to the descriptor. PAD is stripped. PAD is also added as required to
340 * conform to the outgoing descriptor's buffering requirements.
/*
 * Writer service thread; arg is the struct sldata for the link.
 *
 * Loops until SLF_WQUIT is set, taking one record at a time from wbuf and
 * passing it to fp_write on xfp.  It also sleeps on wbuf (tsleep) at a
 * point whose condition is not visible.  Sets SLF_WDONE once the loop ends.
 *
 * NOTE(review): lines are missing from this listing (local declarations,
 * branch bodies, breaks), so the action taken when one of the checks below
 * fails is not visible and is not described here.
 */
344 syslink_wthread(void *arg)
346 struct sldata *sldata = arg;
347 struct slbuf *slbuf = &sldata->wbuf;
348 struct syslink_msg *head;
351 while ((sldata->flags & SLF_WQUIT) == 0) {
/* bytes queued in wbuf and not yet written out */
358 used = slbuf->windex - slbuf->rindex;
359 if (used < offsetof(struct syslink_msg, src_sysid))
362 head = (void *)(slbuf->buf + (slbuf->rindex & slbuf->bufmask));
363 if (head->reclen < offsetof(struct syslink_msg, src_sysid)) {
367 aligned_reclen = SLMSG_ALIGN(head->reclen);
/* same start-versus-end offset comparison as in syslink_rthread */
372 if ((slbuf->rindex & slbuf->bufmask) >
373 ((slbuf->rindex + aligned_reclen) & slbuf->bufmask)
380 * Insufficient data read
382 if (used < aligned_reclen)
386 * Write it out whether it is PAD or not. XXX re-PAD for output
389 error = fp_write(sldata->xfp, head, aligned_reclen, &count);
/* a short write is tested for here; its handling is not visible */
392 if (count != aligned_reclen) {
/* release the space held by the record in the FIFO */
396 slbuf->rindex += aligned_reclen;
400 tsleep(slbuf, 0, "fifowt", 0);
402 sldata->flags |= SLF_WDONE;
/*
 * Initialize *slbuf as an empty FIFO backed by a newly allocated buffer of
 * the given size.  bufmask is derived as bytes - 1, so bytes has to be a
 * power of 2 for the index masking used elsewhere to work; no check for
 * that is visible here.
 */
408 slbuf_alloc(struct slbuf *slbuf, int bytes)
/* clears the FIFO indices along with every other field */
410 bzero(slbuf, sizeof(*slbuf));
/* M_WAITOK allocation; the result is stored without a NULL test */
411 slbuf->buf = kmalloc(bytes, M_SYSLINK, M_WAITOK);
412 slbuf->bufsize = bytes;
413 slbuf->bufmask = bytes - 1;
/*
 * Release the storage obtained by slbuf_alloc.  The visible code leaves the
 * remaining slbuf fields as they were.
 */
418 slbuf_free(struct slbuf *slbuf)
420 kfree(slbuf->buf, M_SYSLINK);
/*
 * Drop one reference on sldata.  When the count reaches zero both FIFOs
 * and the sldata structure itself are freed.
 *
 * NOTE(review): the decrement is a plain -- with no locking visible in
 * this function - confirm that callers serialize access to refs.
 */
426 sldata_rels(struct sldata *sldata)
428 if (--sldata->refs == 0) {
429 slbuf_free(&sldata->rbuf);
430 slbuf_free(&sldata->wbuf);
431 kfree(sldata, M_SYSLINK);
436 * fileops for an established syslink when the kernel is asked to create a
437 * descriptor (versus one being handed to it). No threads are created in
442 * Transfer zero or more messages from the kernel to userland. Only complete
443 * messages are returned. If the uio has insufficient space then EMSGSIZE
444 * is returned. The kernel feeds messages to wbuf so we use wlock (structures
445 * are relative to the kernel).
/*
 * fo_read handler: moves queued kernel-to-userland data out of wbuf into
 * the caller uio (uiomove) while holding wlock, then advances rindex.
 *
 * NOTE(review): lines are missing from this listing (local declarations,
 * branch bodies, returns), so the blocking-mode assignments and the error
 * paths are not visible and are not described here.
 */
449 syslink_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
451 struct sldata *sldata = fp->f_data;
/* reads drain wbuf, the FIFO that the kernel side fills */
452 struct slbuf *slbuf = &sldata->wbuf;
453 struct syslink_msg *head;
/* per-call flags are tested ahead of the descriptor O_NONBLOCK flag */
459 if (flags & O_FBLOCKING)
461 else if (flags & O_FNONBLOCKING)
463 else if (fp->f_flag & O_NONBLOCK)
468 lockmgr(&sldata->wlock, LK_EXCLUSIVE | LK_RETRY);
471 * Calculate the number of bytes we can transfer in one shot. Transfers
472 * do not wrap the FIFO.
/* bytes from the read position to the physical end of the buffer */
474 contig = slbuf->bufsize - (slbuf->rindex & slbuf->bufmask);
/* bytes currently queued */
476 bytes = slbuf->windex - slbuf->rindex;
/* SLF_RDONE (reader thread finished) is checked ahead of the sleep below */
479 if (sldata->flags & SLF_RDONE) {
487 tsleep(slbuf, 0, "fiford", 0);
493 * The uio must be able to accommodate the transfer.
495 if (uio->uio_resid < bytes) {
501 * Copy the data to userland and update rindex.
503 head = (void *)(slbuf->buf + (slbuf->rindex & slbuf->bufmask));
504 error = uiomove((caddr_t)head, bytes, uio);
506 slbuf->rindex += bytes;
512 lockmgr(&sldata->wlock, LK_RELEASE);
517 * Transfer zero or more messages from userland to the kernel. Only complete
518 * messages may be written. The kernel processes from rbuf so that is where
519 * we have to copy the messages.
/*
 * fo_write handler: copies the caller uio into rbuf under rlock, runs
 * syslink_validate on the copied bytes, and advances windex.
 *
 * NOTE(review): lines are missing from this listing (local declarations,
 * branch bodies, returns), so the blocking-mode assignments and the error
 * paths are not visible and are not described here.
 */
523 syslink_write (struct file *fp, struct uio *uio, struct ucred *cred, int flags)
525 struct sldata *sldata = fp->f_data;
/* writes fill rbuf, the FIFO that syslink_rthread parses */
526 struct slbuf *slbuf = &sldata->rbuf;
527 struct syslink_msg *head;
/* per-call flags are tested ahead of the descriptor O_NONBLOCK flag */
533 if (flags & O_FBLOCKING)
535 else if (flags & O_FNONBLOCKING)
537 else if (fp->f_flag & O_NONBLOCK)
542 lockmgr(&sldata->rlock, LK_EXCLUSIVE | LK_RETRY);
545 * Calculate the maximum number of contiguous bytes that may be available.
546 * Caller is required to not wrap our FIFO.
/* space between the write position and the physical end of the buffer */
548 contig = slbuf->bufsize - (slbuf->windex & slbuf->bufmask);
549 if (uio->uio_resid > contig) {
555 * Truncate based on actual unused space available in the FIFO. If
556 * the uio does not fit, block and loop.
/* unused space: capacity minus the bytes currently queued */
559 bytes = slbuf->bufsize - (slbuf->windex - slbuf->rindex);
562 if (uio->uio_resid <= bytes)
564 if (sldata->flags & SLF_RDONE) {
572 tsleep(slbuf, 0, "fifowr", 0);
/* the whole uio is transferred in one piece */
574 bytes = uio->uio_resid;
575 head = (void *)(slbuf->buf + (slbuf->windex & slbuf->bufmask));
576 error = uiomove((caddr_t)head, bytes, uio);
/* validation runs on the bytes already copied into the FIFO */
578 error = syslink_validate(head, bytes);
/* consumers measure queued data as windex - rindex, so this exposes it */
580 slbuf->windex += bytes;
584 lockmgr(&sldata->rlock, LK_RELEASE);
/*
 * fo_close handler: sets SLF_RQUIT and SLF_WQUIT if they are not already
 * set and wakes any sleeper on the matching FIFO.
 *
 * NOTE(review): the assignment of sldata and everything after the wakeups
 * are not visible in this listing.
 */
590 syslink_close (struct file *fp)
592 struct sldata *sldata;
595 if ((sldata->flags & SLF_RQUIT) == 0) {
596 sldata->flags |= SLF_RQUIT;
597 wakeup(&sldata->rbuf);
599 if ((sldata->flags & SLF_WQUIT) == 0) {
600 sldata->flags |= SLF_WQUIT;
601 wakeup(&sldata->wbuf);
/*
 * Remaining fileops entry points referenced by syslinkops (fo_stat,
 * fo_shutdown, fo_ioctl, fo_poll, fo_kqfilter).
 *
 * NOTE(review): only the signatures are visible in this listing; the
 * bodies are not shown, so no behavior is documented here.
 */
610 syslink_stat (struct file *fp, struct stat *sb, struct ucred *cred)
617 syslink_shutdown (struct file *fp, int how)
624 syslink_ioctl (struct file *fp, u_long cmd, caddr_t data, struct ucred *cred)
631 syslink_poll (struct file *fp, int events, struct ucred *cred)
638 syslink_kqfilter(struct file *fp, struct knote *kn)
644 * Process a syslink message
/*
 * Handle one message handed over by syslink_rthread.  The only visible
 * action is a kprintf of the msgid and cid header fields; sldata is not
 * used by the visible code and the return value is not visible here.
 */
648 process_syslink_msg(struct sldata *sldata, struct syslink_msg *head)
650 kprintf("process syslink msg %08x %04x\n", head->msgid, head->cid);
655 * Validate that the syslink message header(s) are correctly sized.
/*
 * Check the sizing of the record(s) found in the bytes starting at head.
 * After the checks on a record pass, bytes is reduced and head advanced by
 * the aligned record length.
 *
 * NOTE(review): the enclosing loop construct and the return statements are
 * not visible in this listing.  No check that head->reclen is nonzero (or
 * at least min_msg_size) is visible either; if the omitted lines do not
 * contain one, a zero reclen would leave bytes and head unchanged - confirm.
 */
659 syslink_validate(struct syslink_msg *head, int bytes)
/* smallest record accepted: the header bytes that precede src_sysid */
661 const int min_msg_size = offsetof(struct syslink_msg, src_sysid);
666 * Message size and alignment
668 if (bytes < min_msg_size)
670 if (bytes & SL_ALIGNMASK)
/* a nonzero msgid requires at least sizeof(struct syslink_msg) bytes */
672 if (head->msgid && bytes < sizeof(struct syslink_msg))
676 * Buffer must contain entire record
678 aligned_reclen = SLMSG_ALIGN(head->reclen);
679 if (bytes < aligned_reclen)
/* step past this record to whatever follows it in the buffer */
681 bytes -= aligned_reclen;
682 head = (void *)((char *)head + aligned_reclen);