2 * Copyright (c) 2006-2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/kern/kern_syslink.c,v 1.12 2007/06/17 21:31:05 dillon Exp $
37 * This module implements the core syslink() system call and provides
38 * glue for kernel syslink frontends and backends, creating an intra-host
39 * communications infrastructure and DMA transport abstraction.
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/endian.h>
46 #include <sys/malloc.h>
47 #include <sys/alist.h>
52 #include <sys/objcache.h>
53 #include <sys/queue.h>
54 #include <sys/thread.h>
56 #include <sys/sysctl.h>
57 #include <sys/sysproto.h>
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/socketops.h>
62 #include <sys/sysref.h>
63 #include <sys/syslink.h>
64 #include <sys/syslink_msg.h>
65 #include <netinet/in.h>
67 #include <sys/thread2.h>
68 #include <sys/spinlock2.h>
70 #include "opt_syslink.h"
73 * Syslink Connection abstraction
82 struct slmsg_rb_tree reply_rb_root; /* replies to requests */
84 struct sldesc *peer; /* peer syslink, if any */
85 struct file *xfp; /* external file pointer */
86 struct slcommon *common;
88 int rwaiters; /* number of threads waiting */
89 int wblocked; /* blocked waiting for us to drain */
90 size_t cmdbytes; /* unreplied commands pending */
91 size_t repbytes; /* undrained replies pending */
92 int (*backend_wblocked)(struct sldesc *, int, sl_proto_t);
93 int (*backend_write)(struct sldesc *, struct slmsg *);
94 void (*backend_reply)(struct sldesc *,struct slmsg *,struct slmsg *);
95 void (*backend_dispose)(struct sldesc *, struct slmsg *);
98 #define SLF_RSHUTDOWN 0x0001
99 #define SLF_WSHUTDOWN 0x0002
101 static int syslink_cmd_new(struct syslink_info_new *info, int *result);
102 static struct sldesc *allocsldesc(struct slcommon *common);
103 static void setsldescfp(struct sldesc *sl, struct file *fp);
104 static void shutdownsldesc(struct sldesc *sl, int how);
105 static void shutdownsldesc2(struct sldesc *sl, int how);
106 static void sldrop(struct sldesc *sl);
107 static int syslink_validate_msg(struct syslink_msg *msg, int bytes);
108 static int syslink_validate_elm(struct syslink_elm *elm, sl_reclen_t bytes,
109 int swapit, int depth);
111 static int backend_wblocked_user(struct sldesc *sl, int nbio, sl_proto_t proto);
112 static int backend_write_user(struct sldesc *sl, struct slmsg *slmsg);
113 static void backend_reply_user(struct sldesc *sl, struct slmsg *slcmd,
114 struct slmsg *slrep);
115 static void backend_dispose_user(struct sldesc *sl, struct slmsg *slmsg);
117 static int backend_wblocked_kern(struct sldesc *sl, int nbio, sl_proto_t proto);
118 static int backend_write_kern(struct sldesc *sl, struct slmsg *slmsg);
119 static void backend_reply_kern(struct sldesc *sl, struct slmsg *slcmd,
120 struct slmsg *slrep);
121 static void backend_dispose_kern(struct sldesc *sl, struct slmsg *slmsg);
124 * Objcache memory backend
126 * All three object caches return slmsg structures but each is optimized
127 * for syslink message buffers of varying sizes. We use the slightly
128 * more complex ctor/dtor API in order to provide ready-to-go slmsg's.
131 static struct objcache *sl_objcache_big;
132 static struct objcache *sl_objcache_small;
133 static struct objcache *sl_objcache_none;
135 MALLOC_DEFINE(M_SYSLINK, "syslink", "syslink manager");
137 static boolean_t slmsg_ctor(void *data, void *private, int ocflags);
138 static void slmsg_dtor(void *data, void *private);
142 syslinkinit(void *dummy __unused)
144 size_t n = sizeof(struct slmsg);
146 sl_objcache_none = objcache_create_mbacked(M_SYSLINK, n, 0, 64,
147 slmsg_ctor, slmsg_dtor,
149 sl_objcache_small= objcache_create_mbacked(M_SYSLINK, n, 0, 64,
150 slmsg_ctor, slmsg_dtor,
152 sl_objcache_big = objcache_create_mbacked(M_SYSLINK, n, 0, 16,
153 slmsg_ctor, slmsg_dtor,
159 slmsg_ctor(void *data, void *private, int ocflags)
161 struct slmsg *slmsg = data;
163 bzero(slmsg, sizeof(*slmsg));
165 slmsg->oc = *(struct objcache **)private;
166 if (slmsg->oc == sl_objcache_none) {
168 } else if (slmsg->oc == sl_objcache_small) {
169 slmsg->maxsize = SLMSG_SMALL;
170 } else if (slmsg->oc == sl_objcache_big) {
171 slmsg->maxsize = SLMSG_BIG;
173 panic("slmsg_ctor: bad objcache?\n");
175 if (slmsg->maxsize) {
176 slmsg->msg = kmalloc(slmsg->maxsize,
177 M_SYSLINK, M_WAITOK|M_ZERO);
184 slmsg_dtor(void *data, void *private)
186 struct slmsg *slmsg = data;
188 if (slmsg->maxsize && slmsg->msg) {
189 kfree(slmsg->msg, M_SYSLINK);
195 SYSINIT(syslink, SI_BOOT2_MACHDEP, SI_ORDER_ANY, syslinkinit, NULL)
197 static int rb_slmsg_compare(struct slmsg *msg1, struct slmsg *msg2);
198 RB_GENERATE2(slmsg_rb_tree, slmsg, rbnode, rb_slmsg_compare,
199 sysid_t, msg->sm_msgid);
204 static int syslink_enabled;
205 SYSCTL_NODE(_kern, OID_AUTO, syslink, CTLFLAG_RW, 0, "Pipe operation");
206 SYSCTL_INT(_kern_syslink, OID_AUTO, enabled,
207 CTLFLAG_RW, &syslink_enabled, 0, "Enable SYSLINK");
208 static size_t syslink_bufsize = 65536;
209 SYSCTL_UINT(_kern_syslink, OID_AUTO, bufsize,
210 CTLFLAG_RW, &syslink_bufsize, 0, "Maximum buffer size");
213 * Fileops API - typically used to glue a userland frontend with a
217 static int slfileop_read(struct file *fp, struct uio *uio,
218 struct ucred *cred, int flags);
219 static int slfileop_write(struct file *fp, struct uio *uio,
220 struct ucred *cred, int flags);
221 static int slfileop_close(struct file *fp);
222 static int slfileop_stat(struct file *fp, struct stat *sb, struct ucred *cred);
223 static int slfileop_shutdown(struct file *fp, int how);
224 static int slfileop_ioctl(struct file *fp, u_long cmd, caddr_t data,
226 static int slfileop_poll(struct file *fp, int events, struct ucred *cred);
227 static int slfileop_kqfilter(struct file *fp, struct knote *kn);
229 static struct fileops syslinkops = {
230 .fo_read = slfileop_read,
231 .fo_write = slfileop_write,
232 .fo_ioctl = slfileop_ioctl,
233 .fo_poll = slfileop_poll,
234 .fo_kqfilter = slfileop_kqfilter,
235 .fo_stat = slfileop_stat,
236 .fo_close = slfileop_close,
237 .fo_shutdown = slfileop_shutdown
240 /************************************************************************
241 * PRIMARY SYSTEM CALL INTERFACE *
242 ************************************************************************
244 * syslink(int cmd, struct syslink_info *info, size_t bytes)
247 sys_syslink(struct syslink_args *uap)
249 union syslink_info_all info;
253 * System call is under construction and disabled by default.
254 * Superuser access is also required for now, but eventually
255 * will not be needed.
257 if (syslink_enabled == 0)
259 error = suser(curthread);
264 * Load and validate the info structure. Unloaded bytes are zerod
265 * out. The label field must always be 0-filled, even if not used
268 bzero(&info, sizeof(info));
269 if ((unsigned)uap->bytes <= sizeof(info)) {
271 error = copyin(uap->info, &info, uap->bytes);
279 * Process the command
282 case SYSLINK_CMD_NEW:
283 error = syslink_cmd_new(&info.cmd_new, &uap->sysmsg_result);
289 if (error == 0 && info.head.wbflag)
290 copyout(&info, uap->info, uap->bytes);
295 * Create a linked pair of descriptors, like a pipe.
299 syslink_cmd_new(struct syslink_info_new *info, int *result)
301 struct proc *p = curproc;
305 struct sldesc *slpeer;
309 error = falloc(p, &fp1, &fd1);
312 error = falloc(p, &fp2, &fd2);
314 fsetfd(p, NULL, fd1);
318 slpeer = allocsldesc(NULL);
319 slpeer->backend_wblocked = backend_wblocked_user;
320 slpeer->backend_write = backend_write_user;
321 slpeer->backend_reply = backend_reply_user;
322 slpeer->backend_dispose = backend_dispose_user;
323 sl = allocsldesc(slpeer->common);
325 sl->backend_wblocked = backend_wblocked_user;
326 sl->backend_write = backend_write_user;
327 sl->backend_reply = backend_reply_user;
328 sl->backend_dispose = backend_dispose_user;
331 setsldescfp(sl, fp1);
332 setsldescfp(slpeer, fp2);
339 info->head.wbflag = 1; /* write back */
346 /************************************************************************
347 * LOW LEVEL SLDESC SUPPORT *
348 ************************************************************************
354 allocsldesc(struct slcommon *common)
358 sl = kmalloc(sizeof(struct sldesc), M_SYSLINK, M_WAITOK|M_ZERO);
360 common = kmalloc(sizeof(*common), M_SYSLINK, M_WAITOK|M_ZERO);
361 TAILQ_INIT(&sl->inq); /* incoming requests */
362 RB_INIT(&sl->reply_rb_root); /* match incoming replies */
363 spin_init(&sl->spin);
371 setsldescfp(struct sldesc *sl, struct file *fp)
374 fp->f_type = DTYPE_SYSLINK;
375 fp->f_flag = FREAD | FWRITE;
376 fp->f_ops = &syslinkops;
381 * Red-black tree compare function
385 rb_slmsg_compare(struct slmsg *msg1, struct slmsg *msg2)
387 if (msg1->msg->sm_msgid < msg2->msg->sm_msgid)
389 if (msg1->msg->sm_msgid == msg2->msg->sm_msgid)
396 shutdownsldesc(struct sldesc *sl, int how)
401 shutdownsldesc2(sl, how);
404 * Return unread and unreplied messages
406 spin_lock_wr(&sl->spin);
407 while ((slmsg = TAILQ_FIRST(&sl->inq)) != NULL) {
408 TAILQ_REMOVE(&sl->inq, slmsg, tqnode);
409 spin_unlock_wr(&sl->spin);
410 if (slmsg->msg->sm_proto & SM_PROTO_REPLY) {
411 sl->repbytes -= slmsg->maxsize;
412 slmsg->flags &= ~SLMSGF_ONINQ;
413 sl->peer->backend_dispose(sl->peer, slmsg);
415 /* leave ONINQ set for commands, it will cleared below */
416 spin_lock_wr(&sl->spin);
418 while ((slmsg = RB_ROOT(&sl->reply_rb_root)) != NULL) {
419 RB_REMOVE(slmsg_rb_tree, &sl->reply_rb_root, slmsg);
420 sl->cmdbytes -= slmsg->maxsize;
421 spin_unlock_wr(&sl->spin);
422 slmsg->flags &= ~SLMSGF_ONINQ;
423 sl->peer->backend_reply(sl->peer, slmsg, NULL);
424 spin_lock_wr(&sl->spin);
426 spin_unlock_wr(&sl->spin);
429 * Call shutdown on the peer with the opposite flags
443 shutdownsldesc2(sl->peer, rhow);
448 shutdownsldesc2(struct sldesc *sl, int how)
450 spin_lock_wr(&sl->spin);
453 sl->flags |= SLF_RSHUTDOWN;
456 sl->flags |= SLF_WSHUTDOWN;
459 sl->flags |= SLF_RSHUTDOWN | SLF_WSHUTDOWN;
462 spin_unlock_wr(&sl->spin);
465 * Handle signaling on the user side
469 wakeup(&sl->rwaiters);
473 sl->wblocked = 0; /* race ok */
474 wakeup(&sl->wblocked);
481 sldrop(struct sldesc *sl)
483 struct sldesc *slpeer;
485 spin_lock_wr(&sl->common->spin);
486 if (--sl->common->refs == 0) {
487 spin_unlock_wr(&sl->common->spin);
488 if ((slpeer = sl->peer) != NULL) {
491 slpeer->common = NULL;
492 KKASSERT(slpeer->xfp == NULL);
493 KKASSERT(TAILQ_EMPTY(&slpeer->inq));
494 KKASSERT(RB_EMPTY(&slpeer->reply_rb_root));
495 kfree(slpeer, M_SYSLINK);
497 KKASSERT(sl->xfp == NULL);
498 KKASSERT(TAILQ_EMPTY(&sl->inq));
499 KKASSERT(RB_EMPTY(&sl->reply_rb_root));
500 kfree(sl->common, M_SYSLINK);
502 kfree(sl, M_SYSLINK);
504 spin_unlock_wr(&sl->common->spin);
508 /************************************************************************
510 ************************************************************************
512 * Implement userland fileops.
518 slfileop_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
520 struct sldesc *sl = fp->f_data; /* fp refed on call */
527 * Kinda messy. Figure out the non-blocking state
529 if (flags & O_FBLOCKING)
531 else if (flags & O_FNONBLOCKING)
533 else if (fp->f_flag & O_NONBLOCK)
541 if (uio->uio_iovcnt < 1) {
545 iov0 = &uio->uio_iov[0];
548 * Get a message, blocking if necessary.
550 spin_lock_wr(&sl->spin);
551 while ((slmsg = TAILQ_FIRST(&sl->inq)) == NULL) {
552 if (sl->flags & SLF_RSHUTDOWN) {
561 error = msleep(&sl->rwaiters, &sl->spin, PCATCH, "slrmsg", 0);
568 * We have a message. If there isn't enough space, return
569 * ENOSPC without dequeueing it.
571 if (slmsg->msgsize > iov0->iov_len) {
577 * Dequeue the message. Adjust repbytes immediately. cmdbytes
578 * are adjusted when the command is replied to, not here.
580 TAILQ_REMOVE(&sl->inq, slmsg, tqnode);
581 if (slmsg->msg->sm_proto & SM_PROTO_REPLY)
582 sl->repbytes -= slmsg->maxsize;
583 spin_unlock_wr(&sl->spin);
586 * Load the message data into the user buffer and clean up. We
587 * may have to wakeup blocked writers.
589 if ((error = uiomove((void *)slmsg->msg, slmsg->msgsize, uio)) == 0) {
593 if (slmsg->msg->sm_proto & SM_PROTO_REPLY) {
595 * Dispose of any received reply after we've copied it
596 * to userland. We don't need the slmsg any more.
598 slmsg->flags &= ~SLMSGF_ONINQ;
599 sl->peer->backend_dispose(sl->peer, slmsg);
600 if (sl->wblocked && sl->repbytes < syslink_bufsize) {
601 sl->wblocked = 0; /* MP race ok here */
602 wakeup(&sl->wblocked);
606 * Reply to a command that we failed to copy to userspace.
608 spin_lock_wr(&sl->spin);
609 RB_REMOVE(slmsg_rb_tree, &sl->reply_rb_root, slmsg);
610 sl->cmdbytes -= slmsg->maxsize;
611 spin_unlock_wr(&sl->spin);
612 slmsg->flags &= ~SLMSGF_ONINQ;
613 sl->peer->backend_reply(sl->peer, slmsg, NULL);
614 if (sl->wblocked && sl->cmdbytes < syslink_bufsize) {
615 sl->wblocked = 0; /* MP race ok here */
616 wakeup(&sl->wblocked);
620 * Leave the command in the RB tree but clear ONINQ now
621 * that we have returned it to userland so userland can
624 slmsg->flags &= ~SLMSGF_ONINQ;
628 spin_unlock_wr(&sl->spin);
634 * Userland writes syslink message
638 slfileop_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
640 struct sldesc *sl = fp->f_data;
643 struct syslink_msg sltmp;
650 * Kinda messy. Figure out the non-blocking state
652 if (flags & O_FBLOCKING)
654 else if (flags & O_FNONBLOCKING)
656 else if (fp->f_flag & O_NONBLOCK)
664 if (uio->uio_iovcnt < 1) {
668 iov0 = &uio->uio_iov[0];
669 if (iov0->iov_len > SLMSG_BIG) {
675 * Handle the buffer-full case. slpeer cmdbytes is managed
676 * by the backend function, not us so if the callback just
677 * directly implements the message and never adjusts cmdbytes,
678 * we will never sleep here.
680 if (sl->flags & SLF_WSHUTDOWN) {
686 * Only commands can block the pipe, not replies. Otherwise a
687 * deadlock is possible.
689 error = copyin(iov0->iov_base, &sltmp, sizeof(sltmp));
692 if ((proto = sltmp.sm_proto) & SM_PROTO_ENDIAN_REV)
693 proto = bswap16(proto);
694 error = sl->peer->backend_wblocked(sl->peer, nbio, proto);
699 * Allocate a slmsg and load the message. Note that the bytes
700 * returned to userland only reflects the primary syslink message
701 * and does not include any DMA buffers.
703 if (iov0->iov_len <= SLMSG_SMALL)
704 slmsg = objcache_get(sl_objcache_small, M_WAITOK);
706 slmsg = objcache_get(sl_objcache_big, M_WAITOK);
707 slmsg->msgsize = iov0->iov_len;
709 error = uiomove((void *)slmsg->msg, iov0->iov_len, uio);
712 error = syslink_validate_msg(slmsg->msg, slmsg->msgsize);
717 * Replies have to be matched up against received commands.
719 if (slmsg->msg->sm_proto & SM_PROTO_REPLY) {
720 spin_lock_wr(&sl->spin);
721 slcmd = slmsg_rb_tree_RB_LOOKUP(&sl->reply_rb_root,
722 slmsg->msg->sm_msgid);
723 if (slcmd == NULL || (slcmd->flags & SLMSGF_ONINQ)) {
725 spin_unlock_wr(&sl->spin);
728 RB_REMOVE(slmsg_rb_tree, &sl->reply_rb_root, slcmd);
729 sl->cmdbytes -= slcmd->maxsize;
730 spin_unlock_wr(&sl->spin);
731 sl->peer->backend_reply(sl->peer, slcmd, slmsg);
732 if (sl->wblocked && sl->cmdbytes < syslink_bufsize) {
733 sl->wblocked = 0; /* MP race ok here */
734 wakeup(&sl->wblocked);
738 error = sl->peer->backend_write(sl->peer, slmsg);
742 objcache_put(slmsg->oc, slmsg);
748 * Close a syslink descriptor.
750 * Disassociate the syslink from the file descriptor and disconnect from
755 slfileop_close(struct file *fp)
760 * Disassociate the file pointer. Take ownership of the ref on the
765 fp->f_ops = &badfileops;
769 * Shutdown both directions. The other side will not issue API
770 * calls to us after we've shutdown both directions.
772 shutdownsldesc(sl, SHUT_RDWR);
777 KKASSERT(sl->cmdbytes == 0);
778 KKASSERT(sl->repbytes == 0);
785 slfileop_stat (struct file *fp, struct stat *sb, struct ucred *cred)
792 slfileop_shutdown (struct file *fp, int how)
794 shutdownsldesc((struct sldesc *)fp->f_data, how);
800 slfileop_ioctl (struct file *fp, u_long cmd, caddr_t data, struct ucred *cred)
807 slfileop_poll (struct file *fp, int events, struct ucred *cred)
814 slfileop_kqfilter(struct file *fp, struct knote *kn)
819 /************************************************************************
820 * MESSAGE VALIDATION *
821 ************************************************************************
823 * Validate that the syslink message. Check that all headers and elements
824 * conform. Correct the endian if necessary.
826 * NOTE: If reverse endian needs to be corrected, SE_CMDF_UNTRANSLATED
827 * is recursively flipped on all syslink_elm's in the message. As the
828 * message traverses the mesh, multiple flips may occur. It is
829 * up to the RPC protocol layer to correct opaque data payloads and
830 * SE_CMDF_UNTRANSLATED prevents the protocol layer from misinterpreting
831 * a command or reply element which has not been endian-corrected.
835 syslink_validate_msg(struct syslink_msg *msg, int bytes)
842 * The raw message must be properly-aligned.
844 if (bytes & SL_ALIGNMASK)
849 * The message must at least contain the msgid, bytes, and
852 if (bytes < SL_MIN_PAD_SIZE)
856 * Fix the endian if it is reversed.
858 if (msg->sm_proto & SM_PROTO_ENDIAN_REV) {
859 msg->sm_msgid = bswap64(msg->sm_msgid);
860 msg->sm_sessid = bswap64(msg->sm_sessid);
861 msg->sm_bytes = bswap16(msg->sm_bytes);
862 msg->sm_proto = bswap16(msg->sm_proto);
863 msg->sm_rlabel = bswap32(msg->sm_rlabel);
864 if (msg->sm_proto & SM_PROTO_ENDIAN_REV)
872 * Validate the contents. For PADs, the entire payload is
873 * ignored and the minimum message size can be as small as
876 if (msg->sm_proto == SMPROTO_PAD) {
877 if (msg->sm_bytes < SL_MIN_PAD_SIZE ||
878 msg->sm_bytes > bytes) {
881 /* ignore the entire payload, it can be garbage */
883 if (msg->sm_bytes < SL_MIN_MSG_SIZE ||
884 msg->sm_bytes > bytes) {
887 error = syslink_validate_elm(
890 offsetof(struct syslink_msg,
892 swapit, SL_MAXDEPTH);
898 * The aligned payload size must be used to locate the
899 * next syslink_msg in the buffer.
901 aligned_reclen = SL_MSG_ALIGN(msg->sm_bytes);
902 bytes -= aligned_reclen;
903 msg = (void *)((char *)msg + aligned_reclen);
910 syslink_validate_elm(struct syslink_elm *elm, sl_reclen_t bytes,
911 int swapit, int depth)
916 * If the buffer isn't big enough to fit the header, stop now!
918 if (bytes < SL_MIN_ELM_SIZE)
921 * All syslink_elm headers are recursively endian-adjusted. Opaque
922 * data payloads are not.
925 elm->se_cmd = bswap16(elm->se_cmd) ^ SE_CMDF_UNTRANSLATED;
926 elm->se_bytes = bswap16(elm->se_bytes);
927 elm->se_aux = bswap32(elm->se_aux);
931 * Check element size requirements.
933 if (elm->se_bytes < SL_MIN_ELM_SIZE || elm->se_bytes > bytes)
937 * Recursively check structured payloads. A structured payload may
938 * contain as few as 0 recursive elements.
940 if (elm->se_cmd & SE_CMDF_STRUCTURED) {
943 bytes -= SL_MIN_ELM_SIZE;
946 if (syslink_validate_elm(elm, bytes, swapit, depth - 1))
948 aligned_reclen = SL_MSG_ALIGN(elm->se_bytes);
949 elm = (void *)((char *)elm + aligned_reclen);
950 bytes -= aligned_reclen;
956 /************************************************************************
957 * BACKEND FUNCTIONS - USER DESCRIPTOR *
958 ************************************************************************
960 * Peer backend links are primarily used when userland creates a pair
961 * of linked descriptors.
965 * Do any required blocking / nbio handling for attempts to write to
966 * a sldesc associated with a user descriptor.
970 backend_wblocked_user(struct sldesc *sl, int nbio, sl_proto_t proto)
973 int *bytesp = (proto & SM_PROTO_REPLY) ? &sl->repbytes : &sl->cmdbytes;
976 * Block until sufficient data is drained by the target. It is
977 * ok to have a MP race against cmdbytes.
979 if (*bytesp >= syslink_bufsize) {
980 spin_lock_wr(&sl->spin);
981 while (*bytesp >= syslink_bufsize) {
982 if (sl->flags & SLF_WSHUTDOWN) {
991 error = msleep(&sl->wblocked, &sl->spin,
992 PCATCH, "slwmsg", 0);
996 spin_unlock_wr(&sl->spin);
1002 * Unconditionally write a syslink message to the sldesc associated with
1003 * a user descriptor. Command messages are also placed in a red-black
1004 * tree so their DMA tag (if any) can be accessed and so they can be
1005 * linked to any reply message.
1009 backend_write_user(struct sldesc *sl, struct slmsg *slmsg)
1013 spin_lock_wr(&sl->spin);
1014 if (sl->flags & SLF_RSHUTDOWN) {
1016 * Not accepting new messages
1019 } else if (slmsg->msg->sm_proto & SM_PROTO_REPLY) {
1023 TAILQ_INSERT_TAIL(&sl->inq, slmsg, tqnode);
1024 sl->repbytes += slmsg->maxsize;
1025 slmsg->flags |= SLMSGF_ONINQ;
1027 } else if (RB_INSERT(slmsg_rb_tree, &sl->reply_rb_root, slmsg)) {
1029 * Write a command, but there was a msgid collision when
1030 * we tried to insert it into the RB tree.
1035 * Write a command, successful insertion into the RB tree.
1037 TAILQ_INSERT_TAIL(&sl->inq, slmsg, tqnode);
1038 sl->cmdbytes += slmsg->maxsize;
1039 slmsg->flags |= SLMSGF_ONINQ;
1042 spin_unlock_wr(&sl->spin);
1044 wakeup(&sl->rwaiters);
1049 * Our peer is replying a command we previously sent it back to us, along
1050 * with the reply message (if not NULL). We just queue the reply to
1051 * userland and free of the command.
1055 backend_reply_user(struct sldesc *sl, struct slmsg *slcmd, struct slmsg *slrep)
1059 objcache_put(slcmd->oc, slcmd);
1061 spin_lock_wr(&sl->spin);
1062 if ((sl->flags & SLF_RSHUTDOWN) == 0) {
1063 TAILQ_INSERT_TAIL(&sl->inq, slrep, tqnode);
1064 sl->repbytes += slrep->maxsize;
1069 spin_unlock_wr(&sl->spin);
1071 sl->peer->backend_dispose(sl->peer, slrep);
1072 else if (sl->rwaiters)
1073 wakeup(&sl->rwaiters);
1079 backend_dispose_user(struct sldesc *sl, struct slmsg *slmsg)
1081 objcache_put(slmsg->oc, slmsg);
1084 /************************************************************************
1085 * KERNEL DRIVER OR FILESYSTEM API *
1086 ************************************************************************
1091 * Create a user<->kernel link, returning the user descriptor in *fdp
1092 * and the kernel descriptor in *kslp. 0 is returned on success, and an
1093 * error code is returned on failure.
1096 syslink_ukbackend(int *fdp, struct sldesc **kslp)
1098 struct proc *p = curproc;
1108 error = falloc(p, &fp, &fd);
1111 usl = allocsldesc(NULL);
1112 usl->backend_wblocked = backend_wblocked_user;
1113 usl->backend_write = backend_write_user;
1114 usl->backend_reply = backend_reply_user;
1115 usl->backend_dispose = backend_dispose_user;
1117 ksl = allocsldesc(usl->common);
1119 ksl->backend_wblocked = backend_wblocked_kern;
1120 ksl->backend_write = backend_write_kern;
1121 ksl->backend_reply = backend_reply_kern;
1122 ksl->backend_dispose = backend_dispose_kern;
1126 setsldescfp(usl, fp);
1136 * Assign a unique message id, issue a syslink message to userland,
1137 * and wait for a reply.
1140 syslink_kdomsg(struct sldesc *ksl, struct slmsg *slmsg)
1142 struct syslink_msg *msg;
1146 * Finish initializing slmsg and post it to the red-black tree for
1147 * reply matching. If the message id is already in use we return
1148 * EEXIST, giving the originator the chance to roll a new msgid.
1151 slmsg->msgsize = msg->sm_bytes;
1152 if ((error = syslink_validate_msg(msg, msg->sm_bytes)) != 0)
1154 msg->sm_msgid = allocsysid();
1157 * Issue the request and wait for a matching reply or failure,
1158 * then remove the message from the matching tree and return.
1160 error = ksl->peer->backend_write(ksl->peer, slmsg);
1161 spin_lock_wr(&ksl->spin);
1163 while (slmsg->rep == NULL) {
1164 error = msleep(slmsg, &ksl->spin, 0, "kwtmsg", 0);
1165 /* XXX ignore error for now */
1167 if (slmsg->rep == (struct slmsg *)-1) {
1171 error = slmsg->rep->msg->sm_head.se_aux;
1174 spin_unlock_wr(&ksl->spin);
1179 syslink_kallocmsg(void)
1181 return(objcache_get(sl_objcache_small, M_WAITOK));
1185 syslink_kfreemsg(struct sldesc *ksl, struct slmsg *slmsg)
1189 if ((rep = slmsg->rep) != NULL) {
1191 ksl->peer->backend_dispose(ksl->peer, rep);
1193 objcache_put(slmsg->oc, slmsg);
1197 syslink_kshutdown(struct sldesc *ksl, int how)
1199 shutdownsldesc(ksl, how);
1203 syslink_kclose(struct sldesc *ksl)
1205 shutdownsldesc(ksl, SHUT_RDWR);
1209 /************************************************************************
1210 * BACKEND FUNCTIONS FOR KERNEL API *
1211 ************************************************************************
1213 * These are the backend functions for a sldesc associated with a kernel
1218 * Our peer wants to write a syslink message to us and is asking us to
1219 * block if our input queue is full. We don't implement command reception
1220 * so don't block right now.
1224 backend_wblocked_kern(struct sldesc *ksl, int nbio, sl_proto_t proto)
1231 * Our peer is writing a request to the kernel. At the moment we do not
1236 backend_write_kern(struct sldesc *ksl, struct slmsg *slmsg)
1242 * Our peer wants to reply to a syslink message we sent it earlier. The
1243 * original command (that we passed to our peer), and the peer's reply
1244 * is specified. If the peer has failed slrep will be NULL.
1248 backend_reply_kern(struct sldesc *ksl, struct slmsg *slcmd, struct slmsg *slrep)
1250 spin_lock_wr(&ksl->spin);
1251 if (slrep == NULL) {
1252 slcmd->rep = (struct slmsg *)-1;
1256 spin_unlock_wr(&ksl->spin);
1261 * Any reply messages we sent to our peer are returned to us for disposal.
1262 * Since we do not currently accept commands from our peer, there will not
1263 * be any replies returned to the peer to dispose of.
1267 backend_dispose_kern(struct sldesc *ksl, struct slmsg *slmsg)
1269 panic("backend_dispose_kern: kernel can't accept commands so it "
1270 "certainly did not reply to one!");