kernel: Start removing syslink.
[dragonfly.git] / sys / kern / kern_syslink.c
1/*
2 * Copyright (c) 2006-2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34/*
35 * This module implements the core syslink() system call and provides
36 * glue for kernel syslink frontends and backends, creating an intra-host
37 * communications infrastructure and DMA transport abstraction.
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/endian.h>
44#include <sys/malloc.h>
45#include <sys/alist.h>
46#include <sys/file.h>
47#include <sys/proc.h>
48#include <sys/priv.h>
49#include <sys/lock.h>
50#include <sys/uio.h>
51#include <sys/objcache.h>
52#include <sys/queue.h>
53#include <sys/thread.h>
54#include <sys/tree.h>
55#include <sys/sysctl.h>
56#include <sys/sysproto.h>
57#include <sys/mbuf.h>
58#include <sys/socket.h>
59#include <sys/socketvar.h>
60#include <sys/socketops.h>
61#include <sys/sysref.h>
62#include <sys/syslink.h>
63#include <sys/syslink_msg.h>
64#include <netinet/in.h>
65
66#include <sys/thread2.h>
67#include <sys/spinlock2.h>
68#include <sys/buf2.h>
69#include <sys/mplock2.h>
70
71/*
72 * Syslink Connection abstraction
73 */
74struct slcommon {
75 struct spinlock spin;
76 int refs;
77};
78
79struct sldesc {
80 struct slmsgq inq;
81 struct slmsg_rb_tree reply_rb_root; /* replies to requests */
82 struct spinlock spin;
83 struct sldesc *peer; /* peer syslink, if any */
84 struct file *xfp; /* external file pointer */
85 struct slcommon *common;
86 int flags;
87 int rwaiters; /* number of threads waiting */
88 int wblocked; /* blocked waiting for us to drain */
89 size_t cmdbytes; /* unreplied commands pending */
90 size_t repbytes; /* undrained replies pending */
91 int (*backend_wblocked)(struct sldesc *, int, sl_proto_t);
92 int (*backend_write)(struct sldesc *, struct slmsg *);
93 void (*backend_reply)(struct sldesc *,struct slmsg *,struct slmsg *);
94 void (*backend_dispose)(struct sldesc *, struct slmsg *);
95};
96
97#define SLF_RSHUTDOWN 0x0001
98#define SLF_WSHUTDOWN 0x0002
99
100static int syslink_cmd_new(struct syslink_info_new *info, int *result);
101static struct sldesc *allocsldesc(struct slcommon *common);
102static void setsldescfp(struct sldesc *sl, struct file *fp);
103static void shutdownsldesc(struct sldesc *sl, int how);
104static void shutdownsldesc2(struct sldesc *sl, int how);
105static void sldrop(struct sldesc *sl);
106static int syslink_validate_msg(struct syslink_msg *msg, int bytes);
107static int syslink_validate_elm(struct syslink_elm *elm, sl_reclen_t bytes,
108 int swapit, int depth);
109
110static int sl_local_mmap(struct slmsg *slmsg, char *base, size_t len);
111static void sl_local_munmap(struct slmsg *slmsg);
112
113static int backend_wblocked_user(struct sldesc *sl, int nbio, sl_proto_t proto);
114static int backend_write_user(struct sldesc *sl, struct slmsg *slmsg);
115static void backend_reply_user(struct sldesc *sl, struct slmsg *slcmd,
116 struct slmsg *slrep);
117static void backend_dispose_user(struct sldesc *sl, struct slmsg *slmsg);
118
119static int backend_wblocked_kern(struct sldesc *sl, int nbio, sl_proto_t proto);
120static int backend_write_kern(struct sldesc *sl, struct slmsg *slmsg);
121static void backend_reply_kern(struct sldesc *sl, struct slmsg *slcmd,
122 struct slmsg *slrep);
123static void backend_dispose_kern(struct sldesc *sl, struct slmsg *slmsg);
124static void slmsg_put(struct slmsg *slmsg);
125
126/*
127 * Objcache memory backend
128 *
129 * All three object caches return slmsg structures but each is optimized
130 * for syslink message buffers of varying sizes. We use the slightly
131 * more complex ctor/dtor API in order to provide ready-to-go slmsg's.
132 */
133
134static struct objcache *sl_objcache_big;
135static struct objcache *sl_objcache_small;
136static struct objcache *sl_objcache_none;
137
138MALLOC_DEFINE(M_SYSLINK, "syslink", "syslink manager");
139
140static boolean_t slmsg_ctor(void *data, void *private, int ocflags);
141static void slmsg_dtor(void *data, void *private);
142
143static
144void
145syslinkinit(void *dummy __unused)
146{
147 size_t n = sizeof(struct slmsg);
148
149 sl_objcache_none = objcache_create_mbacked(M_SYSLINK, n, 0, 64,
150 slmsg_ctor, slmsg_dtor,
151 &sl_objcache_none);
152 sl_objcache_small= objcache_create_mbacked(M_SYSLINK, n, 0, 64,
153 slmsg_ctor, slmsg_dtor,
154 &sl_objcache_small);
155 sl_objcache_big = objcache_create_mbacked(M_SYSLINK, n, 0, 16,
156 slmsg_ctor, slmsg_dtor,
157 &sl_objcache_big);
158}
159
160static
161boolean_t
162slmsg_ctor(void *data, void *private, int ocflags)
163{
164 struct slmsg *slmsg = data;
165
166 bzero(slmsg, sizeof(*slmsg));
167
168 slmsg->oc = *(struct objcache **)private;
169 if (slmsg->oc == sl_objcache_none) {
170 slmsg->maxsize = 0;
171 } else if (slmsg->oc == sl_objcache_small) {
172 slmsg->maxsize = SLMSG_SMALL;
173 } else if (slmsg->oc == sl_objcache_big) {
174 slmsg->maxsize = SLMSG_BIG;
175 } else {
176 panic("slmsg_ctor: bad objcache?");
177 }
178 if (slmsg->maxsize) {
179 slmsg->msg = kmalloc(slmsg->maxsize,
180 M_SYSLINK, M_WAITOK|M_ZERO);
181 }
182 xio_init(&slmsg->xio);
183 return(TRUE);
184}
185
186static
187void
188slmsg_dtor(void *data, void *private)
189{
190 struct slmsg *slmsg = data;
191
192 if (slmsg->maxsize && slmsg->msg) {
193 kfree(slmsg->msg, M_SYSLINK);
194 slmsg->msg = NULL;
195 }
196 slmsg->oc = NULL;
197}
198
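/*
 * Example (sketch, not part of the original file): how a caller would pick
 * the right object cache for a message of a given size, mirroring the
 * selection done in slfileop_write() and syslink_kallocmsg() below. The
 * helper name slmsg_alloc() is hypothetical.
 */
#if 0
static struct slmsg *
slmsg_alloc(size_t msgsize)
{
	/* messages that fit the small buffer come from the small cache */
	if (msgsize <= SLMSG_SMALL)
		return (objcache_get(sl_objcache_small, M_WAITOK));
	/* larger messages (up to SLMSG_BIG) come from the big cache */
	if (msgsize <= SLMSG_BIG)
		return (objcache_get(sl_objcache_big, M_WAITOK));
	return (NULL);
}
#endif
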
199SYSINIT(syslink, SI_BOOT2_MACHDEP, SI_ORDER_ANY, syslinkinit, NULL)
200
201static int rb_slmsg_compare(struct slmsg *msg1, struct slmsg *msg2);
202RB_GENERATE2(slmsg_rb_tree, slmsg, rbnode, rb_slmsg_compare,
203 sysid_t, msg->sm_msgid);
204
205/*
206 * Sysctl elements
207 */
208static int syslink_enabled;
209SYSCTL_NODE(_kern, OID_AUTO, syslink, CTLFLAG_RW, 0, "Syslink operation");
210SYSCTL_INT(_kern_syslink, OID_AUTO, enabled,
211 CTLFLAG_RW, &syslink_enabled, 0, "Enable SYSLINK");
212static size_t syslink_bufsize = 65536;
213SYSCTL_UINT(_kern_syslink, OID_AUTO, bufsize,
214 CTLFLAG_RW, &syslink_bufsize, 0, "Maximum buffer size");
215
216/*
217 * Fileops API - typically used to glue a userland frontend with a
218 * kernel backend.
219 */
220
221static int slfileop_read(struct file *fp, struct uio *uio,
222 struct ucred *cred, int flags);
223static int slfileop_write(struct file *fp, struct uio *uio,
224 struct ucred *cred, int flags);
225static int slfileop_close(struct file *fp);
226static int slfileop_stat(struct file *fp, struct stat *sb, struct ucred *cred);
227static int slfileop_shutdown(struct file *fp, int how);
228static int slfileop_ioctl(struct file *fp, u_long cmd, caddr_t data,
229 struct ucred *cred, struct sysmsg *msg);
230static int slfileop_kqfilter(struct file *fp, struct knote *kn);
231
232static struct fileops syslinkops = {
233 .fo_read = slfileop_read,
234 .fo_write = slfileop_write,
235 .fo_ioctl = slfileop_ioctl,
236 .fo_kqfilter = slfileop_kqfilter,
237 .fo_stat = slfileop_stat,
238 .fo_close = slfileop_close,
239 .fo_shutdown = slfileop_shutdown
240};
241
242/************************************************************************
243 * PRIMARY SYSTEM CALL INTERFACE *
244 ************************************************************************
245 *
246 * syslink(int cmd, struct syslink_info *info, size_t bytes)
247 *
248 * MPALMOSTSAFE
249 */
250int
251sys_syslink(struct syslink_args *uap)
252{
253 union syslink_info_all info;
254 int error;
255
256 /*
257 * System call is under construction and disabled by default.
258 * Superuser access is also required for now, but eventually
259 * will not be needed.
260 */
261 if (syslink_enabled == 0)
262 return (EAUTH);
263 error = priv_check(curthread, PRIV_ROOT);
264 if (error)
265 return (error);
266
267 /*
268 * Load and validate the info structure. Unloaded bytes are zeroed
269 * out. The label field must always be 0-filled, even if not used
270 * for a command.
271 */
272 bzero(&info, sizeof(info));
273 if ((unsigned)uap->bytes <= sizeof(info)) {
274 if (uap->bytes)
275 error = copyin(uap->info, &info, uap->bytes);
276 } else {
277 error = EINVAL;
278 }
279 if (error)
280 return (error);
281 get_mplock();
282
283 /*
284 * Process the command
285 */
286 switch(uap->cmd) {
287 case SYSLINK_CMD_NEW:
288 error = syslink_cmd_new(&info.cmd_new, &uap->sysmsg_result);
289 break;
290 default:
291 error = EINVAL;
292 break;
293 }
294
295 rel_mplock();
296 if (error == 0 && info.head.wbflag)
297 copyout(&info, uap->info, uap->bytes);
298 return (error);
299}
300
301/*
302 * Create a linked pair of descriptors, like a pipe.
303 */
304static
305int
306syslink_cmd_new(struct syslink_info_new *info, int *result)
307{
308 struct thread *td = curthread;
309 struct filedesc *fdp = td->td_proc->p_fd;
310 struct file *fp1;
311 struct file *fp2;
312 struct sldesc *sl;
313 struct sldesc *slpeer;
314 int error;
315 int fd1, fd2;
316
317 error = falloc(td->td_lwp, &fp1, &fd1);
318 if (error)
319 return(error);
320 error = falloc(td->td_lwp, &fp2, &fd2);
321 if (error) {
322 fsetfd(fdp, NULL, fd1);
323 fdrop(fp1);
324 return(error);
325 }
326 slpeer = allocsldesc(NULL);
327 slpeer->backend_wblocked = backend_wblocked_user;
328 slpeer->backend_write = backend_write_user;
329 slpeer->backend_reply = backend_reply_user;
330 slpeer->backend_dispose = backend_dispose_user;
331 sl = allocsldesc(slpeer->common);
332 sl->peer = slpeer;
333 sl->backend_wblocked = backend_wblocked_user;
334 sl->backend_write = backend_write_user;
335 sl->backend_reply = backend_reply_user;
336 sl->backend_dispose = backend_dispose_user;
337 slpeer->peer = sl;
338
339 setsldescfp(sl, fp1);
340 setsldescfp(slpeer, fp2);
341
342 fsetfd(fdp, fp1, fd1);
343 fdrop(fp1);
344 fsetfd(fdp, fp2, fd2);
345 fdrop(fp2);
346
347 info->head.wbflag = 1; /* write back */
348 info->fds[0] = fd1;
349 info->fds[1] = fd2;
350
351 return(0);
352}
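
/*
 * Example (sketch): how a userland program would use SYSLINK_CMD_NEW to
 * create a linked pair of descriptors, analogous to pipe(2). This is a
 * userland fragment with minimal error handling; the syslink(2) prototype
 * and struct syslink_info_new come from the installed headers, and the
 * cast reflects an assumption about that prototype.
 */
#if 0
	struct syslink_info_new info;

	bzero(&info, sizeof(info));
	if (syslink(SYSLINK_CMD_NEW, (struct syslink_info *)&info,
		    sizeof(info)) < 0)
		err(1, "syslink");
	/* on success the kernel wrote both descriptors back (wbflag) */
	printf("fds %d and %d\n", info.fds[0], info.fds[1]);
#endif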
353
354/************************************************************************
355 * LOW LEVEL SLDESC SUPPORT *
356 ************************************************************************
357 *
358 */
359
360static
361struct sldesc *
362allocsldesc(struct slcommon *common)
363{
364 struct sldesc *sl;
365
366 sl = kmalloc(sizeof(struct sldesc), M_SYSLINK, M_WAITOK|M_ZERO);
367 if (common == NULL)
368 common = kmalloc(sizeof(*common), M_SYSLINK, M_WAITOK|M_ZERO);
369 TAILQ_INIT(&sl->inq); /* incoming requests */
370 RB_INIT(&sl->reply_rb_root); /* match incoming replies */
371 spin_init(&sl->spin);
372 sl->common = common;
373 ++common->refs;
374 return(sl);
375}
376
377static
378void
379setsldescfp(struct sldesc *sl, struct file *fp)
380{
381 sl->xfp = fp;
382 fp->f_type = DTYPE_SYSLINK;
383 fp->f_flag = FREAD | FWRITE;
384 fp->f_ops = &syslinkops;
385 fp->f_data = sl;
386}
387
388/*
389 * Red-black tree compare function
390 */
391static
392int
393rb_slmsg_compare(struct slmsg *msg1, struct slmsg *msg2)
394{
395 if (msg1->msg->sm_msgid < msg2->msg->sm_msgid)
396 return(-1);
397 if (msg1->msg->sm_msgid == msg2->msg->sm_msgid)
398 return(0);
399 return(1);
400}
401
402static
403void
404shutdownsldesc(struct sldesc *sl, int how)
405{
406 struct slmsg *slmsg;
407 int rhow;
408
409 shutdownsldesc2(sl, how);
410
411 /*
412 * Return unread and unreplied messages
413 */
414 spin_lock(&sl->spin);
415 while ((slmsg = TAILQ_FIRST(&sl->inq)) != NULL) {
416 TAILQ_REMOVE(&sl->inq, slmsg, tqnode);
417 spin_unlock(&sl->spin);
418 if (slmsg->msg->sm_proto & SM_PROTO_REPLY) {
419 sl->repbytes -= slmsg->maxsize;
420 slmsg->flags &= ~SLMSGF_ONINQ;
421 sl->peer->backend_dispose(sl->peer, slmsg);
422 }
423 /* leave ONINQ set for commands, it will be cleared below */
424 spin_lock(&sl->spin);
425 }
426 while ((slmsg = RB_ROOT(&sl->reply_rb_root)) != NULL) {
427 RB_REMOVE(slmsg_rb_tree, &sl->reply_rb_root, slmsg);
428 sl->cmdbytes -= slmsg->maxsize;
429 spin_unlock(&sl->spin);
430 slmsg->flags &= ~SLMSGF_ONINQ;
431 sl->peer->backend_reply(sl->peer, slmsg, NULL);
432 spin_lock(&sl->spin);
433 }
434 spin_unlock(&sl->spin);
435
436 /*
437 * Call shutdown on the peer with the opposite flags
438 */
439 rhow = 0;
440 switch(how) {
441 case SHUT_RD:
442 rhow = SHUT_WR;
443 break;
444 case SHUT_WR:
445 rhow = SHUT_RD;
446 break;
447 case SHUT_RDWR:
448 rhow = SHUT_RDWR;
449 break;
450 }
451 shutdownsldesc2(sl->peer, rhow);
452}
453
454static
455void
456shutdownsldesc2(struct sldesc *sl, int how)
457{
458 spin_lock(&sl->spin);
459 switch(how) {
460 case SHUT_RD:
461 sl->flags |= SLF_RSHUTDOWN;
462 break;
463 case SHUT_WR:
464 sl->flags |= SLF_WSHUTDOWN;
465 break;
466 case SHUT_RDWR:
467 sl->flags |= SLF_RSHUTDOWN | SLF_WSHUTDOWN;
468 break;
469 }
470 spin_unlock(&sl->spin);
471
472 /*
473 * Handle signaling on the user side
474 */
475 if (how & SHUT_RD) {
476 if (sl->rwaiters)
477 wakeup(&sl->rwaiters);
478 }
479 if (how & SHUT_WR) {
480 if (sl->wblocked) {
481 sl->wblocked = 0; /* race ok */
482 wakeup(&sl->wblocked);
483 }
484 }
485}
486
487static
488void
489sldrop(struct sldesc *sl)
490{
491 struct sldesc *slpeer;
492
493 spin_lock(&sl->common->spin);
494 if (--sl->common->refs == 0) {
495 spin_unlock(&sl->common->spin);
496 if ((slpeer = sl->peer) != NULL) {
497 sl->peer = NULL;
498 slpeer->peer = NULL;
499 slpeer->common = NULL;
500 KKASSERT(slpeer->xfp == NULL);
501 KKASSERT(TAILQ_EMPTY(&slpeer->inq));
502 KKASSERT(RB_EMPTY(&slpeer->reply_rb_root));
503 kfree(slpeer, M_SYSLINK);
504 }
505 KKASSERT(sl->xfp == NULL);
506 KKASSERT(TAILQ_EMPTY(&sl->inq));
507 KKASSERT(RB_EMPTY(&sl->reply_rb_root));
508 kfree(sl->common, M_SYSLINK);
509 sl->common = NULL;
510 kfree(sl, M_SYSLINK);
511 } else {
512 spin_unlock(&sl->common->spin);
513 }
514}
515
516static
517void
518slmsg_put(struct slmsg *slmsg)
519{
520 if (slmsg->flags & SLMSGF_HASXIO) {
521 slmsg->flags &= ~SLMSGF_HASXIO;
522 get_mplock();
523 xio_release(&slmsg->xio);
524 rel_mplock();
525 }
526 slmsg->flags &= ~SLMSGF_LINMAP;
527 objcache_put(slmsg->oc, slmsg);
528}
529
530/************************************************************************
531 * FILEOPS API *
532 ************************************************************************
533 *
534 * Implement userland fileops.
535 *
536 * MPSAFE ops
537 */
538static
539int
540slfileop_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
541{
542 struct sldesc *sl = fp->f_data; /* fp refed on call */
543 struct slmsg *slmsg;
544 struct iovec *iov0;
545 struct iovec *iov1;
546 struct syslink_msg *wmsg;
547 int error;
548 int nbio;
549
550 /*
551 * Kinda messy. Figure out the non-blocking state
552 */
553 if (flags & O_FBLOCKING)
554 nbio = 0;
555 else if (flags & O_FNONBLOCKING)
556 nbio = 1;
557 else if (fp->f_flag & O_NONBLOCK)
558 nbio = 1;
559 else
560 nbio = 0;
561
562 /*
563 * Validate the uio.
564 *
565 * iov0 - message buffer
566 * iov1 - DMA buffer or backup buffer
567 */
568 if (uio->uio_iovcnt < 1) {
569 error = 0;
570 goto done2;
571 }
572 iov0 = &uio->uio_iov[0];
573 if (uio->uio_iovcnt > 2) {
574 error = EINVAL;
575 goto done2;
576 }
577
578 /*
579 * Get a message, blocking if necessary.
580 */
581 spin_lock(&sl->spin);
582 while ((slmsg = TAILQ_FIRST(&sl->inq)) == NULL) {
583 if (sl->flags & SLF_RSHUTDOWN) {
584 error = 0;
585 goto done1;
586 }
587 if (nbio) {
588 error = EAGAIN;
589 goto done1;
590 }
591 ++sl->rwaiters;
592 error = ssleep(&sl->rwaiters, &sl->spin, PCATCH, "slrmsg", 0);
593 --sl->rwaiters;
594 if (error)
595 goto done1;
596 }
597 wmsg = slmsg->msg;
598
599 /*
600 * We have a message and still hold the spinlock. Make sure the
601 * uio has enough room to hold the message.
602 *
603 * Note that replies do not have XIOs.
604 */
605 if (slmsg->msgsize > iov0->iov_len) {
606 error = ENOSPC;
607 goto done1;
608 }
609 if (slmsg->xio.xio_bytes) {
610 if (uio->uio_iovcnt != 2) {
611 error = ENOSPC;
612 goto done1;
613 }
614 iov1 = &uio->uio_iov[1];
615 if (slmsg->xio.xio_bytes > iov1->iov_len) {
616 error = ENOSPC;
617 goto done1;
618 }
619 } else {
620 iov1 = NULL;
621 }
622
623 /*
624 * Dequeue the message. Adjust repbytes immediately. cmdbytes
625 * are adjusted when the command is replied to, not here.
626 */
627 TAILQ_REMOVE(&sl->inq, slmsg, tqnode);
628 if (slmsg->msg->sm_proto & SM_PROTO_REPLY)
629 sl->repbytes -= slmsg->maxsize;
630 spin_unlock(&sl->spin);
631
632 /*
633 * Load the message data into the user buffer.
634 *
635 * If receiving a command an XIO may exist specifying a DMA buffer.
636 * For commands, if DMAW is set we have to copy or map the buffer
637 * so the caller can access the data being written. If DMAR is set
638 * we do not have to copy but we still must map the buffer so the
639 * caller can directly fill in the data being requested.
640 */
641 error = uiomove((void *)slmsg->msg, slmsg->msgsize, uio);
642 if (error == 0 && slmsg->xio.xio_bytes &&
643 (wmsg->sm_head.se_cmd & SE_CMDF_REPLY) == 0) {
644 if (wmsg->sm_head.se_cmd & SE_CMDF_DMAW) {
645 /*
646 * Data being passed to caller or being passed in both
647 * directions, copy or map.
648 */
649 get_mplock();
650 if ((flags & O_MAPONREAD) &&
651 (slmsg->xio.xio_flags & XIOF_VMLINEAR)) {
652 error = sl_local_mmap(slmsg,
653 iov1->iov_base,
654 iov1->iov_len);
655 if (error)
656 error = xio_copy_xtou(&slmsg->xio, 0,
657 iov1->iov_base,
658 slmsg->xio.xio_bytes);
659 } else {
660 error = xio_copy_xtou(&slmsg->xio, 0,
661 iov1->iov_base,
662 slmsg->xio.xio_bytes);
663 }
664 rel_mplock();
665 } else if (wmsg->sm_head.se_cmd & SE_CMDF_DMAR) {
666 /*
667 * Data will be passed back to originator, map
668 * the buffer if we can, else use the backup
669 * buffer at the same VA supplied by the caller.
670 */
671 get_mplock();
672 if ((flags & O_MAPONREAD) &&
673 (slmsg->xio.xio_flags & XIOF_VMLINEAR)) {
674 error = sl_local_mmap(slmsg,
675 iov1->iov_base,
676 iov1->iov_len);
677 error = 0; /* ignore errors */
678 }
679 rel_mplock();
680 }
681 }
682
683 /*
684 * Clean up.
685 */
686 if (error) {
687 /*
688 * Requeue the message if we could not read it successfully
689 */
690 spin_lock(&sl->spin);
691 TAILQ_INSERT_HEAD(&sl->inq, slmsg, tqnode);
692 slmsg->flags |= SLMSGF_ONINQ;
693 spin_unlock(&sl->spin);
694 } else if (slmsg->msg->sm_proto & SM_PROTO_REPLY) {
695 /*
696 * Dispose of any received reply after we've copied it
697 * to userland. We don't need the slmsg any more.
698 */
699 slmsg->flags &= ~SLMSGF_ONINQ;
700 sl->peer->backend_dispose(sl->peer, slmsg);
701 if (sl->wblocked && sl->repbytes < syslink_bufsize) {
702 sl->wblocked = 0; /* MP race ok here */
703 wakeup(&sl->wblocked);
704 }
705 } else {
706 /*
707 * Leave the command in the RB tree but clear ONINQ now
708 * that we have returned it to userland so userland can
709 * reply to it.
710 */
711 slmsg->flags &= ~SLMSGF_ONINQ;
712 }
713 return(error);
714done1:
715 spin_unlock(&sl->spin);
716done2:
717 return(error);
718}
719
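/*
 * Example (sketch): a userland frontend reading one message from a
 * syslink descriptor, following the iov[0]/iov[1] convention described
 * in slfileop_read() above. fd is a descriptor obtained via
 * SYSLINK_CMD_NEW; buffer sizes are illustrative.
 */
#if 0
	char msgbuf[SLMSG_BIG];		/* iov0 - syslink message */
	char dmabuf[65536];		/* iov1 - DMA or backup buffer */
	struct iovec iov[2];
	ssize_t n;

	iov[0].iov_base = msgbuf;
	iov[0].iov_len = sizeof(msgbuf);
	iov[1].iov_base = dmabuf;
	iov[1].iov_len = sizeof(dmabuf);
	n = readv(fd, iov, 2);		/* 0 on EOF/shutdown, -1 on error */
#endif
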
720/*
721 * Userland writes syslink message (optionally with DMA buffer in iov[1]).
722 */
723static
724int
725slfileop_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
726{
727 struct sldesc *sl = fp->f_data;
728 struct slmsg *slmsg;
729 struct slmsg *slcmd;
730 struct syslink_msg sltmp;
731 struct syslink_msg *wmsg; /* wire message */
732 struct iovec *iov0;
733 struct iovec *iov1;
734 sl_proto_t proto;
735 int nbio;
736 int error;
737 int xflags;
738
739 /*
740 * Kinda messy. Figure out the non-blocking state
741 */
742 if (flags & O_FBLOCKING)
743 nbio = 0;
744 else if (flags & O_FNONBLOCKING)
745 nbio = 1;
746 else if (fp->f_flag & O_NONBLOCK)
747 nbio = 1;
748 else
749 nbio = 0;
750
751 /*
752 * Validate the uio
753 */
754 if (uio->uio_iovcnt < 1) {
755 error = 0;
756 goto done2;
757 }
758 iov0 = &uio->uio_iov[0];
759 if (iov0->iov_len > SLMSG_BIG) {
760 error = EFBIG;
761 goto done2;
762 }
763 if (uio->uio_iovcnt > 2) {
764 error = EFBIG;
765 goto done2;
766 }
767 if (uio->uio_iovcnt > 1) {
768 iov1 = &uio->uio_iov[1];
769 if (iov1->iov_len > XIO_INTERNAL_SIZE) {
770 error = EFBIG;
771 goto done2;
772 }
773 if ((intptr_t)iov1->iov_base & PAGE_MASK) {
774 error = EINVAL;
775 goto done2;
776 }
777 } else {
778 iov1 = NULL;
779 }
780
781 /*
782 * Handle the buffer-full case. The peer's cmdbytes is managed
783 * by the backend function, not by us, so if the callback just
784 * directly implements the message and never adjusts cmdbytes,
785 * we will never sleep here.
786 */
787 if (sl->flags & SLF_WSHUTDOWN) {
788 error = EPIPE;
789 goto done2;
790 }
791
792 /*
793 * Only commands can block the pipe, not replies. Otherwise a
794 * deadlock is possible.
795 */
796 error = copyin(iov0->iov_base, &sltmp, sizeof(sltmp));
797 if (error)
798 goto done2;
799 if ((proto = sltmp.sm_proto) & SM_PROTO_ENDIAN_REV)
800 proto = bswap16(proto);
801 error = sl->peer->backend_wblocked(sl->peer, nbio, proto);
802 if (error)
803 goto done2;
804
805 /*
806 * Allocate a slmsg and load the message. Note that the bytes
807 * returned to userland only reflect the primary syslink message
808 * and do not include any DMA buffers.
809 */
810 if (iov0->iov_len <= SLMSG_SMALL)
811 slmsg = objcache_get(sl_objcache_small, M_WAITOK);
812 else
813 slmsg = objcache_get(sl_objcache_big, M_WAITOK);
814 slmsg->msgsize = iov0->iov_len;
815 wmsg = slmsg->msg;
816
817 error = uiomove((void *)wmsg, iov0->iov_len, uio);
818 if (error)
819 goto done1;
820 error = syslink_validate_msg(wmsg, slmsg->msgsize);
821 if (error)
822 goto done1;
823
824 if ((wmsg->sm_head.se_cmd & SE_CMDF_REPLY) == 0) {
825 /*
826 * Install the XIO for commands if any DMA flags are set.
827 *
828 * XIOF_VMLINEAR requires that the XIO represent a
829 * contiguous set of pages associated with a single VM
830 * object (so the reader side can mmap it easily).
831 *
832 * XIOF_VMLINEAR might not be set when the kernel sends
833 * commands to userland so the reader side backs off to
834 * a backup buffer if it isn't set, but we require it
835 * for userland writes.
836 */
837 xflags = XIOF_VMLINEAR;
838 if (wmsg->sm_head.se_cmd & SE_CMDF_DMAR)
839 xflags |= XIOF_READ | XIOF_WRITE;
840 else if (wmsg->sm_head.se_cmd & SE_CMDF_DMAW)
841 xflags |= XIOF_READ;
842 if (xflags && iov1) {
843 get_mplock();
844 error = xio_init_ubuf(&slmsg->xio, iov1->iov_base,
845 iov1->iov_len, xflags);
846 rel_mplock();
847 if (error)
848 goto done1;
849 slmsg->flags |= SLMSGF_HASXIO;
850 }
851 error = sl->peer->backend_write(sl->peer, slmsg);
852 } else {
853 /*
854 * Replies have to be matched up against received commands.
855 */
856 spin_lock(&sl->spin);
857 slcmd = slmsg_rb_tree_RB_LOOKUP(&sl->reply_rb_root,
858 slmsg->msg->sm_msgid);
859 if (slcmd == NULL || (slcmd->flags & SLMSGF_ONINQ)) {
860 error = ENOENT;
861 spin_unlock(&sl->spin);
862 goto done1;
863 }
864 RB_REMOVE(slmsg_rb_tree, &sl->reply_rb_root, slcmd);
865 sl->cmdbytes -= slcmd->maxsize;
866 spin_unlock(&sl->spin);
867
868 /*
869 * If the original command specified DMAR, has an xio, and
870 * our write specifies a DMA buffer, then we can do a
871 * copyback. But if we are linearly mapped and the caller
872 * is using the map base address, then the caller filled in
873 * the data via the direct memory map and no copyback is
874 * needed.
875 */
876 if ((slcmd->msg->sm_head.se_cmd & SE_CMDF_DMAR) && iov1 &&
877 (slcmd->flags & SLMSGF_HASXIO) &&
878 ((slcmd->flags & SLMSGF_LINMAP) == 0 ||
879 iov1->iov_base != slcmd->vmbase)
880 ) {
881 size_t count;
882 if (iov1->iov_len > slcmd->xio.xio_bytes)
883 count = slcmd->xio.xio_bytes;
884 else
885 count = iov1->iov_len;
886 get_mplock();
887 error = xio_copy_utox(&slcmd->xio, 0, iov1->iov_base,
888 count);
889 rel_mplock();
890 }
891
892 /*
893 * If we had mapped a DMA buffer, remove it
894 */
895 if (slcmd->flags & SLMSGF_LINMAP) {
896 get_mplock();
897 sl_local_munmap(slcmd);
898 rel_mplock();
899 }
900
901 /*
902 * Reply and handle unblocking
903 */
904 sl->peer->backend_reply(sl->peer, slcmd, slmsg);
905 if (sl->wblocked && sl->cmdbytes < syslink_bufsize) {
906 sl->wblocked = 0; /* MP race ok here */
907 wakeup(&sl->wblocked);
908 }
909
910 /*
911 * slmsg has already been dealt with, make sure error is
912 * 0 so we do not double-free it.
913 */
914 error = 0;
915 }
916 /* fall through */
917done1:
918 if (error)
919 slmsg_put(slmsg);
920 /* fall through */
921done2:
922 return(error);
923}
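
/*
 * Example (sketch): how a userland service replies to a command it read
 * earlier (see the readv() sketch above). The reply reuses the command's
 * sm_msgid so it can be matched in the reply RB tree, sets SE_CMDF_REPLY
 * and SM_PROTO_REPLY, and returns a status in sm_head.se_aux. Native
 * byte order and a header-only reply are assumed.
 */
#if 0
	struct syslink_msg *cmd = (void *)msgbuf;
	struct syslink_msg rep;

	bzero(&rep, sizeof(rep));
	rep.sm_msgid = cmd->sm_msgid;		/* match the command */
	rep.sm_proto = cmd->sm_proto | SM_PROTO_REPLY;
	rep.sm_bytes = sizeof(rep);
	rep.sm_head.se_cmd = cmd->sm_head.se_cmd | SE_CMDF_REPLY;
	rep.sm_head.se_bytes = rep.sm_bytes -
	    offsetof(struct syslink_msg, sm_head);
	rep.sm_head.se_aux = 0;			/* success */
	write(fd, &rep, sizeof(rep));
#endif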
924
925/*
926 * Close a syslink descriptor.
927 *
928 * Disassociate the syslink from the file descriptor and disconnect from
929 * any peer.
930 */
931static
932int
933slfileop_close(struct file *fp)
934{
935 struct sldesc *sl;
936
937 /*
938 * Disassociate the file pointer. Take ownership of the ref on the
939 * sldesc.
940 */
941 sl = fp->f_data;
942 fp->f_data = NULL;
943 fp->f_ops = &badfileops;
944 sl->xfp = NULL;
945
946 /*
947 * Shutdown both directions. The other side will not issue API
948 * calls to us after we've shutdown both directions.
949 */
950 shutdownsldesc(sl, SHUT_RDWR);
951
952 /*
953 * Cleanup
954 */
955 KKASSERT(sl->cmdbytes == 0);
956 KKASSERT(sl->repbytes == 0);
957 sldrop(sl);
958 return(0);
959}
960
961/*
962 * MPSAFE
963 */
964static
965int
966slfileop_stat (struct file *fp, struct stat *sb, struct ucred *cred)
967{
968 return(EINVAL);
969}
970
971static
972int
973slfileop_shutdown (struct file *fp, int how)
974{
975 shutdownsldesc((struct sldesc *)fp->f_data, how);
976 return(0);
977}
978
979static
980int
981slfileop_ioctl (struct file *fp, u_long cmd, caddr_t data,
982 struct ucred *cred, struct sysmsg *msg)
983{
984 return(EINVAL);
985}
986
987static
988int
989slfileop_kqfilter(struct file *fp, struct knote *kn)
990{
991 return(0);
992}
993
994/************************************************************************
995 * LOCAL MEMORY MAPPING *
996 ************************************************************************
997 *
998 * This feature is currently not implemented
999 *
1000 */
1001
1002static
1003int
1004sl_local_mmap(struct slmsg *slmsg, char *base, size_t len)
1005{
1006 return (EOPNOTSUPP);
1007}
1008
1009static
1010void
1011sl_local_munmap(struct slmsg *slmsg)
1012{
1013 /* empty */
1014}
1015
1016#if 0
1017
1018static
1019int
1020sl_local_mmap(struct slmsg *slmsg, char *base, size_t len)
1021{
1022 struct vmspace *vms = curproc->p_vmspace;
1023 vm_offset_t addr = (vm_offset_t)base;
1024 int error;
1025
1026 /* XXX check user address range */
1027 error = vm_map_replace(
1028 &vms->vm_map,
1029 (vm_offset_t)base, (vm_offset_t)base + len,
1030 slmsg->xio.xio_pages[0]->object,
1031 slmsg->xio.xio_pages[0]->pindex << PAGE_SHIFT,
1032 VM_PROT_READ|VM_PROT_WRITE,
1033 VM_PROT_READ|VM_PROT_WRITE,
1034 MAP_DISABLE_SYNCER);
1035 if (error == 0) {
1036 slmsg->flags |= SLMSGF_LINMAP;
1037 slmsg->vmbase = base;
1038 slmsg->vmsize = len;
1039 }
1040 return (error);
1041}
1042
1043static
1044void
1045sl_local_munmap(struct slmsg *slmsg)
1046{
1047 if (slmsg->flags & SLMSGF_LINMAP) {
1048 vm_map_remove(&curproc->p_vmspace->vm_map,
1049 slmsg->vmbase,
1050 slmsg->vmbase + slmsg->vmsize);
1051 slmsg->flags &= ~SLMSGF_LINMAP;
1052 }
1053}
1054
1055#endif
1056
1057/************************************************************************
1058 * MESSAGE VALIDATION *
1059 ************************************************************************
1060 *
1061 * Validate the syslink message. Check that all headers and elements
1062 * conform. Correct the endian if necessary.
1063 *
1064 * NOTE: If reverse endian needs to be corrected, SE_CMDF_UNTRANSLATED
1065 * is recursively flipped on all syslink_elm's in the message. As the
1066 * message traverses the mesh, multiple flips may occur. It is
1067 * up to the RPC protocol layer to correct opaque data payloads and
1068 * SE_CMDF_UNTRANSLATED prevents the protocol layer from misinterpreting
1069 * a command or reply element which has not been endian-corrected.
1070 */
1071static
1072int
1073syslink_validate_msg(struct syslink_msg *msg, int bytes)
1074{
1075 int aligned_reclen;
1076 int swapit;
1077 int error;
1078
1079 /*
1080 * The raw message must be properly-aligned.
1081 */
1082 if (bytes & SL_ALIGNMASK)
1083 return (EINVAL);
1084
1085 while (bytes) {
1086 /*
1087 * The message must at least contain the msgid, bytes, and
1088 * protoid.
1089 */
1090 if (bytes < SL_MIN_PAD_SIZE)
1091 return (EINVAL);
1092
1093 /*
1094 * Fix the endian if it is reversed.
1095 */
1096 if (msg->sm_proto & SM_PROTO_ENDIAN_REV) {
1097 msg->sm_msgid = bswap64(msg->sm_msgid);
1098 msg->sm_sessid = bswap64(msg->sm_sessid);
1099 msg->sm_bytes = bswap16(msg->sm_bytes);
1100 msg->sm_proto = bswap16(msg->sm_proto);
1101 msg->sm_rlabel = bswap32(msg->sm_rlabel);
1102 if (msg->sm_proto & SM_PROTO_ENDIAN_REV)
1103 return (EINVAL);
1104 swapit = 1;
1105 } else {
1106 swapit = 0;
1107 }
1108
1109 /*
1110 * Validate the contents. For PADs, the entire payload is
1111 * ignored and the minimum message size can be as small as
1112 * 8 bytes.
1113 */
1114 if (msg->sm_proto == SMPROTO_PAD) {
1115 if (msg->sm_bytes < SL_MIN_PAD_SIZE ||
1116 msg->sm_bytes > bytes) {
1117 return (EINVAL);
1118 }
1119 /* ignore the entire payload, it can be garbage */
1120 } else {
1121 if (msg->sm_bytes < SL_MIN_MSG_SIZE ||
1122 msg->sm_bytes > bytes) {
1123 return (EINVAL);
1124 }
1125 error = syslink_validate_elm(
1126 &msg->sm_head,
1127 msg->sm_bytes -
1128 offsetof(struct syslink_msg,
1129 sm_head),
1130 swapit, SL_MAXDEPTH);
1131 if (error)
1132 return (error);
1133 }
1134
1135 /*
1136 * The aligned payload size must be used to locate the
1137 * next syslink_msg in the buffer.
1138 */
1139 aligned_reclen = SL_MSG_ALIGN(msg->sm_bytes);
1140 bytes -= aligned_reclen;
1141 msg = (void *)((char *)msg + aligned_reclen);
1142 }
1143 return(0);
1144}
1145
1146static
1147int
1148syslink_validate_elm(struct syslink_elm *elm, sl_reclen_t bytes,
1149 int swapit, int depth)
1150{
1151 int aligned_reclen;
1152
1153 /*
1154 * If the buffer isn't big enough to fit the header, stop now!
1155 */
1156 if (bytes < SL_MIN_ELM_SIZE)
1157 return (EINVAL);
1158 /*
1159 * All syslink_elm headers are recursively endian-adjusted. Opaque
1160 * data payloads are not.
1161 */
1162 if (swapit) {
1163 elm->se_cmd = bswap16(elm->se_cmd) ^ SE_CMDF_UNTRANSLATED;
1164 elm->se_bytes = bswap16(elm->se_bytes);
1165 elm->se_aux = bswap32(elm->se_aux);
1166 }
1167
1168 /*
1169 * Check element size requirements.
1170 */
1171 if (elm->se_bytes < SL_MIN_ELM_SIZE || elm->se_bytes > bytes)
1172 return (EINVAL);
1173
1174 /*
1175 * Recursively check structured payloads. A structured payload may
1176 * contain as few as 0 recursive elements.
1177 */
1178 if (elm->se_cmd & SE_CMDF_STRUCTURED) {
1179 if (depth == 0)
1180 return (EINVAL);
1181 bytes -= SL_MIN_ELM_SIZE;
1182 ++elm;
1183 while (bytes > 0) {
1184 if (syslink_validate_elm(elm, bytes, swapit, depth - 1))
1185 return (EINVAL);
1186 aligned_reclen = SL_MSG_ALIGN(elm->se_bytes);
1187 elm = (void *)((char *)elm + aligned_reclen);
1188 bytes -= aligned_reclen;
1189 }
1190 }
1191 return(0);
1192}
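
/*
 * Example (sketch): how an RPC protocol layer might walk the sub-elements
 * of a structured payload after validation. msg points to an already
 * validated message whose head element has SE_CMDF_STRUCTURED set. The
 * walk advances by the aligned element size, exactly as
 * syslink_validate_elm() does, and checks SE_CMDF_UNTRANSLATED before
 * interpreting any payload data.
 */
#if 0
	struct syslink_elm *elm = &msg->sm_head + 1;	/* first sub-element */
	int bytes = msg->sm_head.se_bytes - SL_MIN_ELM_SIZE;
	int reclen;

	while (bytes > 0) {
		reclen = SL_MSG_ALIGN(elm->se_bytes);
		if (elm->se_cmd & SE_CMDF_UNTRANSLATED) {
			/* payload is still in the sender's byte order */
		}
		/* ... interpret elm->se_cmd / elm->se_aux here ... */
		elm = (void *)((char *)elm + reclen);
		bytes -= reclen;
	}
#endif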
1193
1194/************************************************************************
1195 * BACKEND FUNCTIONS - USER DESCRIPTOR *
1196 ************************************************************************
1197 *
1198 * Peer backend links are primarily used when userland creates a pair
1199 * of linked descriptors.
1200 */
1201
1202/*
1203 * Do any required blocking / nbio handling for attempts to write to
1204 * a sldesc associated with a user descriptor.
1205 */
1206static
1207int
1208backend_wblocked_user(struct sldesc *sl, int nbio, sl_proto_t proto)
1209{
1210 int error = 0;
1211 size_t *bytesp = (proto & SM_PROTO_REPLY) ? &sl->repbytes : &sl->cmdbytes;
1212
1213 /*
1214 * Block until sufficient data is drained by the target. It is
1215 * ok to have a MP race against cmdbytes.
1216 */
1217 if (*bytesp >= syslink_bufsize) {
1218 spin_lock(&sl->spin);
1219 while (*bytesp >= syslink_bufsize) {
1220 if (sl->flags & SLF_WSHUTDOWN) {
1221 error = EPIPE;
1222 break;
1223 }
1224 if (nbio) {
1225 error = EAGAIN;
1226 break;
1227 }
1228 ++sl->wblocked;
1229 error = ssleep(&sl->wblocked, &sl->spin,
1230 PCATCH, "slwmsg", 0);
1231 if (error)
1232 break;
1233 }
1234 spin_unlock(&sl->spin);
1235 }
1236 return (error);
1237}
1238
1239/*
1240 * Unconditionally write a syslink message to the sldesc associated with
1241 * a user descriptor. Command messages are also placed in a red-black
1242 * tree so their DMA tag (if any) can be accessed and so they can be
1243 * linked to any reply message.
1244 */
1245static
1246int
1247backend_write_user(struct sldesc *sl, struct slmsg *slmsg)
1248{
1249 int error;
1250
1251 spin_lock(&sl->spin);
1252 if (sl->flags & SLF_RSHUTDOWN) {
1253 /*
1254 * Not accepting new messages
1255 */
1256 error = EPIPE;
1257 } else if (slmsg->msg->sm_proto & SM_PROTO_REPLY) {
1258 /*
1259 * Write a reply
1260 */
1261 TAILQ_INSERT_TAIL(&sl->inq, slmsg, tqnode);
1262 sl->repbytes += slmsg->maxsize;
1263 slmsg->flags |= SLMSGF_ONINQ;
1264 error = 0;
1265 } else if (RB_INSERT(slmsg_rb_tree, &sl->reply_rb_root, slmsg)) {
1266 /*
1267 * Write a command, but there was a msgid collision when
1268 * we tried to insert it into the RB tree.
1269 */
1270 error = EEXIST;
1271 } else {
1272 /*
1273 * Write a command, successful insertion into the RB tree.
1274 */
1275 TAILQ_INSERT_TAIL(&sl->inq, slmsg, tqnode);
1276 sl->cmdbytes += slmsg->maxsize;
1277 slmsg->flags |= SLMSGF_ONINQ;
1278 error = 0;
1279 }
1280 spin_unlock(&sl->spin);
1281 if (sl->rwaiters)
1282 wakeup(&sl->rwaiters);
1283 return(error);
1284}
1285
1286/*
1287 * Our peer is returning a command we previously sent it, along with
1288 * the reply message (if not NULL). We just queue the reply to
1289 * userland and free the command.
1290 */
1291static
1292void
1293backend_reply_user(struct sldesc *sl, struct slmsg *slcmd, struct slmsg *slrep)
1294{
1295 int error;
1296
1297 slmsg_put(slcmd);
1298 if (slrep) {
1299 spin_lock(&sl->spin);
1300 if ((sl->flags & SLF_RSHUTDOWN) == 0) {
1301 TAILQ_INSERT_TAIL(&sl->inq, slrep, tqnode);
1302 sl->repbytes += slrep->maxsize;
1303 error = 0;
1304 } else {
1305 error = EPIPE;
1306 }
1307 spin_unlock(&sl->spin);
1308 if (error)
1309 sl->peer->backend_dispose(sl->peer, slrep);
1310 else if (sl->rwaiters)
1311 wakeup(&sl->rwaiters);
1312 }
1313}
1314
1315static
1316void
1317backend_dispose_user(struct sldesc *sl, struct slmsg *slmsg)
1318{
1319 slmsg_put(slmsg);
1320}
1321
1322/************************************************************************
1323 * KERNEL DRIVER OR FILESYSTEM API *
1324 ************************************************************************
1325 *
1326 */
1327
1328/*
1329 * Create a user<->kernel link, returning the user descriptor in *pfd
1330 * and the kernel descriptor in *kslp. 0 is returned on success, and an
1331 * error code is returned on failure.
1332 */
1333int
1334syslink_ukbackend(int *pfd, struct sldesc **kslp)
1335{
1336 struct thread *td = curthread;
1337 struct filedesc *fdp = td->td_proc->p_fd;
1338 struct file *fp;
1339 struct sldesc *usl;
1340 struct sldesc *ksl;
1341 int error;
1342 int fd;
1343
1344 *pfd = -1;
1345 *kslp = NULL;
1346
1347 error = falloc(td->td_lwp, &fp, &fd);
1348 if (error)
1349 return(error);
1350 usl = allocsldesc(NULL);
1351 usl->backend_wblocked = backend_wblocked_user;
1352 usl->backend_write = backend_write_user;
1353 usl->backend_reply = backend_reply_user;
1354 usl->backend_dispose = backend_dispose_user;
1355
1356 ksl = allocsldesc(usl->common);
1357 ksl->peer = usl;
1358 ksl->backend_wblocked = backend_wblocked_kern;
1359 ksl->backend_write = backend_write_kern;
1360 ksl->backend_reply = backend_reply_kern;
1361 ksl->backend_dispose = backend_dispose_kern;
1362
1363 usl->peer = ksl;
1364
1365 setsldescfp(usl, fp);
1366 fsetfd(fdp, fp, fd);
1367 fdrop(fp);
1368
1369 *pfd = fd;
1370 *kslp = ksl;
1371 return(0);
1372}
1373
1374/*
1375 * Assign a unique message id, issue a syslink message to userland,
1376 * and wait for a reply.
1377 */
1378int
1379syslink_kdomsg(struct sldesc *ksl, struct slmsg *slmsg)
1380{
1381 struct syslink_msg *msg;
1382 int error;
1383
1384 /*
1385 * Finish initializing slmsg and post it to the red-black tree for
1386 * reply matching. If the message id is already in use we return
1387 * EEXIST, giving the originator the chance to roll a new msgid.
1388 */
1389 msg = slmsg->msg;
1390 slmsg->msgsize = msg->sm_bytes;
1391 if ((error = syslink_validate_msg(msg, msg->sm_bytes)) != 0)
1392 return (error);
1393 msg->sm_msgid = allocsysid();
1394
1395 /*
1396 * Issue the request and wait for a matching reply or failure,
1397 * then remove the message from the matching tree and return.
1398 */
1399 error = ksl->peer->backend_write(ksl->peer, slmsg);
1400 spin_lock(&ksl->spin);
1401 if (error == 0) {
1402 while (slmsg->rep == NULL) {
1403 error = ssleep(slmsg, &ksl->spin, 0, "kwtmsg", 0);
1404 /* XXX ignore error for now */
1405 }
1406 if (slmsg->rep == (struct slmsg *)-1) {
1407 error = EIO;
1408 slmsg->rep = NULL;
1409 } else {
1410 error = slmsg->rep->msg->sm_head.se_aux;
1411 }
1412 }
1413 spin_unlock(&ksl->spin);
1414 return(error);
1415}
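
/*
 * Example (sketch): how a kernel driver might issue one synchronous
 * request over a user<->kernel link created with syslink_ukbackend().
 * MYPROTO and MYCMD are hypothetical placeholders; the message here is a
 * minimal header-only command.
 */
#if 0
	struct sldesc *ksl;
	struct slmsg *slmsg;
	int fd;
	int error;

	error = syslink_ukbackend(&fd, &ksl);	/* fd is handed to userland */
	if (error)
		return (error);

	slmsg = syslink_kallocmsg();
	slmsg->msg->sm_bytes = sizeof(struct syslink_msg);
	slmsg->msg->sm_proto = MYPROTO;		/* hypothetical */
	slmsg->msg->sm_head.se_cmd = MYCMD;	/* hypothetical */
	slmsg->msg->sm_head.se_bytes = slmsg->msg->sm_bytes -
	    offsetof(struct syslink_msg, sm_head);

	error = syslink_kdomsg(ksl, slmsg);	/* blocks for the reply */
	syslink_kfreemsg(ksl, slmsg);		/* also disposes of the reply */
	syslink_kclose(ksl);
#endif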
1416
1417/*
1418 * Similar to syslink_kdomsg but return immediately instead of
1419 * waiting for a reply. The kernel must supply a callback function
1420 * which will be called in the context of the user process replying
1421 * to the message.
1422 */
1423int
1424syslink_ksendmsg(struct sldesc *ksl, struct slmsg *slmsg,
1425 void (*func)(struct slmsg *, void *, int), void *arg)
1426{
1427 struct syslink_msg *msg;
1428 int error;
1429
1430 /*
1431 * Finish initializing slmsg and post it to the red-black tree for
1432 * reply matching. If the message id is already in use we return
1433 * EEXIST, giving the originator the chance to roll a new msgid.
1434 */
1435 msg = slmsg->msg;
1436 slmsg->msgsize = msg->sm_bytes;
1437 slmsg->callback_func = func;
1438 slmsg->callback_data = arg;
1439 if ((error = syslink_validate_msg(msg, msg->sm_bytes)) != 0)
1440 return (error);
1441 msg->sm_msgid = allocsysid();
1442
1443 /*
1444 * Issue the request. If no error occurred the operation will be
1445 * in progress, otherwise the operation is considered to have failed
1446 * and the caller can deallocate the slmsg.
1447 */
1448 error = ksl->peer->backend_write(ksl->peer, slmsg);
1449 return (error);
1450}
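
/*
 * Example (sketch): asynchronous use of syslink_ksendmsg(). The callback
 * runs in the context of the user process that replies and typically
 * records the error and frees the message (which also disposes of the
 * attached reply). The driver softc and field names are hypothetical.
 */
#if 0
static void
mydriver_done(struct slmsg *slmsg, void *arg, int error)
{
	struct mydriver_softc *sc = arg;

	sc->sc_last_error = error;
	syslink_kfreemsg(sc->sc_ksl, slmsg);
}

	/* ... in the issuing path ... */
	error = syslink_ksendmsg(ksl, slmsg, mydriver_done, sc);
	if (error)
		syslink_kfreemsg(ksl, slmsg);	/* send failed, still ours */
#endif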
1451
1452int
1453syslink_kwaitmsg(struct sldesc *ksl, struct slmsg *slmsg)
1454{
1455 int error;
1456
1457 spin_lock(&ksl->spin);
1458 while (slmsg->rep == NULL) {
1459 error = ssleep(slmsg, &ksl->spin, 0, "kwtmsg", 0);
1460 /* XXX ignore error for now */
1461 }
1462 if (slmsg->rep == (struct slmsg *)-1) {
1463 error = EIO;
1464 slmsg->rep = NULL;
1465 } else {
1466 error = slmsg->rep->msg->sm_head.se_aux;
1467 }
1468 spin_unlock(&ksl->spin);
1469 return(error);
1470}
1471
1472struct slmsg *
1473syslink_kallocmsg(void)
1474{
1475 return(objcache_get(sl_objcache_small, M_WAITOK));
1476}
1477
1478void
1479syslink_kfreemsg(struct sldesc *ksl, struct slmsg *slmsg)
1480{
1481 struct slmsg *rep;
1482
1483 if ((rep = slmsg->rep) != NULL) {
1484 slmsg->rep = NULL;
1485 ksl->peer->backend_dispose(ksl->peer, rep);
1486 }
1487 slmsg->callback_func = NULL;
1488 slmsg_put(slmsg);
1489}
1490
1491void
1492syslink_kshutdown(struct sldesc *ksl, int how)
1493{
1494 shutdownsldesc(ksl, how);
1495}
1496
1497void
1498syslink_kclose(struct sldesc *ksl)
1499{
1500 shutdownsldesc(ksl, SHUT_RDWR);
1501 sldrop(ksl);
1502}
1503
1504/*
1505 * Associate a DMA buffer with a kernel syslink message prior to it
1506 * being sent to userland. The DMA buffer is set up from the point
1507 * of view of the target.
1508 */
1509int
1510syslink_kdmabuf_pages(struct slmsg *slmsg, struct vm_page **mbase, int npages)
1511{
1512 int xflags;
1513 int error;
1514
1515 xflags = XIOF_VMLINEAR;
1516 if (slmsg->msg->sm_head.se_cmd & SE_CMDF_DMAR)
1517 xflags |= XIOF_READ | XIOF_WRITE;
1518 else if (slmsg->msg->sm_head.se_cmd & SE_CMDF_DMAW)
1519 xflags |= XIOF_READ;
1520 error = xio_init_pages(&slmsg->xio, mbase, npages, xflags);
1521 slmsg->flags |= SLMSGF_HASXIO;
1522 return (error);
1523}
1524
1525/*
1526 * Associate a DMA buffer with a kernel syslink message prior to it
1527 * being sent to userland. The DMA buffer is set up from the point
1528 * of view of the target.
1529 */
1530int
1531syslink_kdmabuf_data(struct slmsg *slmsg, char *base, int bytes)
1532{
1533 int xflags;
1534
1535 xflags = XIOF_VMLINEAR;
1536 if (slmsg->msg->sm_head.se_cmd & SE_CMDF_DMAR)
1537 xflags |= XIOF_READ | XIOF_WRITE;
1538 else if (slmsg->msg->sm_head.se_cmd & SE_CMDF_DMAW)
1539 xflags |= XIOF_READ;
1540 xio_init_kbuf(&slmsg->xio, base, bytes);
1541 slmsg->xio.xio_flags |= xflags;
1542 slmsg->flags |= SLMSGF_HASXIO;
1543 return(0);
1544}
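
/*
 * Example (sketch): attaching a kernel data buffer to a command before
 * issuing it with syslink_kdomsg() or syslink_ksendmsg(). The DMA
 * direction flags in the head element must already be set, since they
 * determine the XIO access flags chosen above. Buffer and length are
 * illustrative.
 */
#if 0
	slmsg->msg->sm_head.se_cmd |= SE_CMDF_DMAW; /* we are writing data */
	error = syslink_kdmabuf_data(slmsg, databuf, datalen);
	if (error == 0)
		error = syslink_kdomsg(ksl, slmsg);
#endif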
1545
1546/************************************************************************
1547 * BACKEND FUNCTIONS FOR KERNEL API *
1548 ************************************************************************
1549 *
1550 * These are the backend functions for a sldesc associated with a kernel
1551 * API.
1552 */
1553
1554/*
1555 * Our peer wants to write a syslink message to us and is asking us to
1556 * block if our input queue is full. We don't implement command reception,
1557 * so we never block.
1558 */
1559static
1560int
1561backend_wblocked_kern(struct sldesc *ksl, int nbio, sl_proto_t proto)
1562{
1563 /* never blocks */
1564 return(0);
1565}
1566
1567/*
1568 * Our peer is writing a request to the kernel. At the moment we do not
1569 * accept commands.
1570 */
1571static
1572int
1573backend_write_kern(struct sldesc *ksl, struct slmsg *slmsg)
1574{
1575 return(EOPNOTSUPP);
1576}
1577
1578/*
1579 * Our peer wants to reply to a syslink message we sent it earlier. Both
1580 * the original command (that we passed to our peer) and the peer's reply
1581 * are specified. If the peer has failed, slrep will be NULL.
1582 */
1583static
1584void
1585backend_reply_kern(struct sldesc *ksl, struct slmsg *slcmd, struct slmsg *slrep)
1586{
1587 int error;
1588
1589 spin_lock(&ksl->spin);
1590 if (slrep == NULL) {
1591 slcmd->rep = (struct slmsg *)-1;
1592 error = EIO;
1593 } else {
1594 slcmd->rep = slrep;
1595 error = slrep->msg->sm_head.se_aux;
1596 }
1597 spin_unlock(&ksl->spin);
1598
1599 /*
1600 * Issue callback or wakeup a synchronous waiter.
1601 */
1602 if (slcmd->callback_func) {
1603 slcmd->callback_func(slcmd, slcmd->callback_data, error);
1604 } else {
1605 wakeup(slcmd);
1606 }
1607}
1608
1609/*
1610 * Any reply messages we sent to our peer are returned to us for disposal.
1611 * Since we do not currently accept commands from our peer, we never send
1612 * replies, so there will not be any replies returned to us to dispose of.
1613 */
1614static
1615void
1616backend_dispose_kern(struct sldesc *ksl, struct slmsg *slmsg)
1617{
1618 panic("backend_dispose_kern: kernel can't accept commands so it "
1619 "certainly did not reply to one!");
1620}
1621