2 * Copyright (c) 2012-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * This module allows disk devices to be created and associated with a
36 * communications pipe or socket. You open the device and issue an
37 * ioctl() to install a new disk along with its communications descriptor.
39 * All further communication occurs via the descriptor using the DMSG
40 * LNK_CONN, LNK_SPAN, and BLOCK protocols. The descriptor can be a
41 * direct connection to a remote machine's disk (in-kernel), to a remote
42 * cluster controller, to the local cluster controller, etc.
44 * /dev/xdisk is the control device, issue ioctl()s to create the /dev/xa%d
45 * devices. These devices look like raw disks to the system.
47 #include <sys/param.h>
48 #include <sys/systm.h>
51 #include <sys/device.h>
52 #include <sys/devicestat.h>
54 #include <sys/kernel.h>
55 #include <sys/malloc.h>
56 #include <sys/sysctl.h>
58 #include <sys/queue.h>
62 #include <sys/kern_syscall.h>
65 #include <sys/xdiskioctl.h>
68 #include <sys/thread2.h>
/*
 * NOTE(review): this listing is a sampled extract -- each line keeps its
 * original source line number and gaps mark elided lines (struct headers,
 * braces and some fields are missing).  Comments only describe what the
 * visible lines establish.
 */
72 RB_HEAD(xa_softc_tree, xa_softc);
73 RB_PROTOTYPE(xa_softc_tree, xa_softc, rbnode, xa_softc_cmp);
/* xa_tag: tracks one in-flight DMSG transaction (BIO or sync command) */
79 TAILQ_ENTRY(xa_tag) entry;	/* tag_freeq / tag_pendq linkage */
81 dmsg_blk_error_t status;	/* completion status from the remote side */
89 typedef struct xa_tag xa_tag_t;
/* xa_softc: one remote-backed disk device (/dev/xa%d) */
95 struct kdmsg_state_list spanq;	/* LNK_SPANs currently backing this disk */
96 RB_ENTRY(xa_softc) rbnode;	/* xa_device_tree linkage, keyed on fs_label */
98 struct disk_info info;	/* media parameters advertised to the disk layer */
107 char cl_label[64]; /* from LNK_SPAN cl_label (host/dev) */
108 char fs_label[64]; /* from LNK_SPAN fs_label (serno str) */
110 TAILQ_HEAD(, bio) bioq; /* pending BIOs */
111 TAILQ_HEAD(, xa_tag) tag_freeq; /* available I/O tags */
112 TAILQ_HEAD(, xa_tag) tag_pendq; /* running I/O tags */
113 struct lwkt_token tok;	/* per-softc serializing token */
116 typedef struct xa_softc xa_softc_t;
/* xa_iocom: per-connection state wrapping a kdmsg iocom */
119 TAILQ_ENTRY(xa_iocom) entry;	/* xaiocomq linkage */
124 typedef struct xa_iocom xa_iocom_t;
126 static int xa_softc_cmp(xa_softc_t *sc1, xa_softc_t *sc2);
127 RB_GENERATE(xa_softc_tree, xa_softc, rbnode, xa_softc_cmp);
128 static struct xa_softc_tree xa_device_tree;	/* all known xa devices */
130 #define MAXTAGS 64 /* no real limit */
/* Forward declarations for driver entry points and the BLOCK state machine */
132 static int xdisk_attach(struct xdisk_attach_ioctl *xaioc);
133 static int xdisk_detach(struct xdisk_attach_ioctl *xaioc);
134 static void xaio_exit(kdmsg_iocom_t *iocom);
135 static int xaio_rcvdmsg(kdmsg_msg_t *msg);
137 static void xa_terminate_check(struct xa_softc *sc);
139 static xa_tag_t *xa_setup_cmd(xa_softc_t *sc, struct bio *bio);
140 static void xa_start(xa_tag_t *tag, kdmsg_msg_t *msg, int async);
141 static void xa_done(xa_tag_t *tag, int wasbio);
142 static void xa_release(xa_tag_t *tag, int wasbio);
143 static uint32_t xa_wait(xa_tag_t *tag);
144 static int xa_sync_completion(kdmsg_state_t *state, kdmsg_msg_t *msg);
145 static int xa_bio_completion(kdmsg_state_t *state, kdmsg_msg_t *msg);
146 static void xa_restart_deferred(xa_softc_t *sc);
148 MALLOC_DEFINE(M_XDISK, "Networked disk client", "Network Disks");
/*
151 * Control device, issue ioctls to create xa devices.
 */
153 static d_open_t xdisk_open;
154 static d_close_t xdisk_close;
155 static d_ioctl_t xdisk_ioctl;
/* cdevsw for the /dev/xdisk control device */
157 static struct dev_ops xdisk_ops = {
158 { "xdisk", 0, D_MPSAFE | D_TRACKCLOSE },
159 .d_open = xdisk_open,
160 .d_close = xdisk_close,
161 .d_ioctl = xdisk_ioctl
/* Per-disk (/dev/xa%d) entry points */
167 static d_open_t xa_open;
168 static d_close_t xa_close;
169 static d_ioctl_t xa_ioctl;
170 static d_strategy_t xa_strategy;
171 static d_psize_t xa_size;
/* cdevsw for the xa disk devices (some field initializers elided here) */
173 static struct dev_ops xa_ops = {
174 { "xa", 0, D_DISK | D_CANFREE | D_MPSAFE | D_TRACKCLOSE },
179 .d_write = physwrite,
180 .d_strategy = xa_strategy,
/*
 * Global state.  xdisk_token guards xdisk_opencount, xaiocomq and
 * xa_device_tree (see usage throughout this file).
 */
184 static struct lwkt_token xdisk_token = LWKT_TOKEN_INITIALIZER(xdisk_token);
185 static int xdisk_opencount;
186 static cdev_t xdisk_dev;
187 static TAILQ_HEAD(, xa_iocom) xaiocomq;
/*
190 * Module initialization
 */
193 xdisk_modevent(module_t mod, int type, void *data)
/* Load path (case labels elided): init globals, create control device */
197 TAILQ_INIT(&xaiocomq);
198 RB_INIT(&xa_device_tree);
199 xdisk_dev = make_dev(&xdisk_ops, 0,
200 UID_ROOT, GID_WHEEL, 0600, "xdisk");
/* Unload path: presumably refuses while open or iocoms remain -- confirm */
204 if (xdisk_opencount || TAILQ_FIRST(&xaiocomq))
207 destroy_dev(xdisk_dev);
210 dev_ops_remove_all(&xdisk_ops);
211 dev_ops_remove_all(&xa_ops);
219 DEV_MODULE(xdisk, xdisk_modevent, 0);
222 xa_softc_cmp(xa_softc_t *sc1, xa_softc_t *sc2)
224 return(strcmp(sc1->fs_label, sc2->fs_label));
/*
 * Control device open/close.  Bodies are partially elided in this listing;
 * visibly they only bracket work with xdisk_token (presumably adjusting
 * xdisk_opencount -- TODO confirm against full source).
 */
231 xdisk_open(struct dev_open_args *ap)
233 lwkt_gettoken(&xdisk_token);
235 lwkt_reltoken(&xdisk_token);
240 xdisk_close(struct dev_close_args *ap)
242 lwkt_gettoken(&xdisk_token);
244 lwkt_reltoken(&xdisk_token);
/* Control ioctl: dispatch attach/detach requests (case labels elided) */
249 xdisk_ioctl(struct dev_ioctl_args *ap)
255 error = xdisk_attach((void *)ap->a_data);
258 error = xdisk_detach((void *)ap->a_data);
267 /************************************************************************
269 ************************************************************************/
/*
 * XDISKIOCATTACH: take over the caller-supplied descriptor (xaioc->fd),
 * wrap it in a new xa_iocom and start DMSG communications over it.  The
 * iocom auto-advertises LNK_CONN filtered to PEER_BLOCK/SERVER peers.
 */
272 xdisk_attach(struct xdisk_attach_ioctl *xaioc)
278 * Normalize ioctl params
280 kprintf("xdisk_attach1\n");
281 fp = holdfp(curproc->p_fd, xaioc->fd, -1);	/* hold caller's descriptor */
284 kprintf("xdisk_attach2\n");
287 * See if the serial number is already present. If we are
288 * racing a termination the disk subsystem may still have
289 * duplicate entries not yet removed so we wait a bit and
292 lwkt_gettoken(&xdisk_token);
294 xaio = kmalloc(sizeof(*xaio), M_XDISK, M_WAITOK | M_ZERO);
295 kprintf("xdisk_attach3\n");
296 kdmsg_iocom_init(&xaio->iocom, xaio,
297 KDMSG_IOCOMF_AUTOCONN,
298 M_XDISK, xaio_rcvdmsg);
299 xaio->iocom.exit_func = xaio_exit;	/* cleanup callback on disconnect */
301 kdmsg_iocom_reconnect(&xaio->iocom, fp, "xdisk");
304 * Setup our LNK_CONN advertisement for autoinitiate.
306 * Our filter is setup to only accept PEER_BLOCK/SERVER
309 * We need a unique pfs_fsid to avoid confusion.
311 xaio->iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_CLIENT;
312 xaio->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
313 xaio->iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK;
314 xaio->iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
315 xaio->iocom.auto_lnk_conn.pfs_mask = 1LLU << DMSG_PFSTYPE_SERVER;
316 ksnprintf(xaio->iocom.auto_lnk_conn.fs_label,
317 sizeof(xaio->iocom.auto_lnk_conn.fs_label),
319 kern_uuidgen(&xaio->iocom.auto_lnk_conn.pfs_fsid, 1);	/* unique fsid */
322 * Setup our LNK_SPAN advertisement for autoinitiate
324 TAILQ_INSERT_TAIL(&xaiocomq, xaio, entry);
325 kdmsg_iocom_autoinitiate(&xaio->iocom, NULL);	/* kick off protocol */
326 lwkt_reltoken(&xdisk_token);
/*
 * XDISKIOCDETACH handler (body elided in this listing).
 */
332 xdisk_detach(struct xdisk_attach_ioctl *xaioc)
/*
338 * Called from iocom core transmit thread upon disconnect.
 * Unlinks the xa_iocom from the global list and frees it.
 */
342 xaio_exit(kdmsg_iocom_t *iocom)
344 xa_iocom_t *xaio = iocom->handle;
346 kprintf("xdisk_detach -xaio_exit\n");
347 lwkt_gettoken(&xdisk_token);	/* protects xaiocomq */
348 TAILQ_REMOVE(&xaiocomq, xaio, entry);
349 lwkt_reltoken(&xdisk_token);
351 kfree(xaio, M_XDISK);
/*
355 * Called from iocom core to handle messages that the iocom core does not
356 * handle itself and for which a state function callback has not yet been
359 * We primarily care about LNK_SPAN transactions here.
 *
 * A LNK_SPAN create either attaches to an existing xa_softc (matched by
 * fs_label) or creates a brand-new one, allocating its tag pool and the
 * backing disk device.  A LNK_SPAN delete detaches and may tear the
 * softc down via xa_terminate_check().
 */
362 xaio_rcvdmsg(kdmsg_msg_t *msg)
364 kdmsg_state_t *state = msg->state;
365 xa_iocom_t *xaio = state->iocom->handle;
368 kprintf("xdisk_rcvdmsg %08x\n", msg->any.head.cmd);
369 lwkt_gettoken(&xdisk_token);
372 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE:
374 * A LNK_SPAN transaction which is opened and closed
375 * degenerately is not useful to us, just ignore it.
377 kdmsg_msg_reply(msg, 0);
379 case DMSG_LNK_SPAN | DMSGF_CREATE:
381 * Manage the tracking node for the remote LNK_SPAN.
383 * Return a streaming result, leaving the transaction open
384 * in both directions to allow sub-transactions.
/* Copy labels into the dummy softc used purely as an RB_FIND key */
386 bcopy(msg->any.lnk_span.cl_label, xaio->dummysc.cl_label,
387 sizeof(xaio->dummysc.cl_label));
388 xaio->dummysc.cl_label[sizeof(xaio->dummysc.cl_label) - 1] = 0;
390 bcopy(msg->any.lnk_span.fs_label, xaio->dummysc.fs_label,
391 sizeof(xaio->dummysc.fs_label));
392 xaio->dummysc.fs_label[sizeof(xaio->dummysc.fs_label) - 1] = 0;
394 kprintf("xdisk: %s LNK_SPAN create ",
395 msg->any.lnk_span.fs_label);
397 sc = RB_FIND(xa_softc_tree, &xa_device_tree, &xaio->dummysc);
/* Not found: create a new softc keyed by the span's labels */
405 sc = kmalloc(sizeof(*sc), M_XDISK, M_WAITOK | M_ZERO);
406 kprintf("(not found - create %p)\n", sc);
407 bcopy(msg->any.lnk_span.cl_label, sc->cl_label,
408 sizeof(sc->cl_label));
409 sc->cl_label[sizeof(sc->cl_label) - 1] = 0;
410 bcopy(msg->any.lnk_span.fs_label, sc->fs_label,
411 sizeof(sc->fs_label));
412 sc->fs_label[sizeof(sc->fs_label) - 1] = 0;
414 /* XXX FIXME O(N^2) */
/* Linear scan of the tree per candidate unit number (hence O(N^2)) */
418 RB_FOREACH(sctmp, xa_softc_tree,
420 if (sctmp->unit == unit)
428 lwkt_token_init(&sc->tok, "xa");
429 TAILQ_INIT(&sc->spanq);
430 TAILQ_INIT(&sc->bioq);
431 TAILQ_INIT(&sc->tag_freeq);
432 TAILQ_INIT(&sc->tag_pendq);
433 RB_INSERT(xa_softc_tree, &xa_device_tree, sc);
434 TAILQ_INSERT_TAIL(&sc->spanq, msg->state, user_entry);
435 msg->state->any.xa_sc = sc;
/* Pre-allocate the fixed pool of I/O tags for this device */
440 for (n = 0; n < MAXTAGS; ++n) {
441 tag = kmalloc(sizeof(*tag),
442 M_XDISK, M_WAITOK|M_ZERO);
444 TAILQ_INSERT_TAIL(&sc->tag_freeq, tag, entry);
447 if (sc->dev == NULL) {
448 dev = disk_create(unit, &sc->disk, &xa_ops);
/* Publish media geometry from the LNK_SPAN advertisement */
453 sc->info.d_media_blksize =
454 msg->any.lnk_span.media.block.blksize;
455 if (sc->info.d_media_blksize <= 0)
456 sc->info.d_media_blksize = 1;
457 sc->info.d_media_blocks =
458 msg->any.lnk_span.media.block.bytes /
459 sc->info.d_media_blksize;
460 sc->info.d_dsflags = DSO_MBRQUIET | DSO_RAWPSIZE;
461 sc->info.d_secpertrack = 32;
462 sc->info.d_nheads = 64;
463 sc->info.d_secpercyl = sc->info.d_secpertrack *
465 sc->info.d_ncylinders = 0;
467 sc->info.d_serialno = sc->fs_label;
468 disk_setdiskinfo_sync(&sc->disk, &sc->info);
469 xa_restart_deferred(sc); /* eats serializing */
/* Found: attach this span to the existing softc */
471 kprintf("(found spancnt %d sc=%p)\n", sc->spancnt, sc);
473 TAILQ_INSERT_TAIL(&sc->spanq, msg->state, user_entry);
474 msg->state->any.xa_sc = sc;
475 if (sc->serializing == 0 && sc->open_tag == NULL) {
477 xa_restart_deferred(sc); /* eats serializing */
480 kdmsg_msg_result(msg, 0);
482 case DMSG_LNK_SPAN | DMSGF_DELETE:
483 case DMSG_LNK_SPAN | DMSGF_DELETE | DMSGF_REPLY:
485 * Manage the tracking node for the remote LNK_SPAN.
487 * Return a final result, closing our end of the transaction.
489 sc = msg->state->any.xa_sc;
490 kprintf("xdisk: %s LNK_SPAN terminate\n", sc->fs_label);
491 msg->state->any.xa_sc = NULL;
492 TAILQ_REMOVE(&sc->spanq, msg->state, user_entry);
494 xa_terminate_check(sc);
495 kdmsg_msg_reply(msg, 0);
497 case DMSG_LNK_SPAN | DMSGF_REPLY:
499 * Ignore unimplemented streaming replies on our LNK_SPAN
505 * Execute shell command (not supported atm).
507 * This is a one-way packet but if not (e.g. if part of
508 * a streaming transaction), we will have already closed
511 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
513 case DMSG_DBG_SHELL | DMSGF_REPLY:
515 * Receive one or more replies to a shell command
516 * that we sent. Just dump it to the console.
518 * This is a one-way packet but if not (e.g. if
519 * part of a streaming transaction), we will have
520 * already closed our end.
/* Force NUL termination before printing remote-supplied text */
523 msg->aux_data[msg->aux_size - 1] = 0;
524 kprintf("xdisk: DEBUGMSG: %s\n",
530 * Unsupported one-way message, streaming message, or
533 * Terminate any unsupported transactions with an error
534 * and ignore any unsupported streaming messages.
536 * NOTE: This case also includes DMSG_LNK_ERROR messages
537 * which might be one-way, replying to those would
538 * cause an infinite ping-pong.
540 if (msg->any.head.cmd & DMSGF_CREATE)
541 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
544 lwkt_reltoken(&xdisk_token);
/*
550 * Determine if we can destroy the xa_softc.
552 * Called with xdisk_token held.
 *
 * The softc may only be destroyed once it has no opens, no serializing
 * operation in progress and no remaining LNK_SPANs; otherwise it is
 * left intact for reuse.
 */
556 xa_terminate_check(struct xa_softc *sc)
561 * Determine if we can destroy the softc.
563 kprintf("xdisk: terminate check xa%d (%d,%d,%d) sc=%p ",
565 sc->opencnt, sc->serializing, sc->spancnt,
568 if (sc->opencnt || sc->serializing || sc->spancnt) {
569 kprintf("(leave intact)\n");
572 kprintf("(remove from tree)\n");
574 KKASSERT(TAILQ_EMPTY(&sc->tag_pendq));	/* no I/O may be in flight */
576 RB_REMOVE(xa_softc_tree, &xa_device_tree, sc);
579 disk_destroy(&sc->disk);
580 sc->dev->si_drv1 = NULL;	/* opens now race to ENXIO */
583 KKASSERT(sc->opencnt == 0);
584 KKASSERT(TAILQ_EMPTY(&sc->tag_pendq));
/* Drain and free the pre-allocated tag pool */
586 while ((tag = TAILQ_FIRST(&sc->tag_freeq)) != NULL) {
587 TAILQ_REMOVE(&sc->tag_freeq, tag, entry);
594 /************************************************************************
595 * XA DEVICE INTERFACE *
596 ************************************************************************/
/*
 * Open a /dev/xa%d device.  First open issues BLK_OPEN to the remote
 * side via xa_restart_deferred() and waits for it to complete; failure
 * is reported through sc->last_error.
 */
599 xa_open(struct dev_open_args *ap)
601 cdev_t dev = ap->a_head.a_dev;
605 dev->si_bsize_phys = 512;
606 dev->si_bsize_best = 32768;
609 * Interlock open with opencnt, wait for attachment operations
612 lwkt_gettoken(&xdisk_token);
616 lwkt_reltoken(&xdisk_token);
617 return ENXIO; /* raced destruction */
/* Back off briefly while a racing serialized operation finishes */
619 if (sc->serializing) {
620 tsleep(sc, 0, "xarace", hz / 10);
626 * Serialize initial open
628 if (sc->opencnt++ > 0) {
629 lwkt_reltoken(&xdisk_token);
632 lwkt_reltoken(&xdisk_token);
635 * Issue BLK_OPEN if necessary. ENXIO is returned if we have trouble.
637 if (sc->open_tag == NULL) {
638 xa_restart_deferred(sc); /* eats serializing */
645 * Wait for completion of the BLK_OPEN
647 lwkt_gettoken(&xdisk_token);
648 while (sc->serializing)
649 tsleep(sc, 0, "xaopen", hz);
651 error = sc->last_error;	/* set by xa_sync_completion() */
653 KKASSERT(sc->opencnt > 0);
655 xa_terminate_check(sc);
656 sc = NULL; /* sc may be invalid now */
658 lwkt_reltoken(&xdisk_token);
/*
 * Close a /dev/xa%d device.  On last close the BLK_OPEN transaction is
 * closed from our side and we wait for the remote to finish, then run
 * xa_terminate_check() in case the softc can be destroyed.
 */
664 xa_close(struct dev_close_args *ap)
666 cdev_t dev = ap->a_head.a_dev;
672 return ENXIO; /* raced destruction */
673 lwkt_gettoken(&xdisk_token);
674 lwkt_gettoken(&sc->tok);
677 * NOTE: Clearing open_tag allows a concurrent open to re-open
678 * the device and prevents autonomous completion of the tag.
680 if (sc->opencnt == 1 && sc->open_tag) {
683 kdmsg_state_reply(tag->state, 0); /* close our side */
684 xa_wait(tag); /* wait on remote */
686 lwkt_reltoken(&sc->tok);
687 KKASSERT(sc->opencnt > 0);
689 xa_terminate_check(sc);
690 lwkt_reltoken(&xdisk_token);
/*
 * Disk strategy entry point: translate the BIO into a BLOCK-protocol
 * transaction (or queue it if no tag is available, see xa_setup_cmd()).
 */
696 xa_strategy(struct dev_strategy_args *ap)
698 xa_softc_t *sc = ap->a_head.a_dev->si_drv1;
700 struct bio *bio = ap->a_bio;
703 * Allow potentially temporary link failures to fail the I/Os
704 * only if the device is not open. That is, we allow the disk
705 * probe code prior to mount to fail.
707 if (sc->opencnt == 0) {
708 bio->bio_buf->b_error = ENXIO;
709 bio->bio_buf->b_flags |= B_ERROR;
714 tag = xa_setup_cmd(sc, bio);
716 xa_start(tag, NULL, 1);	/* async dispatch */
/* xa ioctl entry point (body elided in this listing) */
721 xa_ioctl(struct dev_ioctl_args *ap)
727 xa_size(struct dev_psize_args *ap)
731 if ((sc = ap->a_head.a_dev->si_drv1) == NULL)
733 ap->a_result = sc->info.d_media_blocks;
737 /************************************************************************
738 * XA BLOCK PROTOCOL STATE MACHINE *
739 ************************************************************************
741 * Implement tag/msg setup and related functions.
/*
 * Acquire an I/O tag for a command.  If none are free and a BIO was
 * supplied, the BIO is parked on sc->bioq for later restart.
 */
744 xa_setup_cmd(xa_softc_t *sc, struct bio *bio)
749 * Only get a tag if we have a valid virtual circuit to the server.
751 lwkt_gettoken(&sc->tok);
752 if ((tag = TAILQ_FIRST(&sc->tag_freeq)) != NULL) {
753 TAILQ_REMOVE(&sc->tag_freeq, tag, entry);
755 TAILQ_INSERT_TAIL(&sc->tag_pendq, tag, entry);
759 * If we can't dispatch now and this is a bio, queue it for later.
761 if (tag == NULL && bio) {
762 TAILQ_INSERT_TAIL(&sc->bioq, bio, bio_act);
764 lwkt_reltoken(&sc->tok);
/*
 * Dispatch a tag: build and transmit the appropriate BLOCK-protocol
 * message (BLK_READ/WRITE/FLUSH/FREEBLKS per buffer command; the switch
 * header and other case labels are elided in this listing).  Each message
 * is a CREATE|DELETE sub-transaction under open_tag's state completing
 * via xa_bio_completion().
 */
770 xa_start(xa_tag_t *tag, kdmsg_msg_t *msg, int async)
772 xa_softc_t *sc = tag->sc;
787 msg = kdmsg_msg_alloc(sc->open_tag->state,
789 DMSGF_CREATE | DMSGF_DELETE,
790 xa_bio_completion, tag);
791 msg->any.blk_read.keyid = sc->keyid;
792 msg->any.blk_read.offset = bio->bio_offset;
793 msg->any.blk_read.bytes = bp->b_bcount;
796 msg = kdmsg_msg_alloc(sc->open_tag->state,
798 DMSGF_CREATE | DMSGF_DELETE,
799 xa_bio_completion, tag);
800 msg->any.blk_write.keyid = sc->keyid;
801 msg->any.blk_write.offset = bio->bio_offset;
802 msg->any.blk_write.bytes = bp->b_bcount;
/* Writes carry the buffer contents as aux payload */
803 msg->aux_data = bp->b_data;
804 msg->aux_size = bp->b_bcount;
807 msg = kdmsg_msg_alloc(sc->open_tag->state,
809 DMSGF_CREATE | DMSGF_DELETE,
810 xa_bio_completion, tag);
811 msg->any.blk_flush.keyid = sc->keyid;
812 msg->any.blk_flush.offset = bio->bio_offset;
813 msg->any.blk_flush.bytes = bp->b_bcount;
815 case BUF_CMD_FREEBLKS:
816 msg = kdmsg_msg_alloc(sc->open_tag->state,
818 DMSGF_CREATE | DMSGF_DELETE,
819 xa_bio_completion, tag);
820 msg->any.blk_freeblks.keyid = sc->keyid;
821 msg->any.blk_freeblks.offset = bio->bio_offset;
822 msg->any.blk_freeblks.bytes = bp->b_bcount;
/* Unsupported buffer command: fail the buffer */
825 bp->b_flags |= B_ERROR;
834 tag->state = msg->state;
835 kdmsg_msg_write(msg);	/* transmit */
/* No message could be built (e.g. no circuit): fail with DMSG_ERR_IO */
837 tag->status.head.error = DMSG_ERR_IO;
/*
 * Block until the tag's transaction completes, returning the remote
 * error code from its status.
 */
843 xa_wait(xa_tag_t *tag)
845 xa_softc_t *sc = tag->sc;
848 kprintf("xdisk: xa_wait %p\n", tag);
850 lwkt_gettoken(&sc->tok);
852 while (tag->done == 0)
853 tsleep(tag, 0, "xawait", 0);	/* woken by completion path */
854 lwkt_reltoken(&sc->tok);
855 error = tag->status.head.error;
/*
 * Finish a tag (asserts its BIO has already been detached) and recycle
 * it via xa_release().
 */
863 xa_done(xa_tag_t *tag, int wasbio)
865 KKASSERT(tag->bio == NULL);
872 xa_release(tag, wasbio);
/*
 * Return a tag to the pool.  If it completed a BIO and more BIOs are
 * queued, immediately reuse the tag for the next queued BIO instead.
 */
877 xa_release(xa_tag_t *tag, int wasbio)
879 xa_softc_t *sc = tag->sc;
882 lwkt_gettoken(&sc->tok);
883 if (wasbio && (bio = TAILQ_FIRST(&sc->bioq)) != NULL) {
884 TAILQ_REMOVE(&sc->bioq, bio, bio_act);
886 lwkt_reltoken(&sc->tok);
887 xa_start(tag, NULL, 1);	/* restart with next BIO */
889 TAILQ_REMOVE(&sc->tag_pendq, tag, entry);
890 TAILQ_INSERT_TAIL(&sc->tag_freeq, tag, entry);
891 lwkt_reltoken(&sc->tok);
/*
896 * Handle messages under the BLKOPEN transaction.
 *
 * Records the BLK_OPEN result in sc->last_error, restarts any BIOs that
 * were queued while the open was pending, and handles unexpected
 * termination from the remote side.
 */
899 xa_sync_completion(kdmsg_state_t *state, kdmsg_msg_t *msg)
901 xa_tag_t *tag = state->any.any;
906 * If the tag has been cleaned out we already closed our side
907 * of the transaction and we are waiting for the other side to
911 if (msg->any.head.cmd & DMSGF_CREATE)
912 kdmsg_state_reply(state, DMSG_ERR_LOSTLINK);
920 lwkt_gettoken(&sc->tok);
923 * Handle initial response to our open and restart any deferred
926 * NOTE: DELETE may also be set.
928 if (msg->any.head.cmd & DMSGF_CREATE) {
929 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
930 case DMSG_LNK_ERROR | DMSGF_REPLY:
931 bzero(&tag->status, sizeof(tag->status));
932 tag->status.head = msg->any.head;
934 case DMSG_BLK_ERROR | DMSGF_REPLY:
935 tag->status = msg->any.blk_error;
938 sc->last_error = tag->status.head.error;	/* consumed by xa_open() */
939 kprintf("xdisk: blk_open completion status %d\n",
941 if (sc->last_error == 0) {
/* Open succeeded: drain the deferred BIO queue onto free tags */
942 while ((bio = TAILQ_FIRST(&sc->bioq)) != NULL) {
943 tag = xa_setup_cmd(sc, NULL);
946 TAILQ_REMOVE(&sc->bioq, bio, bio_act);
948 xa_start(tag, NULL, 1);
956 * Handle unexpected termination (or lost comm channel) from other
957 * side. Autonomous completion only if open_tag matches,
958 * otherwise another thread is probably waiting on the tag.
960 * (see xa_close() for other interactions)
962 if (msg->any.head.cmd & DMSGF_DELETE) {
963 kdmsg_state_reply(tag->state, 0);
964 if (sc->open_tag == tag) {
972 lwkt_reltoken(&sc->tok);
/*
 * Completion callback for per-BIO BLOCK transactions started by
 * xa_start(): records the remote status, copies/zero-extends read data
 * into the buffer, finishes the BIO, and either recycles or re-queues
 * depending on whether the failure was an I/O error or a lost link.
 */
977 xa_bio_completion(kdmsg_state_t *state, kdmsg_msg_t *msg)
979 xa_tag_t *tag = state->any.any;
980 xa_softc_t *sc = tag->sc;
985 * Get the bio from the tag. If no bio is present we just do
988 if ((bio = tag->bio) == NULL)
993 * Process return status
995 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
996 case DMSG_LNK_ERROR | DMSGF_REPLY:
997 bzero(&tag->status, sizeof(tag->status));
998 tag->status.head = msg->any.head;
999 if (tag->status.head.error)
1000 tag->status.resid = bp->b_bcount;
1002 tag->status.resid = 0;
1004 case DMSG_BLK_ERROR | DMSGF_REPLY:
1005 tag->status = msg->any.blk_error;
1010 * If the device is open stall the bio on DMSG errors. If an
1011 * actual I/O error occurred on the remote device, DMSG_ERR_IO
1014 if (tag->status.head.error &&
1015 (msg->any.head.cmd & DMSGF_DELETE) && sc->opencnt) {
1016 if (tag->status.head.error != DMSG_ERR_IO)
1021 * Process bio completion
1023 * For reads any returned data is zero-extended if necessary, so
1024 * the server can short-cut any all-zeros reads if it desires.
1028 if (msg->aux_data && msg->aux_size) {
1029 if (msg->aux_size < bp->b_bcount) {
/* Short read payload: zero-fill the remainder of the buffer */
1030 bcopy(msg->aux_data, bp->b_data, msg->aux_size);
1031 bzero(bp->b_data + msg->aux_size,
1032 bp->b_bcount - msg->aux_size);
1034 bcopy(msg->aux_data, bp->b_data, bp->b_bcount);
/* No payload at all: an all-zeros read short-cut by the server */
1037 bzero(bp->b_data, bp->b_bcount);
1042 case BUF_CMD_FREEBLKS:
1044 if (tag->status.resid > bp->b_bcount)
1045 tag->status.resid = bp->b_bcount;	/* clamp residual */
1046 bp->b_resid = tag->status.resid;
1047 if (tag->status.head.error != 0) {
1049 bp->b_flags |= B_ERROR;
1059 * Handle completion of the transaction. If the bioq is not empty
1060 * we can initiate another bio on the same tag.
1062 * NOTE: Most of our transactions will be single-message
1063 * CREATE+DELETEs, so we won't have to terminate the
1064 * transaction separately, here. But just in case they
1065 * aren't be sure to terminate the transaction.
1068 if (msg->any.head.cmd & DMSGF_DELETE) {
1070 if ((state->txcmd & DMSGF_DELETE) == 0)
1071 kdmsg_msg_reply(msg, 0);
1076 * Handle the case where the transaction failed due to a
1077 * connectivity issue. The tag is put away with wasbio=0
1078 * and we put the BIO back onto the bioq for a later restart.
1081 lwkt_gettoken(&sc->tok);
1082 kprintf("BIO CIRC FAILURE, REPEND BIO %p\n", bio);
1085 if ((state->txcmd & DMSGF_DELETE) == 0)
1086 kdmsg_msg_reply(msg, 0);
1091 TAILQ_INSERT_TAIL(&sc->bioq, bio, bio_act);
1093 lwkt_reltoken(&sc->tok);
/*
1098 * Restart as much deferred I/O as we can. The serializer is set and we
1099 * eat it (clear it) when done.
1101 * Called with sc->tok held
 *
 * If the device is open but no BLK_OPEN is active, pick a live LNK_SPAN
 * and issue a synchronous BLK_OPEN under it; serializing stays set until
 * xa_sync_completion() processes the response.
 */
1105 xa_restart_deferred(xa_softc_t *sc)
1107 kdmsg_state_t *span;
1112 KKASSERT(sc->serializing);	/* caller must own the serializer */
1115 * Determine if a restart is needed.
1117 if (sc->opencnt == 0) {
1119 * Device is not open, nothing to do, eat serializing.
1121 sc->serializing = 0;
1123 } else if (sc->open_tag == NULL) {
1125 * BLK_OPEN required before we can restart any BIOs.
1126 * Select the best LNK_SPAN to issue the BLK_OPEN under.
1128 * serializing interlocks waiting open()s.
/* Skip spans already half-deleted by the remote side */
1131 TAILQ_FOREACH(span, &sc->spanq, user_entry) {
1132 if ((span->rxcmd & DMSGF_DELETE) == 0)
1139 tag = xa_setup_cmd(sc, NULL);
1144 kprintf("xdisk: BLK_OPEN\n");
1146 msg = kdmsg_msg_alloc(span,
1149 xa_sync_completion, tag);
1150 msg->any.blk_open.modes = DMSG_BLKOPEN_RD;
1151 xa_start(tag, msg, 0);	/* synchronous dispatch */
1154 sc->serializing = 0;
1157 /* else leave serializing set until BLK_OPEN response */
1160 sc->serializing = 0;