2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/kernel.h>
38 #include <sys/systm.h>
39 #include <sys/queue.h>
41 #include <sys/malloc.h>
42 #include <sys/mount.h>
43 #include <sys/socket.h>
44 #include <sys/vnode.h>
48 #include <sys/thread.h>
49 #include <sys/globaldata.h>
50 #include <sys/limits.h>
/*
 * Emit the red-black tree routines for kdmsg_state_tree.  Nodes are
 * kdmsg_state structures linked through their rbnode member and ordered
 * by kdmsg_state_cmp().
 */
54 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp);
/*
 * Thread entry points; kdmsg_iocom_reconnect() passes both to
 * lwkt_create() with the iocom as the argument.
 */
56 static void kdmsg_iocom_thread_rd(void *arg);
57 static void kdmsg_iocom_thread_wr(void *arg);
60 * Initialize the roll-up communications structure for a network
61 * messaging session. This function does not install the socket.
/*
 * kdmsg_iocom_init: reset an iocom and install the caller's hooks.
 *
 * The structure is zeroed first, so any previous contents are lost.
 * The handle and the four callbacks (cluster-control wakeup plus the
 * LNK, DBG and misc receive handlers) are then recorded, and the message
 * lock, the transmit queue and both state trees are initialized.
 *
 * NOTE(review): mmsg is accepted here and iocom->mmsg is read by the
 * allocation paths elsewhere in this file, but the store itself is not
 * among the lines shown; confirm it is present.
 */
64 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle,
65 struct malloc_type *mmsg,
66 void (*cctl_wakeup)(kdmsg_iocom_t *),
67 int (*lnk_rcvmsg)(kdmsg_msg_t *msg),
68 int (*dbg_rcvmsg)(kdmsg_msg_t *msg),
69 int (*misc_rcvmsg)(kdmsg_msg_t *msg))
71 bzero(iocom, sizeof(*iocom));
72 iocom->handle = handle;
74 iocom->clusterctl_wakeup = cctl_wakeup;
75 iocom->lnk_rcvmsg = lnk_rcvmsg;
76 iocom->dbg_rcvmsg = dbg_rcvmsg;
77 iocom->misc_rcvmsg = misc_rcvmsg;
/* Back-pointer: code holding only a router reaches the iocom through it. */
78 iocom->router.iocom = iocom;
79 lockinit(&iocom->msglk, "h2msg", 0, 0);
80 TAILQ_INIT(&iocom->msgq);
81 RB_INIT(&iocom->staterd_tree);
82 RB_INIT(&iocom->statewr_tree);
86 * [Re]connect using the passed file pointer. The caller must ref the
87 * fp for us. We own that ref now.
/*
 * kdmsg_iocom_reconnect: replace the connection served by an iocom.
 *
 * Teardown: KDMSG_CLUSTERCTL_KILL is set in msg_ctl, then the function
 * polls (waking &iocom->msg_ctl and sleeping on the iocom with a timeout
 * of hz) until both msgrd_td and msgwr_td have been cleared by the
 * exiting threads.  The reference on the old msg_fp is then dropped.
 *
 * Setup: a reader and a writer thread are created, named
 * "<subsysname>-msgrd" and "<subsysname>-msgwr".
 *
 * NOTE(review): the statements that store fp into the iocom and reset
 * msg_ctl are not among the lines shown; confirm against the full file.
 */
90 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
91 const char *subsysname)
94 * Destroy the current connection
96 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
97 while (iocom->msgrd_td || iocom->msgwr_td) {
98 wakeup(&iocom->msg_ctl);
99 tsleep(iocom, 0, "clstrkl", hz);
103 * Drop communications descriptor
106 fdrop(iocom->msg_fp);
107 iocom->msg_fp = NULL;
109 kprintf("RESTART CONNECTION\n");
112 * Setup new communications descriptor
118 lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td,
119 NULL, 0, -1, "%s-msgrd", subsysname);
120 lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td,
121 NULL, 0, -1, "%s-msgwr", subsysname);
125 * Cluster controller thread. Perform messaging functions. We have one
126 * thread for the reader and one for the writer. The writer handles
127 * shutdown requests (which should break the reader thread).
/*
 * kdmsg_iocom_thread_rd: receive loop for one iocom (arg is the iocom).
 *
 * While KDMSG_CLUSTERCTL_KILL is clear, each pass:
 *   1. reads a fixed-size header and checks hdr.magic,
 *   2. derives and range-checks the full header size from hdr.cmd,
 *   3. allocates a msg and reads any extended header bytes,
 *   4. reads the aux payload, if any, into a kmalloc buffer,
 *   5. runs kdmsg_state_msgrx() and dispatches to the state function or
 *      to the lnk/dbg/misc receive callback, followed by
 *      kdmsg_state_cleanuprx().
 *
 * On exit the descriptor is shut down in both directions, KILL and then
 * KILLRX are set, the thread waits for the writer to set KILLTX, and
 * msgrd_td is cleared last.
 *
 * NOTE(review): the error tests and loop exits between these steps are
 * not among the lines shown; confirm against the full file.
 */
131 kdmsg_iocom_thread_rd(void *arg)
133 kdmsg_iocom_t *iocom = arg;
136 kdmsg_state_t *state;
140 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) {
142 * Retrieve the message from the pipe or socket.
144 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr),
145 NULL, 1, UIO_SYSSPACE);
148 if (hdr.magic != DMSG_HDR_MAGIC) {
149 kprintf("kdmsg: bad magic: %04x\n", hdr.magic);
/* Header size is carried in the DMSGF_SIZE bits of cmd, in DMSG_ALIGN units. */
153 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN;
/* NOTE(review): the upper bound used for the header is DMSG_AUX_MAX; confirm intended. */
154 if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) {
155 kprintf("kdmsg: bad header size %zd\n", hbytes);
159 /* XXX messy: mask cmd to avoid allocating state */
160 msg = kdmsg_msg_alloc(&iocom->router,
161 hdr.cmd & DMSGF_BASECMDMASK,
164 msg->hdr_size = hbytes;
165 if (hbytes > sizeof(hdr)) {
166 error = fp_read(iocom->msg_fp, &msg->any.head + 1,
167 hbytes - sizeof(hdr),
168 NULL, 1, UIO_SYSSPACE);
170 kprintf("kdmsg: short msg received\n");
/* Aux payload size is carried in hdr.aux_bytes, in DMSG_ALIGN units. */
175 msg->aux_size = hdr.aux_bytes * DMSG_ALIGN;
176 if (msg->aux_size > DMSG_AUX_MAX) {
177 kprintf("kdmsg: illegal msg payload size %zd\n",
183 msg->aux_data = kmalloc(msg->aux_size, iocom->mmsg,
185 error = fp_read(iocom->msg_fp, msg->aux_data,
187 NULL, 1, UIO_SYSSPACE);
189 kprintf("kdmsg: short msg payload received\n");
195 * State machine tracking, state assignment for msg,
196 * returns error and discard status. Errors are fatal
197 * to the connection except for EALREADY which forces
198 * a discard without execution.
200 error = kdmsg_state_msgrx(msg);
203 * Raw protocol or connection error
206 if (error == EALREADY)
208 } else if (msg->state && msg->state->func) {
210 * Message related to state which already has a
211 * handling function installed for it.
213 error = msg->state->func(msg->state, msg);
214 kdmsg_state_cleanuprx(msg);
215 } else if ((msg->any.head.cmd & DMSGF_PROTOS) ==
218 * Message related to the LNK protocol set
220 error = iocom->lnk_rcvmsg(msg);
221 kdmsg_state_cleanuprx(msg);
222 } else if ((msg->any.head.cmd & DMSGF_PROTOS) ==
225 * Message related to the DBG protocol set
227 error = iocom->dbg_rcvmsg(msg);
228 kdmsg_state_cleanuprx(msg);
231 * Other higher-level messages (e.g. vnops)
233 error = iocom->misc_rcvmsg(msg);
234 kdmsg_state_cleanuprx(msg);
240 kprintf("kdmsg: read failed error %d\n", error);
242 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
244 if (msg->state && msg->state->msg == msg)
245 msg->state->msg = NULL;
249 if ((state = iocom->freerd_state) != NULL) {
250 iocom->freerd_state = NULL;
251 kdmsg_state_free(state);
255 * Shutdown the socket before waiting for the transmit side.
257 * If we are dying due to e.g. a socket disconnect verses being
258 * killed explicity we have to set KILL in order to kick the tx
259 * side when it might not have any other work to do. KILL might
260 * already be set if we are in an unmount or reconnect.
262 fp_shutdown(iocom->msg_fp, SHUT_RDWR);
264 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
265 wakeup(&iocom->msg_ctl);
268 * Wait for the transmit side to drain remaining messages
269 * before cleaning up the rx state. The transmit side will
270 * set KILLTX and wait for the rx side to completely finish
271 * (set msgrd_td to NULL) before cleaning up any remaining
274 lockmgr(&iocom->msglk, LK_RELEASE);
275 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX);
276 wakeup(&iocom->msg_ctl);
277 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) {
278 wakeup(&iocom->msg_ctl);
279 tsleep(iocom, 0, "clstrkw", hz);
282 iocom->msgrd_td = NULL;
285 * iocom can be ripped out from under us at this point but
/*
 * kdmsg_iocom_thread_wr: transmit loop for one iocom (arg is the iocom).
 *
 * Main loop, entered with msglk held, runs while KDMSG_CLUSTERCTL_KILL is
 * clear and error is 0:
 *   - if msgq is empty, flag SLEEPING in msg_ctl and lksleep on
 *     &iocom->msg_ctl with a timeout of hz,
 *   - otherwise dequeue each msg, release msglk, run kdmsg_state_msgtx(),
 *     write the header (hdr_size bytes) and then the aux data with
 *     fp_write(), and finish with kdmsg_state_cleanuptx().
 *
 * Shutdown: the descriptor is shut down in both directions, KILLTX is
 * set, and the thread waits for the reader to clear msgrd_td.  It then
 * walks staterd_tree and statewr_tree, handing each remaining state a
 * synthetic DMSG_LNK_ERROR message with DELETE set, removes states that
 * have no handler function, drains msgq, asserts that both trees and
 * conn_state are empty, and clears msgwr_td last.
 *
 * NOTE(review): several branch bodies and loop exits are not among the
 * lines shown; confirm against the full file.
 */
294 kdmsg_iocom_thread_wr(void *arg)
296 kdmsg_iocom_t *iocom = arg;
298 kdmsg_state_t *state;
307 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
309 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) {
311 * Sleep if no messages pending. Interlock with flag while
314 if (TAILQ_EMPTY(&iocom->msgq)) {
315 atomic_set_int(&iocom->msg_ctl,
316 KDMSG_CLUSTERCTL_SLEEPING);
317 lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz);
318 atomic_clear_int(&iocom->msg_ctl,
319 KDMSG_CLUSTERCTL_SLEEPING);
322 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
324 * Remove msg from the transmit queue and do
325 * persist and half-closed state handling.
327 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
328 lockmgr(&iocom->msglk, LK_RELEASE);
330 error = kdmsg_state_msgtx(msg);
331 if (error == EALREADY) {
334 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
339 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
344 * Dump the message to the pipe or socket.
346 error = fp_write(iocom->msg_fp, &msg->any,
347 msg->hdr_size, &res, UIO_SYSSPACE);
348 if (error || res != msg->hdr_size) {
351 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
355 error = fp_write(iocom->msg_fp,
356 msg->aux_data, msg->aux_size,
358 if (error || res != msg->aux_size) {
361 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
365 kdmsg_state_cleanuptx(msg);
366 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
371 * Cleanup messages pending transmission and release msgq lock.
374 kprintf("kdmsg: write failed error %d\n", error);
377 if (msg->state && msg->state->msg == msg)
378 msg->state->msg = NULL;
383 * Shutdown the socket. This will cause the rx thread to get an
384 * EOF and ensure that both threads get to a termination state.
386 fp_shutdown(iocom->msg_fp, SHUT_RDWR);
389 * Set KILLTX (which the rx side waits for), then wait for the RX
390 * side to completely finish before we clean out any remaining
393 lockmgr(&iocom->msglk, LK_RELEASE);
394 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX);
395 wakeup(&iocom->msg_ctl);
396 while (iocom->msgrd_td) {
397 wakeup(&iocom->msg_ctl);
398 tsleep(iocom, 0, "clstrkw", hz);
400 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
403 * Simulate received MSGF_DELETE's for any remaining states.
406 RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) {
408 (state->rxcmd & DMSGF_DELETE) == 0) {
409 lockmgr(&iocom->msglk, LK_RELEASE);
410 msg = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_ERROR,
412 if ((state->rxcmd & DMSGF_CREATE) == 0)
413 msg->any.head.cmd |= DMSGF_CREATE;
414 msg->any.head.cmd |= DMSGF_DELETE;
416 state->rxcmd = msg->any.head.cmd &
418 msg->state->func(state, msg);
419 kdmsg_state_cleanuprx(msg);
420 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
423 if (state->func == NULL) {
424 state->flags &= ~KDMSG_STATE_INSERTED;
425 RB_REMOVE(kdmsg_state_tree,
426 &iocom->staterd_tree, state);
427 kdmsg_state_free(state);
433 * NOTE: We have to drain the msgq to handle situations
434 * where received states have built up output
435 * messages, to avoid creating messages with
436 * duplicate CREATE/DELETE flags.
439 kdmsg_drain_msgq(iocom);
440 RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) {
442 (state->rxcmd & DMSGF_DELETE) == 0) {
443 lockmgr(&iocom->msglk, LK_RELEASE);
444 msg = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_ERROR,
446 if ((state->rxcmd & DMSGF_CREATE) == 0)
447 msg->any.head.cmd |= DMSGF_CREATE;
448 msg->any.head.cmd |= DMSGF_DELETE |
451 state->rxcmd = msg->any.head.cmd &
453 msg->state->func(state, msg);
454 kdmsg_state_cleanuprx(msg);
455 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
458 if (state->func == NULL) {
459 state->flags &= ~KDMSG_STATE_INSERTED;
460 RB_REMOVE(kdmsg_state_tree,
461 &iocom->statewr_tree, state);
462 kdmsg_state_free(state);
467 kdmsg_drain_msgq(iocom);
469 panic("kdmsg: comm thread shutdown couldn't drain");
470 if (RB_ROOT(&iocom->statewr_tree))
473 if ((state = iocom->freewr_state) != NULL) {
474 iocom->freewr_state = NULL;
475 kdmsg_state_free(state);
478 lockmgr(&iocom->msglk, LK_RELEASE);
481 * The state trees had better be empty now
483 KKASSERT(RB_EMPTY(&iocom->staterd_tree));
484 KKASSERT(RB_EMPTY(&iocom->statewr_tree));
485 KKASSERT(iocom->conn_state == NULL);
488 * iocom can be ripped out from under us once msgwr_td is set to NULL.
489 * The wakeup is safe.
491 iocom->msgwr_td = NULL;
497 * This cleans out the pending transmit message queue, adjusting any
498 * persistent states properly in the process.
500 * Caller must hold pmp->iocom.msglk
/*
 * kdmsg_drain_msgq: empty the pending transmit queue of an iocom.
 *
 * Each msg is unlinked from msgq, msglk is released, any state->msg
 * back-reference to the msg is cleared, and the msg is passed through
 * kdmsg_state_msgtx() and kdmsg_state_cleanuptx().  msglk is re-acquired
 * before the next queue entry is examined, so the lock is dropped and
 * retaken once per message.
 *
 * NOTE(review): the statement executed when kdmsg_state_msgtx() returns
 * non-zero is not among the lines shown; confirm against the full file.
 */
503 kdmsg_drain_msgq(kdmsg_iocom_t *iocom)
508 * Clean out our pending transmit queue, executing the
509 * appropriate state adjustments. If this tries to open
510 * any new outgoing transactions we have to loop up and
513 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
514 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
515 lockmgr(&iocom->msglk, LK_RELEASE);
516 if (msg->state && msg->state->msg == msg)
517 msg->state->msg = NULL;
518 if (kdmsg_state_msgtx(msg))
521 kdmsg_state_cleanuptx(msg);
522 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
527 * Process state tracking for a message after reception, prior to
530 * Called with msglk held and the msg dequeued.
532 * All messages are called with dummy state and return actual state.
533 * (One-off messages often just return the same dummy state).
535 * May request that caller discard the message by setting *discardp to 1.
536 * The returned state is not used in this case and is allowed to be NULL.
540 * These routines handle persistent and command/reply message state via the
541 * CREATE and DELETE flags. The first message in a command or reply sequence
542 * sets CREATE, the last message in a command or reply sequence sets DELETE.
544 * There can be any number of intermediate messages belonging to the same
545 * sequence sent in between the CREATE message and the DELETE message,
546 * which set neither flag. This represents a streaming command or reply.
548 * Any command message received with CREATE set expects a reply sequence to
549 * be returned. Reply sequences work the same as command sequences except the
550 * REPLY bit is also sent. Both the command side and reply side can
551 * degenerate into a single message with both CREATE and DELETE set. Note
552 * that one side can be streaming and the other side not, or neither, or both.
554 * The msgid is unique for the initiator. That is, two sides sending a new
555 * message can use the same msgid without colliding.
559 * ABORT sequences work by setting the ABORT flag along with normal message
560 * state. However, ABORTs can also be sent on half-closed messages, that is
561 * even if the command or reply side has already sent a DELETE, as long as
562 * the message has not been fully closed it can still send an ABORT+DELETE
563 * to terminate the half-closed message state.
565 * Since ABORT+DELETEs can race we silently discard ABORT's for message
566 * state which has already been fully closed. REPLY+ABORT+DELETEs can
567 * also race, and in this situation the other side might have already
568 * initiated a new unrelated command with the same message id. Since
569 * the abort has not set the CREATE flag the situation can be detected
570 * and the message will also be discarded.
572 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
573 * The ABORT request is essentially integrated into the command instead
574 * of being sent later on. In this situation the command implementation
575 * detects that CREATE and ABORT are both set (vs ABORT alone) and can
576 * special-case non-blocking operation for the command.
578 * NOTE! Messages with ABORT set without CREATE or DELETE are considered
579 * to be mid-stream aborts for command/reply sequences. ABORTs on
580 * one-way messages are not supported.
582 * NOTE! If a command sequence does not support aborts the ABORT flag is
587 * One-off messages (no reply expected) are sent with neither CREATE or DELETE
588 * set. One-off messages cannot be aborted and typically aren't processed
589 * by these routines. The REPLY bit can be used to distinguish whether a
590 * one-off message is a command or reply. For example, one-off replies
591 * will typically just contain status updates.
/*
 * kdmsg_state_msgrx: receive-side state tracking for one message.
 *
 * A spare state is kept in iocom->freerd_state (allocated here when
 * missing).  It is loaded with the msgid of the message and the iocom
 * router and used as the lookup key: statewr_tree is searched when the
 * command has DMSGF_REPLY set, staterd_tree otherwise.  The lookup and
 * everything after it run under msglk.
 *
 * Messages with none of CREATE, DELETE or ABORT set take the short-cut
 * path.  All others are handled by a switch on the CREATE, DELETE and
 * REPLY bits; the CREATE case moves the spare state into staterd_tree
 * and marks it KDMSG_STATE_INSERTED.
 *
 * The reader thread uses the return value as an error code and tests it
 * against EALREADY.
 *
 * NOTE(review): the error assignments, break statements and msg->state
 * updates inside the switch are not among the lines shown; confirm
 * against the full file.
 */
594 kdmsg_state_msgrx(kdmsg_msg_t *msg)
596 kdmsg_iocom_t *iocom;
597 kdmsg_state_t *state;
600 iocom = msg->router->iocom;
603 * XXX resolve msg->any.head.source and msg->any.head.target
604 * into LNK_SPAN references.
606 * XXX replace msg->router
610 * Make sure a state structure is ready to go in case we need a new
611 * one. This is the only routine which uses freerd_state so no
612 * races are possible.
614 if ((state = iocom->freerd_state) == NULL) {
615 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
616 state->flags = KDMSG_STATE_DYNAMIC;
617 iocom->freerd_state = state;
621 * Lock RB tree and locate existing persistent state, if any.
623 * If received msg is a command state is on staterd_tree.
624 * If received msg is a reply state is on statewr_tree.
626 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
628 state->msgid = msg->any.head.msgid;
629 state->router = &iocom->router;
630 kprintf("received msg %08x msgid %jx source=%jx target=%jx\n",
632 (intmax_t)msg->any.head.msgid,
633 (intmax_t)msg->any.head.source,
634 (intmax_t)msg->any.head.target);
635 if (msg->any.head.cmd & DMSGF_REPLY)
636 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, state);
638 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, state);
642 * Short-cut one-off or mid-stream messages (state may be NULL).
644 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
645 DMSGF_ABORT)) == 0) {
646 lockmgr(&iocom->msglk, LK_RELEASE);
651 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
652 * inside the case statements.
654 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY)) {
656 case DMSGF_CREATE | DMSGF_DELETE:
658 * New persistant command received.
661 kprintf("kdmsg_state_msgrx: duplicate transaction\n");
665 state = iocom->freerd_state;
666 iocom->freerd_state = NULL;
668 state->router = msg->router;
670 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
671 state->txcmd = DMSGF_REPLY;
672 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state);
673 state->flags |= KDMSG_STATE_INSERTED;
678 * Persistent state is expected but might not exist if an
679 * ABORT+DELETE races the close.
682 if (msg->any.head.cmd & DMSGF_ABORT) {
685 kprintf("kdmsg_state_msgrx: no state "
693 * Handle another ABORT+DELETE case if the msgid has already
696 if ((state->rxcmd & DMSGF_CREATE) == 0) {
697 if (msg->any.head.cmd & DMSGF_ABORT) {
700 kprintf("kdmsg_state_msgrx: state reused "
710 * Check for mid-stream ABORT command received, otherwise
713 if (msg->any.head.cmd & DMSGF_ABORT) {
715 (state->rxcmd & DMSGF_CREATE) == 0) {
722 case DMSGF_REPLY | DMSGF_CREATE:
723 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
725 * When receiving a reply with CREATE set the original
726 * persistent state message should already exist.
729 kprintf("kdmsg_state_msgrx: no state match for "
730 "REPLY cmd=%08x msgid=%016jx\n",
732 (intmax_t)msg->any.head.msgid);
736 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
739 case DMSGF_REPLY | DMSGF_DELETE:
741 * Received REPLY+ABORT+DELETE in case where msgid has
742 * already been fully closed, ignore the message.
745 if (msg->any.head.cmd & DMSGF_ABORT) {
748 kprintf("kdmsg_state_msgrx: no state match "
749 "for REPLY|DELETE\n");
756 * Received REPLY+ABORT+DELETE in case where msgid has
757 * already been reused for an unrelated message,
758 * ignore the message.
760 if ((state->rxcmd & DMSGF_CREATE) == 0) {
761 if (msg->any.head.cmd & DMSGF_ABORT) {
764 kprintf("kdmsg_state_msgrx: state reused "
765 "for REPLY|DELETE\n");
774 * Check for mid-stream ABORT reply received to sent command.
776 if (msg->any.head.cmd & DMSGF_ABORT) {
778 (state->rxcmd & DMSGF_CREATE) == 0) {
786 lockmgr(&iocom->msglk, LK_RELEASE);
/*
 * kdmsg_state_cleanuprx: state bookkeeping after a received message has
 * been handled.
 *
 * When the msg has state and carries DMSGF_DELETE, rxcmd is marked DELETE
 * under msglk.  If txcmd already carries DELETE too, both directions are
 * closed: the state is removed from statewr_tree when rxcmd has REPLY set
 * and from staterd_tree otherwise, KDMSG_STATE_INSERTED is cleared, msglk
 * is released and the state is freed.  If only the receive side is
 * closed, msglk is simply released.
 *
 * NOTE(review): what happens to the msg itself in each branch is not
 * among the lines shown; confirm against the full file.
 */
791 kdmsg_state_cleanuprx(kdmsg_msg_t *msg)
793 kdmsg_iocom_t *iocom;
794 kdmsg_state_t *state;
796 iocom = msg->router->iocom;
798 if ((state = msg->state) == NULL) {
800 } else if (msg->any.head.cmd & DMSGF_DELETE) {
801 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
802 state->rxcmd |= DMSGF_DELETE;
803 if (state->txcmd & DMSGF_DELETE) {
804 if (state->msg == msg)
806 KKASSERT(state->flags & KDMSG_STATE_INSERTED);
807 if (state->rxcmd & DMSGF_REPLY) {
808 KKASSERT(msg->any.head.cmd &
810 RB_REMOVE(kdmsg_state_tree,
811 &iocom->statewr_tree, state);
813 KKASSERT((msg->any.head.cmd &
815 RB_REMOVE(kdmsg_state_tree,
816 &iocom->staterd_tree, state);
818 state->flags &= ~KDMSG_STATE_INSERTED;
819 lockmgr(&iocom->msglk, LK_RELEASE);
820 kdmsg_state_free(state);
822 lockmgr(&iocom->msglk, LK_RELEASE);
825 } else if (state->msg != msg) {
831 * Process state tracking for a message prior to transmission.
833 * Called with msglk held and the msg dequeued.
835 * One-off messages are usually with dummy state and msg->state may be NULL
838 * New transactions (when CREATE is set) will insert the state.
840 * May request that caller discard the message by setting *discardp to 1.
841 * A NULL state may be returned in this case.
/*
 * kdmsg_state_msgtx: transmit-side state tracking for one message.
 *
 * A spare state is kept in iocom->freewr_state (allocated here when
 * missing).  Under msglk, messages with none of CREATE, DELETE or ABORT
 * set take the short-cut path; all others are handled by a switch on the
 * command flag bits, whose cases record the outgoing command in
 * state->txcmd (with DELETE masked off) for the CREATE and REPLY|CREATE
 * cases.
 *
 * The writer thread tests the return value against EALREADY, while
 * kdmsg_drain_msgq() only tests it for non-zero.
 *
 * NOTE(review): the line that loads state from the msg, the error
 * assignments and the break statements are not among the lines shown;
 * confirm against the full file.
 */
844 kdmsg_state_msgtx(kdmsg_msg_t *msg)
846 kdmsg_iocom_t *iocom;
847 kdmsg_state_t *state;
850 iocom = msg->router->iocom;
853 * Make sure a state structure is ready to go in case we need a new
854 * one. This is the only routine which uses freewr_state so no
855 * races are possible.
857 if ((state = iocom->freewr_state) == NULL) {
858 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
859 state->flags = KDMSG_STATE_DYNAMIC;
860 iocom->freewr_state = state;
864 * Lock RB tree. If persistent state is present it will have already
865 * been assigned to msg.
867 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
871 * Short-cut one-off or mid-stream messages (state may be NULL).
873 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
874 DMSGF_ABORT)) == 0) {
875 lockmgr(&iocom->msglk, LK_RELEASE);
881 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
882 * inside the case statements.
884 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
887 case DMSGF_CREATE | DMSGF_DELETE:
889 * Insert the new persistent message state and mark
890 * half-closed if DELETE is set. Since this is a new
891 * message it isn't possible to transition into the fully
894 * XXX state must be assigned and inserted by
895 * kdmsg_msg_write(). txcmd is assigned by us
898 KKASSERT(state != NULL);
899 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
900 state->rxcmd = DMSGF_REPLY;
905 * Sent ABORT+DELETE in case where msgid has already
906 * been fully closed, ignore the message.
909 if (msg->any.head.cmd & DMSGF_ABORT) {
912 kprintf("kdmsg_state_msgtx: no state match "
913 "for DELETE cmd=%08x msgid=%016jx\n",
915 (intmax_t)msg->any.head.msgid);
922 * Sent ABORT+DELETE in case where msgid has
923 * already been reused for an unrelated message,
924 * ignore the message.
926 if ((state->txcmd & DMSGF_CREATE) == 0) {
927 if (msg->any.head.cmd & DMSGF_ABORT) {
930 kprintf("kdmsg_state_msgtx: state reused "
940 * Check for mid-stream ABORT command sent
942 if (msg->any.head.cmd & DMSGF_ABORT) {
944 (state->txcmd & DMSGF_CREATE) == 0) {
951 case DMSGF_REPLY | DMSGF_CREATE:
952 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
954 * When transmitting a reply with CREATE set the original
955 * persistent state message should already exist.
958 kprintf("kdmsg_state_msgtx: no state match "
959 "for REPLY | CREATE\n");
963 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
966 case DMSGF_REPLY | DMSGF_DELETE:
968 * When transmitting a reply with DELETE set the original
969 * persistent state message should already exist.
971 * This is very similar to the REPLY|CREATE|* case except
972 * txcmd is already stored, so we just add the DELETE flag.
974 * Sent REPLY+ABORT+DELETE in case where msgid has
975 * already been fully closed, ignore the message.
978 if (msg->any.head.cmd & DMSGF_ABORT) {
981 kprintf("kdmsg_state_msgtx: no state match "
982 "for REPLY | DELETE\n");
989 * Sent REPLY+ABORT+DELETE in case where msgid has already
990 * been reused for an unrelated message, ignore the message.
992 if ((state->txcmd & DMSGF_CREATE) == 0) {
993 if (msg->any.head.cmd & DMSGF_ABORT) {
996 kprintf("kdmsg_state_msgtx: state reused "
997 "for REPLY | DELETE\n");
1006 * Check for mid-stream ABORT reply sent.
1008 * One-off REPLY messages are allowed for e.g. status updates.
1010 if (msg->any.head.cmd & DMSGF_ABORT) {
1011 if (state == NULL ||
1012 (state->txcmd & DMSGF_CREATE) == 0) {
1020 lockmgr(&iocom->msglk, LK_RELEASE);
/*
 * kdmsg_state_cleanuptx: state bookkeeping after a message has been
 * transmitted (or discarded by the drain path).
 *
 * - No state: the msg is freed.
 * - Msg carries DMSGF_DELETE: txcmd is marked DELETE under msglk.  If
 *   rxcmd already carries DELETE too, the state is removed from
 *   staterd_tree when txcmd has REPLY set and from statewr_tree
 *   otherwise, KDMSG_STATE_INSERTED is cleared and the state is freed.
 *   The msg is freed afterwards.
 * - Otherwise: the msg is freed only when it is not the one referenced
 *   by state->msg.
 */
1025 kdmsg_state_cleanuptx(kdmsg_msg_t *msg)
1027 kdmsg_iocom_t *iocom;
1028 kdmsg_state_t *state;
1030 iocom = msg->router->iocom;
1032 if ((state = msg->state) == NULL) {
1033 kdmsg_msg_free(msg);
1034 } else if (msg->any.head.cmd & DMSGF_DELETE) {
1035 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1036 state->txcmd |= DMSGF_DELETE;
1037 if (state->rxcmd & DMSGF_DELETE) {
1038 if (state->msg == msg)
1040 KKASSERT(state->flags & KDMSG_STATE_INSERTED);
1041 if (state->txcmd & DMSGF_REPLY) {
1042 KKASSERT(msg->any.head.cmd &
1044 RB_REMOVE(kdmsg_state_tree,
1045 &iocom->staterd_tree, state);
1047 KKASSERT((msg->any.head.cmd &
1049 RB_REMOVE(kdmsg_state_tree,
1050 &iocom->statewr_tree, state);
1052 state->flags &= ~KDMSG_STATE_INSERTED;
1053 lockmgr(&iocom->msglk, LK_RELEASE);
1054 kdmsg_state_free(state);
1056 lockmgr(&iocom->msglk, LK_RELEASE);
1058 kdmsg_msg_free(msg);
1059 } else if (state->msg != msg) {
1060 kdmsg_msg_free(msg);
/*
 * kdmsg_state_free: release a state structure.
 *
 * The state must already be out of both trees; this is asserted through
 * KDMSG_STATE_INSERTED being clear.  The structure is returned to the
 * malloc type of the owning iocom, and a msg is released through
 * kdmsg_msg_free().
 *
 * NOTE(review): where msg is loaded from, and the condition guarding its
 * release, are not among the lines shown; confirm against the full file.
 */
1065 kdmsg_state_free(kdmsg_state_t *state)
1067 kdmsg_iocom_t *iocom;
1070 iocom = state->router->iocom;
1072 KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0);
1075 kfree(state, iocom->mmsg);
1077 kdmsg_msg_free(msg);
/*
 * kdmsg_msg_alloc: allocate and zero a message for the given router.
 *
 * The header size comes from the DMSGF_SIZE bits of cmd, in DMSG_ALIGN
 * units, and the allocation covers the kdmsg_msg fields up to `any` plus
 * that many header bytes.  magic, source (0), target (router->target)
 * and cmd are filled in.
 *
 * When cmd has DMSGF_CREATE set, a tracking state is allocated as well.
 * Its msgid is the address of the state cast to a 64-bit integer, and it
 * is inserted into statewr_tree under msglk; a collision panics.
 *
 * router - owning router; router->iocom supplies the malloc type.
 * cmd    - command word, including the size bits and flags.
 * func   - NOTE(review): its use is not among the lines shown; presumably
 *          installed as the state callback - confirm.
 * data   - stored in state->any.any for CREATE messages.
 */
1081 kdmsg_msg_alloc(kdmsg_router_t *router, uint32_t cmd,
1082 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
1084 kdmsg_iocom_t *iocom;
1086 kdmsg_state_t *state;
1089 iocom = router->iocom;
1090 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
1091 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
1092 iocom->mmsg, M_WAITOK | M_ZERO);
1093 msg->hdr_size = hbytes;
1094 msg->router = router;
/*
 * NOTE(review): router has already been dereferenced above, so this
 * assertion cannot catch a NULL router before the first use.
 */
1095 KKASSERT(router != NULL);
1096 msg->any.head.magic = DMSG_HDR_MAGIC;
1097 msg->any.head.source = 0;
1098 msg->any.head.target = router->target;
1099 msg->any.head.cmd = cmd;
1101 if (cmd & DMSGF_CREATE) {
1103 * New transaction, requires tracking state and a unique
1104 * msgid to be allocated.
1106 KKASSERT(msg->state == NULL);
1107 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
1108 state->flags = KDMSG_STATE_DYNAMIC;
1110 state->any.any = data;
1112 state->msgid = (uint64_t)(uintptr_t)state;
1113 state->router = msg->router;
1115 msg->any.head.source = 0;
1116 msg->any.head.target = state->router->target;
1117 msg->any.head.msgid = state->msgid;
1119 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1120 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state))
1121 panic("duplicate msgid allocated");
1122 state->flags |= KDMSG_STATE_INSERTED;
1123 msg->any.head.msgid = state->msgid;
1124 lockmgr(&iocom->msglk, LK_RELEASE);
/*
 * kdmsg_msg_free: release a message and its aux buffer.
 *
 * The aux buffer is freed (and the pointer cleared) only when aux_data is
 * non-NULL and aux_size is non-zero.  Both frees use the malloc type of
 * the iocom reached through msg->router.
 */
1131 kdmsg_msg_free(kdmsg_msg_t *msg)
1133 kdmsg_iocom_t *iocom;
1135 iocom = msg->router->iocom;
1137 if (msg->aux_data && msg->aux_size) {
1138 kfree(msg->aux_data, iocom->mmsg);
1139 msg->aux_data = NULL;
1143 kfree(msg, iocom->mmsg);
1147 * Indexed messages are stored in a red-black tree indexed by their
1148 * msgid. Only persistent messages are indexed.
/*
 * kdmsg_state_cmp: ordering function used by kdmsg_state_tree.
 *
 * States are compared by router pointer first and by msgid second, so
 * equal msgids on different routers are distinct keys.
 *
 * NOTE(review): the return statements are not among the lines shown;
 * confirm the sign convention against the full file.
 */
1151 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2)
1153 if (state1->router < state2->router)
1155 if (state1->router > state2->router)
1157 if (state1->msgid < state2->msgid)
1159 if (state1->msgid > state2->msgid)
1165 * Write a message. All requisite command flags have been set.
1167 * If msg->state is non-NULL the message is written to the existing
1168 * transaction. msgid will be set accordingly.
1170 * If msg->state is NULL and CREATE is set new state is allocated and
1171 * (func, data) is installed. A msgid is assigned.
1173 * If msg->state is NULL and CREATE is not set the message is assumed
1174 * to be a one-way message. The originator must assign the msgid
1175 * (or leave it 0, which is typical).
1177 * This function merely queues the message to the management thread, it
1178 * does not write to the message socket/pipe.
/*
 * kdmsg_msg_write: finish the header of a message and queue it for the
 * writer thread.
 *
 * Two paths fill in msgid, source and target and then take msglk: one
 * uses state->msgid and state->router->target, the other uses msgid 0
 * and msg->router->target.  After that the salt is set from the low
 * 8 bits of iocom->msg_seq, the header CRC is computed, the msg is
 * appended to msgq and clusterctl_wakeup() is invoked, all before msglk
 * is released.
 *
 * NOTE(review): the condition selecting between the two paths, and any
 * update of msg_seq, are not among the lines shown; confirm against the
 * full file.
 */
1181 kdmsg_msg_write(kdmsg_msg_t *msg)
1183 kdmsg_iocom_t *iocom;
1184 kdmsg_state_t *state;
1186 iocom = msg->router->iocom;
1190 * Continuance or termination of existing transaction.
1191 * The transaction could have been initiated by either end.
1193 * (Function callback and aux data for the receive side can
1194 * be replaced or left alone).
1197 msg->any.head.msgid = state->msgid;
1198 msg->any.head.source = 0;
1199 msg->any.head.target = state->router->target;
1200 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1203 * One-off message (always uses msgid 0 to distinguish
1204 * between a possibly lost in-transaction message due to
1205 * competing aborts and a real one-off message?)
1207 msg->any.head.msgid = 0;
1208 msg->any.head.source = 0;
1209 msg->any.head.target = msg->router->target;
1210 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1214 * Finish up the msg fields
1216 msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255);
/* hdr_crc is zeroed first so the CRC is computed over a header with a zero CRC field. */
1219 msg->any.head.hdr_crc = 0;
1220 msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size);
1222 TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry);
1223 iocom->clusterctl_wakeup(iocom);
1224 lockmgr(&iocom->msglk, LK_RELEASE);
1228 * Reply to a message and terminate our side of the transaction.
1230 * If msg->state is NULL we are replying to a one-way message.
/*
 * kdmsg_msg_reply: send a DMSG_LNK_ERROR reply carrying `error` and close
 * our side of the transaction.
 *
 * With state, the flags come from state->txcmd: nothing is sent when our
 * side already carries DELETE, CREATE is added when our side has not
 * sent one yet, and DELETE is always added.  The other path looks at the
 * REPLY bit of the incoming command instead.
 *
 * The new message is allocated with the command masked by
 * DMSGF_BASECMDMASK (see the XXX note); the full command word, the error
 * code and the state are filled in afterwards and the message is queued
 * with kdmsg_msg_write().
 *
 * NOTE(review): the guard selecting between the two paths and the
 * statements that add DMSGF_REPLY are not among the lines shown; confirm
 * against the full file.
 */
1233 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error)
1235 kdmsg_state_t *state = msg->state;
1240 * Reply with a simple error code and terminate the transaction.
1242 cmd = DMSG_LNK_ERROR;
1245 * Check if our direction has even been initiated yet, set CREATE.
1247 * Check what direction this is (command or reply direction). Note
1248 * that txcmd might not have been initiated yet.
1250 * If our direction has already been closed we just return without
1254 if (state->txcmd & DMSGF_DELETE)
1256 if ((state->txcmd & DMSGF_CREATE) == 0)
1257 cmd |= DMSGF_CREATE;
1258 if (state->txcmd & DMSGF_REPLY)
1260 cmd |= DMSGF_DELETE;
1262 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1265 kprintf("MSG_REPLY state=%p msg %08x\n", state, cmd);
1267 /* XXX messy mask cmd to avoid allocating state */
1268 nmsg = kdmsg_msg_alloc(msg->router, cmd & DMSGF_BASECMDMASK,
1270 nmsg->any.head.cmd = cmd;
1271 nmsg->any.head.error = error;
1272 nmsg->state = state;
1273 kdmsg_msg_write(nmsg);
1277 * Reply to a message and continue our side of the transaction.
1279 * If msg->state is NULL we are replying to a one-way message and this
1280 * function degenerates into the same as kdmsg_msg_reply().
/*
 * kdmsg_msg_result: send a DMSG_LNK_ERROR result carrying `error` while
 * leaving our side of the transaction open.
 *
 * With state, the flags come from state->txcmd: nothing is sent when our
 * side already carries DELETE, and CREATE is added when our side has not
 * sent one yet.  DELETE is deliberately not added on this path.  The
 * other path looks at the REPLY bit of the incoming command instead.
 *
 * The new message is allocated with the command masked by
 * DMSGF_BASECMDMASK (see the XXX note); the full command word, the error
 * code and the state are filled in afterwards and the message is queued
 * with kdmsg_msg_write().
 *
 * NOTE(review): the guard selecting between the two paths and the
 * statements that add DMSGF_REPLY are not among the lines shown; confirm
 * against the full file.
 */
1283 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error)
1285 kdmsg_state_t *state = msg->state;
1290 * Return a simple result code, do NOT terminate the transaction.
1292 cmd = DMSG_LNK_ERROR;
1295 * Check if our direction has even been initiated yet, set CREATE.
1297 * Check what direction this is (command or reply direction). Note
1298 * that txcmd might not have been initiated yet.
1300 * If our direction has already been closed we just return without
1304 if (state->txcmd & DMSGF_DELETE)
1306 if ((state->txcmd & DMSGF_CREATE) == 0)
1307 cmd |= DMSGF_CREATE;
1308 if (state->txcmd & DMSGF_REPLY)
1310 /* continuing transaction, do not set MSGF_DELETE */
1312 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1316 /* XXX messy mask cmd to avoid allocating state */
1317 nmsg = kdmsg_msg_alloc(msg->router, cmd & DMSGF_BASECMDMASK,
1319 nmsg->any.head.cmd = cmd;
1320 nmsg->any.head.error = error;
1321 nmsg->state = state;
1322 kdmsg_msg_write(nmsg);