2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * TODO: txcmd CREATE state is deferred by txmsgq, need to calculate
36 * a streaming response. See subr_diskiocom()'s diskiodone().
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/kernel.h>
42 #include <sys/systm.h>
43 #include <sys/queue.h>
45 #include <sys/malloc.h>
46 #include <sys/mount.h>
47 #include <sys/socket.h>
48 #include <sys/vnode.h>
52 #include <sys/thread.h>
53 #include <sys/globaldata.h>
54 #include <sys/limits.h>
58 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp);
59 RB_GENERATE(kdmsg_circuit_tree, kdmsg_circuit, rbnode, kdmsg_circuit_cmp);
61 static int kdmsg_msg_receive_handling(kdmsg_msg_t *msg);
62 static int kdmsg_circ_msgrx(kdmsg_msg_t *msg);
63 static int kdmsg_state_msgrx(kdmsg_msg_t *msg);
64 static int kdmsg_state_msgtx(kdmsg_msg_t *msg);
65 static void kdmsg_state_cleanuprx(kdmsg_msg_t *msg);
66 static void kdmsg_state_cleanuptx(kdmsg_msg_t *msg);
67 static void kdmsg_state_abort(kdmsg_state_t *state);
68 static void kdmsg_state_free(kdmsg_state_t *state);
70 static void kdmsg_iocom_thread_rd(void *arg);
71 static void kdmsg_iocom_thread_wr(void *arg);
72 static int kdmsg_autorxmsg(kdmsg_msg_t *msg);
73 static void kdmsg_autocirc(kdmsg_msg_t *msg);
74 static int kdmsg_autocirc_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
76 static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);
79 kdmsg_circ_hold(kdmsg_circuit_t *circ)
81 atomic_add_int(&circ->refs, 1);
85 kdmsg_circ_drop(kdmsg_circuit_t *circ)
89 if (atomic_fetchadd_int(&circ->refs, -1) == 1) {
90 KKASSERT(circ->span_state == NULL &&
91 circ->circ_state == NULL &&
92 circ->rcirc_state == NULL &&
96 kfree(circ, iocom->mmsg);
102 * Initialize the roll-up communications structure for a network
103 * messaging session. This function does not install the socket.
106 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, uint32_t flags,
107 struct malloc_type *mmsg,
108 int (*rcvmsg)(kdmsg_msg_t *msg))
110 bzero(iocom, sizeof(*iocom));
111 iocom->handle = handle;
113 iocom->rcvmsg = rcvmsg;
114 iocom->flags = flags;
115 lockinit(&iocom->msglk, "h2msg", 0, 0);
116 TAILQ_INIT(&iocom->msgq);
117 RB_INIT(&iocom->circ_tree);
118 RB_INIT(&iocom->staterd_tree);
119 RB_INIT(&iocom->statewr_tree);
123 * [Re]connect using the passed file pointer. The caller must ref the
124 * fp for us. We own that ref now.
127 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
128 const char *subsysname)
131 * Destroy the current connection
133 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
134 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
135 while (iocom->msgrd_td || iocom->msgwr_td) {
136 wakeup(&iocom->msg_ctl);
137 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz);
141 * Drop communications descriptor
144 fdrop(iocom->msg_fp);
145 iocom->msg_fp = NULL;
149 * Setup new communications descriptor
154 iocom->flags &= ~KDMSG_IOCOMF_EXITNOACC;
156 lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td,
157 NULL, 0, -1, "%s-msgrd", subsysname);
158 lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td,
159 NULL, 0, -1, "%s-msgwr", subsysname);
160 lockmgr(&iocom->msglk, LK_RELEASE);
164 * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls
165 * this function to handle the state machine for LNK_CONN and LNK_SPAN.
167 * NOTE: Caller typically also sets the IOCOMF_AUTOCONN, IOCOMF_AUTOSPAN,
168 * and IOCOMF_AUTOCIRC in the kdmsg_iocom_init() call. Clients
169 * typically set IOCOMF_AUTOFORGE to automatically forge circuits
170 * for received SPANs.
172 static int kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
173 static int kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
176 kdmsg_iocom_autoinitiate(kdmsg_iocom_t *iocom,
177 void (*auto_callback)(kdmsg_msg_t *msg))
181 iocom->auto_callback = auto_callback;
183 msg = kdmsg_msg_alloc(iocom, NULL,
184 DMSG_LNK_CONN | DMSGF_CREATE,
185 kdmsg_lnk_conn_reply, NULL);
186 iocom->auto_lnk_conn.head = msg->any.head;
187 msg->any.lnk_conn = iocom->auto_lnk_conn;
188 iocom->conn_state = msg->state;
189 kdmsg_msg_write(msg);
194 kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
196 kdmsg_iocom_t *iocom = state->iocom;
199 if (msg->any.head.cmd & DMSGF_CREATE) {
200 rmsg = kdmsg_msg_alloc(iocom, NULL,
201 DMSG_LNK_SPAN | DMSGF_CREATE,
202 kdmsg_lnk_span_reply, NULL);
203 iocom->auto_lnk_span.head = rmsg->any.head;
204 rmsg->any.lnk_span = iocom->auto_lnk_span;
205 kdmsg_msg_write(rmsg);
209 * Process shim after the CONN is acknowledged and before the CONN
210 * transaction is deleted. For deletions this gives device drivers
211 * the ability to interlock new operations on the circuit before
212 * it becomes illegal and panics.
214 if (iocom->auto_callback)
215 iocom->auto_callback(msg);
217 if ((state->txcmd & DMSGF_DELETE) == 0 &&
218 (msg->any.head.cmd & DMSGF_DELETE)) {
219 iocom->conn_state = NULL;
220 kdmsg_msg_reply(msg, 0);
228 kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
231 * Be sure to process shim before terminating the SPAN
232 * transaction. Gives device drivers the ability to
233 * interlock new operations on the circuit before it
234 * becomes illegal and panics.
236 if (state->iocom->auto_callback)
237 state->iocom->auto_callback(msg);
239 if ((state->txcmd & DMSGF_DELETE) == 0 &&
240 (msg->any.head.cmd & DMSGF_DELETE)) {
241 kdmsg_msg_reply(msg, 0);
247 * Disconnect and clean up
250 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom)
253 * Ask the cluster controller to go away
255 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
256 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
258 while (iocom->msgrd_td || iocom->msgwr_td) {
259 wakeup(&iocom->msg_ctl);
260 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz);
264 * Drop communications descriptor
267 fdrop(iocom->msg_fp);
268 iocom->msg_fp = NULL;
270 lockmgr(&iocom->msglk, LK_RELEASE);
274 * Cluster controller thread. Perform messaging functions. We have one
275 * thread for the reader and one for the writer. The writer handles
276 * shutdown requests (which should break the reader thread).
280 kdmsg_iocom_thread_rd(void *arg)
282 kdmsg_iocom_t *iocom = arg;
284 kdmsg_msg_t *msg = NULL;
285 kdmsg_state_t *state;
290 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) {
292 * Retrieve the message from the pipe or socket.
294 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr),
295 NULL, 1, UIO_SYSSPACE);
298 if (hdr.magic != DMSG_HDR_MAGIC) {
299 kprintf("kdmsg: bad magic: %04x\n", hdr.magic);
303 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN;
304 if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) {
305 kprintf("kdmsg: bad header size %zd\n", hbytes);
309 /* XXX messy: mask cmd to avoid allocating state */
310 msg = kdmsg_msg_alloc(iocom, NULL,
311 hdr.cmd & DMSGF_BASECMDMASK,
314 msg->hdr_size = hbytes;
315 if (hbytes > sizeof(hdr)) {
316 error = fp_read(iocom->msg_fp, &msg->any.head + 1,
317 hbytes - sizeof(hdr),
318 NULL, 1, UIO_SYSSPACE);
320 kprintf("kdmsg: short msg received\n");
325 msg->aux_size = hdr.aux_bytes;
326 if (msg->aux_size > DMSG_AUX_MAX) {
327 kprintf("kdmsg: illegal msg payload size %zd\n",
333 abytes = DMSG_DOALIGN(msg->aux_size);
334 msg->aux_data = kmalloc(abytes, iocom->mmsg, M_WAITOK);
335 msg->flags |= KDMSG_FLAG_AUXALLOC;
336 error = fp_read(iocom->msg_fp, msg->aux_data,
337 abytes, NULL, 1, UIO_SYSSPACE);
339 kprintf("kdmsg: short msg payload received\n");
344 (void)kdmsg_circ_msgrx(msg);
345 error = kdmsg_msg_receive_handling(msg);
350 kprintf("kdmsg: read failed error %d\n", error);
352 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
356 if ((state = iocom->freerd_state) != NULL) {
357 iocom->freerd_state = NULL;
358 kdmsg_state_free(state);
362 * Shutdown the socket before waiting for the transmit side.
364 * If we are dying due to e.g. a socket disconnect versus being
365 * killed explicitly we have to set KILL in order to kick the tx
366 * side when it might not have any other work to do. KILL might
367 * already be set if we are in an unmount or reconnect.
369 fp_shutdown(iocom->msg_fp, SHUT_RDWR);
371 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
372 wakeup(&iocom->msg_ctl);
375 * Wait for the transmit side to drain remaining messages
376 * before cleaning up the rx state. The transmit side will
377 * set KILLTX and wait for the rx side to completely finish
378 * (set msgrd_td to NULL) before cleaning up any remaining
381 lockmgr(&iocom->msglk, LK_RELEASE);
382 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX);
383 wakeup(&iocom->msg_ctl);
384 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) {
385 wakeup(&iocom->msg_ctl);
386 tsleep(iocom, 0, "clstrkw", hz);
389 iocom->msgrd_td = NULL;
392 * iocom can be ripped out from under us at this point but
401 kdmsg_iocom_thread_wr(void *arg)
403 kdmsg_iocom_t *iocom = arg;
405 kdmsg_state_t *state;
415 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
417 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) {
419 * Sleep if no messages pending. Interlock with flag while
422 if (TAILQ_EMPTY(&iocom->msgq)) {
423 atomic_set_int(&iocom->msg_ctl,
424 KDMSG_CLUSTERCTL_SLEEPING);
425 lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz);
426 atomic_clear_int(&iocom->msg_ctl,
427 KDMSG_CLUSTERCTL_SLEEPING);
430 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
432 * Remove msg from the transmit queue and do
433 * persist and half-closed state handling.
435 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
436 lockmgr(&iocom->msglk, LK_RELEASE);
438 error = kdmsg_state_msgtx(msg);
439 if (error == EALREADY) {
442 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
447 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
452 * Dump the message to the pipe or socket.
454 * We have to clean up the message as if the transmit
455 * succeeded even if it failed.
457 error = fp_write(iocom->msg_fp, &msg->any,
458 msg->hdr_size, &res, UIO_SYSSPACE);
459 if (error || res != msg->hdr_size) {
462 kdmsg_state_cleanuptx(msg);
463 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
467 abytes = DMSG_DOALIGN(msg->aux_size);
468 error = fp_write(iocom->msg_fp,
469 msg->aux_data, abytes,
471 if (error || res != abytes) {
474 kdmsg_state_cleanuptx(msg);
475 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
479 kdmsg_state_cleanuptx(msg);
480 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
485 * Cleanup messages pending transmission and release msgq lock.
488 kprintf("kdmsg: write failed error %d\n", error);
489 kprintf("thread_wr: Terminating iocom\n");
492 * Shutdown the socket. This will cause the rx thread to get an
493 * EOF and ensure that both threads get to a termination state.
495 fp_shutdown(iocom->msg_fp, SHUT_RDWR);
498 * Set KILLTX (which the rx side waits for), then wait for the RX
499 * side to completely finish before we clean out any remaining
502 lockmgr(&iocom->msglk, LK_RELEASE);
503 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX);
504 wakeup(&iocom->msg_ctl);
505 while (iocom->msgrd_td) {
506 wakeup(&iocom->msg_ctl);
507 tsleep(iocom, 0, "clstrkw", hz);
509 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
512 * Simulate received MSGF_DELETE's for any remaining states.
513 * (For remote masters).
515 * Drain the message queue to handle any device initiated writes
516 * due to state callbacks.
519 kdmsg_drain_msgq(iocom);
520 RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) {
521 if ((state->rxcmd & DMSGF_DELETE) == 0) {
522 lockmgr(&iocom->msglk, LK_RELEASE);
523 kdmsg_state_abort(state);
524 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
530 * Simulate received MSGF_DELETE's for any remaining states.
531 * (For local masters).
534 kdmsg_drain_msgq(iocom);
535 RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) {
536 if ((state->rxcmd & DMSGF_DELETE) == 0) {
537 lockmgr(&iocom->msglk, LK_RELEASE);
538 kdmsg_state_abort(state);
539 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
545 * Retry until all work is done
548 panic("kdmsg: comm thread shutdown couldn't drain");
549 if (TAILQ_FIRST(&iocom->msgq) ||
550 RB_ROOT(&iocom->staterd_tree) ||
551 RB_ROOT(&iocom->statewr_tree)) {
554 iocom->flags |= KDMSG_IOCOMF_EXITNOACC;
556 if ((state = iocom->freewr_state) != NULL) {
557 iocom->freewr_state = NULL;
558 kdmsg_state_free(state);
561 lockmgr(&iocom->msglk, LK_RELEASE);
564 * The state trees had better be empty now
566 KKASSERT(RB_EMPTY(&iocom->staterd_tree));
567 KKASSERT(RB_EMPTY(&iocom->statewr_tree));
568 KKASSERT(iocom->conn_state == NULL);
570 if (iocom->exit_func) {
572 * iocom is invalid after we call the exit function.
574 iocom->msgwr_td = NULL;
575 iocom->exit_func(iocom);
578 * iocom can be ripped out from under us once msgwr_td is
579 * set to NULL. The wakeup is safe.
581 iocom->msgwr_td = NULL;
588 * This cleans out the pending transmit message queue, adjusting any
589 * persistent states properly in the process.
591 * Caller must hold iocom->msglk
594 kdmsg_drain_msgq(kdmsg_iocom_t *iocom)
599 * Clean out our pending transmit queue, executing the
600 * appropriate state adjustments. If this tries to open
601 * any new outgoing transactions we have to loop up and
604 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
605 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
606 lockmgr(&iocom->msglk, LK_RELEASE);
607 if (kdmsg_state_msgtx(msg))
610 kdmsg_state_cleanuptx(msg);
611 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
616 * Do all processing required to handle a freshly received message
617 * after its low level header has been validated.
621 kdmsg_msg_receive_handling(kdmsg_msg_t *msg)
623 kdmsg_iocom_t *iocom = msg->iocom;
627 * State machine tracking, state assignment for msg,
628 * returns error and discard status. Errors are fatal
629 * to the connection except for EALREADY which forces
630 * a discard without execution.
632 error = kdmsg_state_msgrx(msg);
635 * Raw protocol or connection error
638 if (error == EALREADY)
640 } else if (msg->state && msg->state->func) {
642 * Message related to state which already has a
643 * handling function installed for it.
645 error = msg->state->func(msg->state, msg);
646 kdmsg_state_cleanuprx(msg);
647 } else if (iocom->flags & KDMSG_IOCOMF_AUTOANY) {
648 error = kdmsg_autorxmsg(msg);
649 kdmsg_state_cleanuprx(msg);
651 error = iocom->rcvmsg(msg);
652 kdmsg_state_cleanuprx(msg);
658 * Process circuit tracking (NEEDS WORK)
662 kdmsg_circ_msgrx(kdmsg_msg_t *msg)
664 kdmsg_circuit_t dummy;
665 kdmsg_circuit_t *circ;
668 if (msg->any.head.circuit) {
669 dummy.msgid = msg->any.head.circuit;
670 lwkt_gettoken(&kdmsg_token);
671 circ = RB_FIND(kdmsg_circuit_tree, &msg->iocom->circ_tree,
675 kdmsg_circ_hold(circ);
678 kprintf("KDMSG_CIRC_MSGRX CMD %08x: IOCOM %p "
679 "Bad circuit %016jx\n",
682 (intmax_t)msg->any.head.circuit);
683 kprintf("KDMSG_CIRC_MSGRX: Avail circuits: ");
684 RB_FOREACH(circ, kdmsg_circuit_tree,
685 &msg->iocom->circ_tree) {
686 kprintf(" %016jx", (intmax_t)circ->msgid);
691 lwkt_reltoken(&kdmsg_token);
697 * Process state tracking for a message after reception, prior to
700 * Called without msglk held (this routine acquires it) and with the msg dequeued.
702 * All messages are called with dummy state and return actual state.
703 * (One-off messages often just return the same dummy state).
705 * May request that caller discard the message by setting *discardp to 1.
706 * The returned state is not used in this case and is allowed to be NULL.
710 * These routines handle persistent and command/reply message state via the
711 * CREATE and DELETE flags. The first message in a command or reply sequence
712 * sets CREATE, the last message in a command or reply sequence sets DELETE.
714 * There can be any number of intermediate messages belonging to the same
715 * sequence sent in between the CREATE message and the DELETE message,
716 * which set neither flag. This represents a streaming command or reply.
718 * Any command message received with CREATE set expects a reply sequence to
719 * be returned. Reply sequences work the same as command sequences except the
720 * REPLY bit is also sent. Both the command side and reply side can
721 * degenerate into a single message with both CREATE and DELETE set. Note
722 * that one side can be streaming and the other side not, or neither, or both.
724 * The msgid is unique for the initiator. That is, two sides sending a new
725 * message can use the same msgid without colliding.
729 * ABORT sequences work by setting the ABORT flag along with normal message
730 * state. However, ABORTs can also be sent on half-closed messages, that is
731 * even if the command or reply side has already sent a DELETE, as long as
732 * the message has not been fully closed it can still send an ABORT+DELETE
733 * to terminate the half-closed message state.
735 * Since ABORT+DELETEs can race we silently discard ABORT's for message
736 * state which has already been fully closed. REPLY+ABORT+DELETEs can
737 * also race, and in this situation the other side might have already
738 * initiated a new unrelated command with the same message id. Since
739 * the abort has not set the CREATE flag the situation can be detected
740 * and the message will also be discarded.
742 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
743 * The ABORT request is essentially integrated into the command instead
744 * of being sent later on. In this situation the command implementation
745 * detects that CREATE and ABORT are both set (vs ABORT alone) and can
746 * special-case non-blocking operation for the command.
748 * NOTE! Messages with ABORT set without CREATE or DELETE are considered
749 * to be mid-stream aborts for command/reply sequences. ABORTs on
750 * one-way messages are not supported.
752 * NOTE! If a command sequence does not support aborts the ABORT flag is
757 * One-off messages (no reply expected) are sent with neither CREATE nor DELETE
758 * set. One-off messages cannot be aborted and typically aren't processed
759 * by these routines. The REPLY bit can be used to distinguish whether a
760 * one-off message is a command or reply. For example, one-off replies
761 * will typically just contain status updates.
765 kdmsg_state_msgrx(kdmsg_msg_t *msg)
767 kdmsg_iocom_t *iocom = msg->iocom;
768 kdmsg_state_t *state;
772 * Make sure a state structure is ready to go in case we need a new
773 * one. This is the only routine which uses freerd_state so no
774 * races are possible.
776 if ((state = iocom->freerd_state) == NULL) {
777 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
778 state->flags = KDMSG_STATE_DYNAMIC;
779 iocom->freerd_state = state;
783 * Lock RB tree and locate existing persistent state, if any.
785 * If received msg is a command state is on staterd_tree.
786 * If received msg is a reply state is on statewr_tree.
788 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
790 state->msgid = msg->any.head.msgid;
791 state->circ = msg->circ;
792 state->iocom = iocom;
793 if (msg->any.head.cmd & DMSGF_REPLY)
794 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, state);
796 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, state);
800 * Short-cut one-off or mid-stream messages (state may be NULL).
802 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
803 DMSGF_ABORT)) == 0) {
804 lockmgr(&iocom->msglk, LK_RELEASE);
809 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
810 * inside the case statements.
812 switch(msg->any.head.cmd & (DMSGF_CREATE|DMSGF_DELETE|DMSGF_REPLY)) {
814 case DMSGF_CREATE | DMSGF_DELETE:
816 * New persistent command received.
819 kprintf("kdmsg_state_msgrx: duplicate transaction\n");
823 state = iocom->freerd_state;
824 iocom->freerd_state = NULL;
827 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK;
828 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
829 state->txcmd = DMSGF_REPLY;
830 state->msgid = msg->any.head.msgid;
831 if ((state->circ = msg->circ) != NULL)
832 kdmsg_circ_hold(state->circ);
833 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state);
834 state->flags |= KDMSG_STATE_INSERTED;
839 * Persistent state is expected but might not exist if an
840 * ABORT+DELETE races the close.
843 if (msg->any.head.cmd & DMSGF_ABORT) {
846 kprintf("kdmsg_state_msgrx: "
847 "no state for DELETE\n");
854 * Handle another ABORT+DELETE case if the msgid has already
857 if ((state->rxcmd & DMSGF_CREATE) == 0) {
858 if (msg->any.head.cmd & DMSGF_ABORT) {
861 kprintf("kdmsg_state_msgrx: "
862 "state reused for DELETE\n");
871 * Check for mid-stream ABORT command received, otherwise
874 if (msg->any.head.cmd & DMSGF_ABORT) {
876 (state->rxcmd & DMSGF_CREATE) == 0) {
883 case DMSGF_REPLY | DMSGF_CREATE:
884 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
886 * When receiving a reply with CREATE set the original
887 * persistent state message should already exist.
890 kprintf("kdmsg_state_msgrx: no state match for "
891 "REPLY cmd=%08x msgid=%016jx\n",
893 (intmax_t)msg->any.head.msgid);
897 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
900 case DMSGF_REPLY | DMSGF_DELETE:
902 * Received REPLY+ABORT+DELETE in case where msgid has
903 * already been fully closed, ignore the message.
906 if (msg->any.head.cmd & DMSGF_ABORT) {
909 kprintf("kdmsg_state_msgrx: no state match "
910 "for REPLY|DELETE\n");
917 * Received REPLY+ABORT+DELETE in case where msgid has
918 * already been reused for an unrelated message,
919 * ignore the message.
921 if ((state->rxcmd & DMSGF_CREATE) == 0) {
922 if (msg->any.head.cmd & DMSGF_ABORT) {
925 kprintf("kdmsg_state_msgrx: state reused "
926 "for REPLY|DELETE\n");
935 * Check for mid-stream ABORT reply received to sent command.
937 if (msg->any.head.cmd & DMSGF_ABORT) {
939 (state->rxcmd & DMSGF_CREATE) == 0) {
947 lockmgr(&iocom->msglk, LK_RELEASE);
952 * Called instead of iocom->rcvmsg() if any of the AUTO flags are set.
953 * This routine must call iocom->rcvmsg() for anything not automatically
957 kdmsg_autorxmsg(kdmsg_msg_t *msg)
959 kdmsg_iocom_t *iocom = msg->iocom;
960 kdmsg_circuit_t *circ;
965 * Process a combination of the transaction command and the message
966 * flags. For the purposes of this routine, the message command is
967 * only relevant when it initiates a transaction (where it is
970 cmd = (msg->state ? msg->state->icmd : msg->any.head.cmd) &
972 cmd |= msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY);
975 case DMSG_LNK_CONN | DMSGF_CREATE:
976 case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE:
978 * Received LNK_CONN transaction. Transmit response and
979 * leave transaction open, which allows the other end to
980 * start the SPAN protocol.
982 * Handle shim after acknowledging the CONN.
984 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
985 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) {
986 kdmsg_msg_result(msg, 0);
987 if (iocom->auto_callback)
988 iocom->auto_callback(msg);
990 error = iocom->rcvmsg(msg);
995 case DMSG_LNK_CONN | DMSGF_DELETE:
997 * This message is usually simulated after a link is lost
998 * to clean up the transaction.
1000 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) {
1001 if (iocom->auto_callback)
1002 iocom->auto_callback(msg);
1003 kdmsg_msg_reply(msg, 0);
1005 error = iocom->rcvmsg(msg);
1008 case DMSG_LNK_SPAN | DMSGF_CREATE:
1009 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE:
1011 * Received LNK_SPAN transaction. We do not have to respond
1012 * but we must leave the transaction open.
1014 * If AUTOCIRC is set automatically initiate a virtual circuit
1015 * to the received span. This will attach a kdmsg_circuit
1016 * to the SPAN state. The circuit is lost when the span is
1019 * Handle shim after acknowledging the SPAN.
1021 if (iocom->flags & KDMSG_IOCOMF_AUTOSPAN) {
1022 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
1023 if (iocom->flags & KDMSG_IOCOMF_AUTOFORGE)
1024 kdmsg_autocirc(msg);
1025 if (iocom->auto_callback)
1026 iocom->auto_callback(msg);
1031 error = iocom->rcvmsg(msg);
1035 case DMSG_LNK_SPAN | DMSGF_DELETE:
1037 * Process shims (auto_callback) before cleaning up the
1038 * circuit structure and closing the transactions. Device
1039 * driver should ensure that the circuit is not used after
1040 * the auto_callback() returns.
1042 * Handle shim before closing the SPAN transaction.
1044 if (iocom->flags & KDMSG_IOCOMF_AUTOSPAN) {
1045 if (iocom->auto_callback)
1046 iocom->auto_callback(msg);
1047 if (iocom->flags & KDMSG_IOCOMF_AUTOFORGE)
1048 kdmsg_autocirc(msg);
1049 kdmsg_msg_reply(msg, 0);
1051 error = iocom->rcvmsg(msg);
1054 case DMSG_LNK_CIRC | DMSGF_CREATE:
1055 case DMSG_LNK_CIRC | DMSGF_CREATE | DMSGF_DELETE:
1057 * Received LNK_CIRC transaction. We must respond and should
1058 * leave the transaction open, allowing the circuit. The
1059 * remote can start issuing commands to us over the circuit
1060 * even before we respond.
1062 if (iocom->flags & KDMSG_IOCOMF_AUTOCIRC) {
1063 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
1064 circ = kmalloc(sizeof(*circ), iocom->mmsg,
1066 lwkt_gettoken(&kdmsg_token);
1067 msg->state->any.circ = circ;
1068 circ->iocom = iocom;
1069 circ->rcirc_state = msg->state;
1070 kdmsg_circ_hold(circ); /* for rcirc_state */
1072 circ->msgid = circ->rcirc_state->msgid;
1073 /* XXX no span link for received circuits */
1074 kdmsg_circ_hold(circ); /* for circ_state */
1076 if (RB_INSERT(kdmsg_circuit_tree,
1077 &iocom->circ_tree, circ)) {
1078 panic("duplicate circuitid allocated");
1080 lwkt_reltoken(&kdmsg_token);
1081 kdmsg_msg_result(msg, 0);
1084 * Handle shim after adding the circuit and
1085 * after acknowledging the CIRC.
1087 if (iocom->auto_callback)
1088 iocom->auto_callback(msg);
1093 error = iocom->rcvmsg(msg);
1097 case DMSG_LNK_CIRC | DMSGF_DELETE:
1098 if (iocom->flags & KDMSG_IOCOMF_AUTOCIRC) {
1099 circ = msg->state->any.circ;
1104 * Handle shim before terminating the circuit.
1107 kprintf("KDMSG VC: RECEIVE CIRC DELETE "
1108 "IOCOM %p MSGID %016jx\n",
1109 msg->iocom, circ->msgid);
1111 if (iocom->auto_callback)
1112 iocom->auto_callback(msg);
1114 KKASSERT(circ->rcirc_state == msg->state);
1115 lwkt_gettoken(&kdmsg_token);
1116 circ->rcirc_state = NULL;
1117 msg->state->any.circ = NULL;
1118 RB_REMOVE(kdmsg_circuit_tree, &iocom->circ_tree, circ);
1119 lwkt_reltoken(&kdmsg_token);
1120 kdmsg_circ_drop(circ); /* for rcirc_state */
1121 kdmsg_msg_reply(msg, 0);
1123 error = iocom->rcvmsg(msg);
1128 * Anything unhandled goes into rcvmsg.
1130 * NOTE: Replies to link-level messages initiated by our side
1131 * are handled by the state callback, they are NOT
1134 error = iocom->rcvmsg(msg);
1141 * Handle automatic forging of virtual circuits based on received SPANs.
1142 * (AUTOFORGE). Note that other code handles tracking received circuit
1143 * transactions (AUTOCIRC).
1145 * We can ignore non-transactions here. Use state->icmd to test the
1146 * transactional command (once past the CREATE the individual message
1147 * commands are not usually the icmd).
1153 kdmsg_autocirc(kdmsg_msg_t *msg)
1155 kdmsg_iocom_t *iocom = msg->iocom;
1156 kdmsg_circuit_t *circ;
1157 kdmsg_msg_t *xmsg; /* CIRC */
1159 if (msg->state == NULL)
1163 * Gaining the SPAN, automatically forge a circuit to the target.
1165 * NOTE!! The shim is not executed until we receive an acknowledgement
1166 * to our forged LNK_CIRC (see kdmsg_autocirc_reply()).
1168 if (msg->state->icmd == DMSG_LNK_SPAN &&
1169 (msg->any.head.cmd & DMSGF_CREATE)) {
1170 circ = kmalloc(sizeof(*circ), iocom->mmsg, M_WAITOK | M_ZERO);
1171 lwkt_gettoken(&kdmsg_token);
1172 msg->state->any.circ = circ;
1173 circ->iocom = iocom;
1174 circ->span_state = msg->state;
1175 kdmsg_circ_hold(circ); /* for span_state */
1176 xmsg = kdmsg_msg_alloc(iocom, NULL,
1177 DMSG_LNK_CIRC | DMSGF_CREATE,
1178 kdmsg_autocirc_reply, circ);
1179 circ->circ_state = xmsg->state;
1180 circ->weight = msg->any.lnk_span.dist;
1181 circ->msgid = circ->circ_state->msgid;
1182 kdmsg_circ_hold(circ); /* for circ_state */
1184 kprintf("KDMSG VC: CREATE SPAN->CIRC IOCOM %p MSGID %016jx\n",
1185 msg->iocom, circ->msgid);
1188 if (RB_INSERT(kdmsg_circuit_tree, &iocom->circ_tree, circ))
1189 panic("duplicate circuitid allocated");
1190 lwkt_reltoken(&kdmsg_token);
1192 xmsg->any.lnk_circ.target = msg->any.head.msgid;
1193 kdmsg_msg_write(xmsg);
1199 * NOTE: When losing a SPAN, any circuits using the span should be
1200 * deleted by the remote end first. XXX might not be ordered
1201 * on actual loss of connection.
1203 if (msg->state->icmd == DMSG_LNK_SPAN &&
1204 (msg->any.head.cmd & DMSGF_DELETE) &&
1205 msg->state->any.circ) {
1206 circ = msg->state->any.circ;
1207 lwkt_gettoken(&kdmsg_token);
1208 circ->span_state = NULL;
1209 msg->state->any.circ = NULL;
1210 RB_REMOVE(kdmsg_circuit_tree, &iocom->circ_tree, circ);
1212 kprintf("KDMSG VC: DELETE SPAN->CIRC IOCOM %p MSGID %016jx\n",
1213 msg->iocom, (intmax_t)circ->msgid);
1215 kdmsg_circ_drop(circ); /* for span_state */
1216 lwkt_reltoken(&kdmsg_token);
1222 kdmsg_autocirc_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
1224 kdmsg_iocom_t *iocom = state->iocom;
1225 kdmsg_circuit_t *circ = state->any.circ;
1228 * Call shim after receiving an acknowledgement to our forged
1229 * circuit and before processing a received termination.
1231 if (iocom->auto_callback)
1232 iocom->auto_callback(msg);
1235 * If the remote is terminating the VC we terminate our side
1237 if ((state->txcmd & DMSGF_DELETE) == 0 &&
1238 (msg->any.head.cmd & DMSGF_DELETE)) {
1240 kprintf("KDMSG VC: DELETE CIRC FROM REMOTE\n");
1242 lwkt_gettoken(&kdmsg_token);
1243 circ->circ_state = NULL;
1244 state->any.circ = NULL;
1245 kdmsg_circ_drop(circ); /* for circ_state */
1246 lwkt_reltoken(&kdmsg_token);
1247 kdmsg_msg_reply(msg, 0);
1253 * Post-receive-handling message and state cleanup. This routine is called
1254 * after the state function handling/callback to properly dispose of the
1255 * message and update or dispose of the state.
/*
 * Disposes of a received message once its handler has run.
 *
 * - No state attached: the message is freed.
 * - Message carries DELETE: rxcmd is marked DELETE under msglk.  If txcmd
 *   already has DELETE as well, the state is removed from its RB tree,
 *   KDMSG_STATE_INSERTED is cleared and the state is freed.
 * - Otherwise nothing changes in the state.
 *
 * In every stateful case the message is freed only when it is not the
 * message recorded in state->msg.
 */
1259 kdmsg_state_cleanuprx(kdmsg_msg_t *msg)
1261 kdmsg_iocom_t *iocom = msg->iocom;
1262 kdmsg_state_t *state;
1264 if ((state = msg->state) == NULL) {
1265 kdmsg_msg_free(msg);
1266 } else if (msg->any.head.cmd & DMSGF_DELETE) {
1267 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
/* A second DELETE on the receive side is treated as a fatal error. */
1268 KKASSERT((state->rxcmd & DMSGF_DELETE) == 0);
1269 state->rxcmd |= DMSGF_DELETE;
/* Transmit side already closed too: the state can be retired. */
1270 if (state->txcmd & DMSGF_DELETE) {
1271 KKASSERT(state->flags & KDMSG_STATE_INSERTED);
/* rxcmd with REPLY set selects statewr_tree, otherwise staterd_tree. */
1272 if (state->rxcmd & DMSGF_REPLY) {
1273 KKASSERT(msg->any.head.cmd &
1275 RB_REMOVE(kdmsg_state_tree,
1276 &iocom->statewr_tree, state);
1278 KKASSERT((msg->any.head.cmd &
1280 RB_REMOVE(kdmsg_state_tree,
1281 &iocom->staterd_tree, state);
1283 state->flags &= ~KDMSG_STATE_INSERTED;
1284 if (msg != state->msg)
1285 kdmsg_msg_free(msg);
/* msglk is released before the state itself is freed. */
1286 lockmgr(&iocom->msglk, LK_RELEASE);
1287 kdmsg_state_free(state);
1289 if (msg != state->msg)
1290 kdmsg_msg_free(msg);
1291 lockmgr(&iocom->msglk, LK_RELEASE);
1293 } else if (msg != state->msg) {
1294 kdmsg_msg_free(msg);
1299 * Simulate receiving a message which terminates an active transaction
1300 * state. Our simulated received message must set DELETE and may also
1301 * have to set CREATE. It must also ensure that all fields are set such
1302 * that the receive handling code can find the state (kdmsg_state_msgrx())
1303 * or an endless loop will ensue.
1305 * This is used when the other end of the link or virtual circuit is dead
1306 * so the device driver gets a completed transaction for all pending states.
/*
 * Aborts a transaction state by fabricating a received message that
 * carries DELETE and the error DMSG_ERR_LOSTLINK, then handing it to
 * kdmsg_msg_receive_handling().  The KDMSG_STATE_ABORTING flag is tested
 * and set first so the same state is not aborted again from within the
 * simulated reception.
 */
1310 kdmsg_state_abort(kdmsg_state_t *state)
1312 kdmsg_iocom_t *iocom = state->iocom;
1316 * Prevent recursive aborts which could otherwise occur if the
1317 * simulated message reception runs state->func which then turns
1318 * around and tries to reply to a broken circuit when then calls
1319 * the state abort code again.
1321 if (state->flags & KDMSG_STATE_ABORTING)
1323 state->flags |= KDMSG_STATE_ABORTING;
1326 * Simulatem essage reception
1328 msg = kdmsg_msg_alloc(iocom, state->circ,
/* The receive side has not seen CREATE yet, so the fake message adds it. */
1331 if ((state->rxcmd & DMSGF_CREATE) == 0)
1332 msg->any.head.cmd |= DMSGF_CREATE;
/* Always DELETE; the REPLY bit is copied from the state's rxcmd. */
1333 msg->any.head.cmd |= DMSGF_DELETE | (state->rxcmd & DMSGF_REPLY);
1334 msg->any.head.error = DMSG_ERR_LOSTLINK;
/* Use the state's msgid so the receive path can locate this state. */
1335 msg->any.head.msgid = state->msgid;
1337 kdmsg_msg_receive_handling(msg);
1341 * Process state tracking for a message prior to transmission.
1343 * Called with msglk held and the msg dequeued. Returns non-zero if
1344 * the message is bad and should be deleted by the caller.
1346 * One-off messages are usually with dummy state and msg->state may be NULL
1347 * in this situation.
1349 * New transactions (when CREATE is set) will insert the state.
1351 * May request that caller discard the message by setting *discardp to 1.
1352 * A NULL state may be returned in this case.
/*
 * Transmit-side state tracking for msg.
 *
 * First makes sure iocom->freewr_state holds a spare, zeroed state
 * structure (allocated with M_WAITOK).  Then, holding msglk, the message is
 * classified by its CREATE / DELETE / REPLY flags, with ABORT examined
 * inside the individual cases, and the associated state is initialised or
 * validated.  Messages with none of CREATE, DELETE or ABORT set take the
 * short-cut path, which releases msglk straight away.
 *
 * The result is used as a boolean by kdmsg_msg_write(): non-zero means the
 * message should be discarded.
 */
1356 kdmsg_state_msgtx(kdmsg_msg_t *msg)
1358 kdmsg_iocom_t *iocom = msg->iocom;
1359 kdmsg_state_t *state;
1363 * Make sure a state structure is ready to go in case we need a new
1364 * one. This is the only routine which uses freewr_state so no
1365 * races are possible.
1367 if ((state = iocom->freewr_state) == NULL) {
1368 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
1369 state->flags = KDMSG_STATE_DYNAMIC;
1370 state->iocom = iocom;
1371 iocom->freewr_state = state;
1375 * Lock RB tree. If persistent state is present it will have already
1376 * been assigned to msg.
1378 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1382 * Short-cut one-off or mid-stream messages (state may be NULL).
1384 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
1385 DMSGF_ABORT)) == 0) {
1386 lockmgr(&iocom->msglk, LK_RELEASE);
1392 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
1393 * inside the case statements.
1395 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
1398 case DMSGF_CREATE | DMSGF_DELETE:
1400 * Insert the new persistent message state and mark
1401 * half-closed if DELETE is set. Since this is a new
1402 * message it isn't possible to transition into the fully
1403 * closed state here.
1405 * XXX state must be assigned and inserted by
1406 * kdmsg_msg_write(). txcmd is assigned by us
1409 KKASSERT(state != NULL);
/* icmd keeps only the base command bits of the initiating message. */
1410 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK;
/*
 * DELETE is masked out of txcmd here; kdmsg_state_cleanuptx() asserts it
 * is still clear and sets it itself.
 */
1411 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
/* Receive-side command word starts out with only the REPLY bit set. */
1412 state->rxcmd = DMSGF_REPLY;
1417 * Sent ABORT+DELETE in case where msgid has already
1418 * been fully closed, ignore the message.
1420 if (state == NULL) {
1421 if (msg->any.head.cmd & DMSGF_ABORT) {
1424 kprintf("kdmsg_state_msgtx: no state match "
1425 "for DELETE cmd=%08x msgid=%016jx\n",
1427 (intmax_t)msg->any.head.msgid);
1434 * Sent ABORT+DELETE in case where msgid has
1435 * already been reused for an unrelated message,
1436 * ignore the message.
1438 if ((state->txcmd & DMSGF_CREATE) == 0) {
1439 if (msg->any.head.cmd & DMSGF_ABORT) {
1442 kprintf("kdmsg_state_msgtx: state reused "
1452 * Check for mid-stream ABORT command sent
1454 if (msg->any.head.cmd & DMSGF_ABORT) {
1455 if (state == NULL ||
1456 (state->txcmd & DMSGF_CREATE) == 0) {
1463 case DMSGF_REPLY | DMSGF_CREATE:
1464 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
1466 * When transmitting a reply with CREATE set the original
1467 * persistent state message should already exist.
1469 if (state == NULL) {
1470 kprintf("kdmsg_state_msgtx: no state match "
1471 "for REPLY | CREATE\n");
/* As in the CREATE case, DELETE is left for kdmsg_state_cleanuptx(). */
1475 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
1478 case DMSGF_REPLY | DMSGF_DELETE:
1480 * When transmitting a reply with DELETE set the original
1481 * persistent state message should already exist.
1483 * This is very similar to the REPLY|CREATE|* case except
1484 * txcmd is already stored, so we just add the DELETE flag.
1486 * Sent REPLY+ABORT+DELETE in case where msgid has
1487 * already been fully closed, ignore the message.
1489 if (state == NULL) {
1490 if (msg->any.head.cmd & DMSGF_ABORT) {
1493 kprintf("kdmsg_state_msgtx: no state match "
1494 "for REPLY | DELETE\n");
1501 * Sent REPLY+ABORT+DELETE in case where msgid has already
1502 * been reused for an unrelated message, ignore the message.
1504 if ((state->txcmd & DMSGF_CREATE) == 0) {
1505 if (msg->any.head.cmd & DMSGF_ABORT) {
1508 kprintf("kdmsg_state_msgtx: state reused "
1509 "for REPLY | DELETE\n");
1518 * Check for mid-stream ABORT reply sent.
1520 * One-off REPLY messages are allowed for e.g. status updates.
1522 if (msg->any.head.cmd & DMSGF_ABORT) {
1523 if (state == NULL ||
1524 (state->txcmd & DMSGF_CREATE) == 0) {
/* Drop the msglk taken before the switch. */
1532 lockmgr(&iocom->msglk, LK_RELEASE);
/*
 * Disposes of a message after it has been processed on the transmit side.
 * This is the transmit counterpart of kdmsg_state_cleanuprx().
 *
 * - No state attached: the message is freed.
 * - Message carries DELETE: txcmd is marked DELETE under msglk.  If rxcmd
 *   already has DELETE as well, the state is removed from its RB tree,
 *   KDMSG_STATE_INSERTED is cleared and the state is freed.
 * - Otherwise nothing changes in the state.
 *
 * In every stateful case the message is freed only when it is not the
 * message recorded in state->msg.
 */
1538 kdmsg_state_cleanuptx(kdmsg_msg_t *msg)
1540 kdmsg_iocom_t *iocom = msg->iocom;
1541 kdmsg_state_t *state;
1543 if ((state = msg->state) == NULL) {
1544 kdmsg_msg_free(msg);
1545 } else if (msg->any.head.cmd & DMSGF_DELETE) {
1546 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
/* A second DELETE on the transmit side is treated as a fatal error. */
1547 KKASSERT((state->txcmd & DMSGF_DELETE) == 0);
1548 state->txcmd |= DMSGF_DELETE;
/* Receive side already closed too: the state can be retired. */
1549 if (state->rxcmd & DMSGF_DELETE) {
1550 KKASSERT(state->flags & KDMSG_STATE_INSERTED);
/* txcmd with REPLY set selects staterd_tree, otherwise statewr_tree. */
1551 if (state->txcmd & DMSGF_REPLY) {
1552 KKASSERT(msg->any.head.cmd &
1554 RB_REMOVE(kdmsg_state_tree,
1555 &iocom->staterd_tree, state);
1557 KKASSERT((msg->any.head.cmd &
1559 RB_REMOVE(kdmsg_state_tree,
1560 &iocom->statewr_tree, state);
1562 state->flags &= ~KDMSG_STATE_INSERTED;
1563 if (msg != state->msg)
1564 kdmsg_msg_free(msg);
/* msglk is released before the state itself is freed. */
1565 lockmgr(&iocom->msglk, LK_RELEASE);
1566 kdmsg_state_free(state);
1568 if (msg != state->msg)
1569 kdmsg_msg_free(msg);
1570 lockmgr(&iocom->msglk, LK_RELEASE);
1572 } else if (msg != state->msg) {
1573 kdmsg_msg_free(msg);
/*
 * Releases a state structure back to the iocom's malloc type (mmsg).  The
 * state must already have been removed from its RB tree, which is asserted
 * through the KDMSG_STATE_INSERTED flag.  A message is freed afterwards.
 * NOTE(review): presumably that message is the one held in state->msg;
 * the assignment is not visible here, confirm.
 */
1579 kdmsg_state_free(kdmsg_state_t *state)
1581 kdmsg_iocom_t *iocom = state->iocom;
1584 KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0);
1587 kfree(state, iocom->mmsg);
1590 kdmsg_msg_free(msg);
/*
 * Allocates a zeroed message for iocom.
 *
 * iocom - owning iocom; must not be NULL (asserted).
 * circ  - circuit the message is sent on; a reference is taken and its
 *         msgid is copied into the header's circuit field.
 * cmd   - command word; the DMSGF_SIZE bits times DMSG_ALIGN give the
 *         header size in bytes.
 * func  - NOTE(review): presumably installed as the state callback for new
 *         transactions; the assignment is not visible here, confirm.
 * data  - stored in state->any.any for new transactions.
 *
 * When cmd has DMSGF_CREATE set, a new dynamic state is allocated, given a
 * msgid derived from its own address, inserted into statewr_tree under
 * msglk (a duplicate msgid panics) and its msgid is copied into the
 * message header.
 */
1595 kdmsg_msg_alloc(kdmsg_iocom_t *iocom, kdmsg_circuit_t *circ, uint32_t cmd,
1596 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
1599 kdmsg_state_t *state;
1602 KKASSERT(iocom != NULL);
1603 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
/* Allocation covers the fixed part of kdmsg_msg plus the header bytes. */
1604 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
1605 iocom->mmsg, M_WAITOK | M_ZERO);
1606 msg->hdr_size = hbytes;
1608 msg->any.head.magic = DMSG_HDR_MAGIC;
1609 msg->any.head.cmd = cmd;
1611 kdmsg_circ_hold(circ);
1613 msg->any.head.circuit = circ->msgid;
1616 if (cmd & DMSGF_CREATE) {
1618 * New transaction, requires tracking state and a unique
1619 * msgid to be allocated.
1621 KKASSERT(msg->state == NULL);
1622 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
1623 state->flags = KDMSG_STATE_DYNAMIC;
1625 state->any.any = data;
/* The state's own address doubles as its msgid. */
1627 state->msgid = (uint64_t)(uintptr_t)state;
1629 state->iocom = iocom;
/* Second circuit reference, taken in addition to the message's one. */
1632 kdmsg_circ_hold(circ);
1633 /*msg->any.head.msgid = state->msgid;XXX*/
1635 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1636 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state))
1637 panic("duplicate msgid allocated");
1638 state->flags |= KDMSG_STATE_INSERTED;
1639 msg->any.head.msgid = state->msgid;
1640 lockmgr(&iocom->msglk, LK_RELEASE);
/*
 * Allocates a zeroed message bound to an existing transaction state.  The
 * iocom is taken from the state, the header size is derived from the
 * DMSGF_SIZE bits of cmd, and the message takes a reference on state->circ
 * and copies that circuit's msgid into the header.
 * NOTE(review): how func and data are consumed is not visible here,
 * confirm.
 */
1646 kdmsg_msg_alloc_state(kdmsg_state_t *state, uint32_t cmd,
1647 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
1649 kdmsg_iocom_t *iocom = state->iocom;
1653 KKASSERT(iocom != NULL);
1654 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
/* Allocation covers the fixed part of kdmsg_msg plus the header bytes. */
1655 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
1656 iocom->mmsg, M_WAITOK | M_ZERO);
1657 msg->hdr_size = hbytes;
1659 msg->any.head.magic = DMSG_HDR_MAGIC;
1660 msg->any.head.cmd = cmd;
1663 kdmsg_circ_hold(state->circ);
1664 msg->circ = state->circ;
1665 msg->any.head.circuit = state->circ->msgid;
/*
 * Frees a message.  Auxiliary data is released only when the message owns
 * it (KDMSG_FLAG_AUXALLOC set, with a non-NULL pointer and non-zero size).
 * The circuit reference is dropped, the state's back-pointer to this
 * message is cleared if it points here, and the message is returned to the
 * iocom's malloc type.
 */
1671 kdmsg_msg_free(kdmsg_msg_t *msg)
1673 kdmsg_iocom_t *iocom = msg->iocom;
1675 if ((msg->flags & KDMSG_FLAG_AUXALLOC) &&
1676 msg->aux_data && msg->aux_size) {
1677 kfree(msg->aux_data, iocom->mmsg);
1678 msg->flags &= ~KDMSG_FLAG_AUXALLOC;
1681 kdmsg_circ_drop(msg->circ);
/* Keep the state from referring to a message that is about to be freed. */
1685 if (msg->state->msg == msg)
1686 msg->state->msg = NULL;
1689 msg->aux_data = NULL;
1692 kfree(msg, iocom->mmsg);
1696 * Circuits are tracked in a red-black tree by their circuit id (msgid).
/*
 * Comparator for the circuit RB tree; orders two circuits by msgid.
 * NOTE(review): the values returned for less / greater / equal are not
 * visible here, presumably -1 / 1 / 0, confirm.
 */
1699 kdmsg_circuit_cmp(kdmsg_circuit_t *circ1, kdmsg_circuit_t *circ2)
1701 if (circ1->msgid < circ2->msgid)
1703 if (circ1->msgid > circ2->msgid)
1709 * Indexed messages are stored in a red-black tree indexed by their
1710 * msgid. Only persistent messages are indexed.
/*
 * Comparator for the state RB trees.  The key is compared in three steps:
 * the iocom pointer first, then the circ pointer, then the msgid, so two
 * states with the same msgid on different circuits are distinct entries.
 * NOTE(review): the values returned for less / greater / equal are not
 * visible here, presumably -1 / 1 / 0, confirm.
 */
1713 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2)
1715 if (state1->iocom < state2->iocom)
1717 if (state1->iocom > state2->iocom)
1719 if (state1->circ < state2->circ)
1721 if (state1->circ > state2->circ)
1723 if (state1->msgid < state2->msgid)
1725 if (state1->msgid > state2->msgid)
1731 * Write a message. All requisite command flags have been set.
1733 * If msg->state is non-NULL the message is written to the existing
1734 * transaction. msgid will be set accordingly.
1736 * If msg->state is NULL and CREATE is set new state is allocated and
1737 * (func, data) is installed. A msgid is assigned.
1739 * If msg->state is NULL and CREATE is not set the message is assumed
1740 * to be a one-way message. The originator must assign the msgid
1741 * (or leave it 0, which is typical).
1743 * This function merely queues the message to the management thread, it
1744 * does not write to the message socket/pipe.
/*
 * Queues msg for transmission on its iocom.
 *
 * The header msgid is set from the state for an existing transaction, or
 * to 0 for a one-off message, and msglk is taken.  A write to a circuit
 * that has already been torn down is logged and resolved locally through
 * the transmit state tracking and cleanup routines instead of being
 * queued.  A write to an iocom flagged KDMSG_IOCOMF_EXITNOACC panics.
 * Otherwise the salt, aux and header CRC fields are filled in, the message
 * is appended to iocom->msgq, and a sleeper on msg_ctl is woken if the
 * SLEEPING flag is set.
 */
1747 kdmsg_msg_write(kdmsg_msg_t *msg)
1749 kdmsg_iocom_t *iocom = msg->iocom;
1750 kdmsg_state_t *state;
1754 * Continuance or termination of existing transaction.
1755 * The transaction could have been initiated by either end.
1757 * (Function callback and aux data for the receive side can
1758 * be replaced or left alone).
1761 msg->any.head.msgid = state->msgid;
1762 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1765 * One-off message (always uses msgid 0 to distinguish
1766 * between a possibly lost in-transaction message due to
1767 * competing aborts and a real one-off message?)
1770 msg->any.head.msgid = 0;
1771 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1775 * With AUTOCIRC and AUTOFORGE it is possible for the circuit to
1776 * get ripped out in the rxthread while some other thread is
1777 * holding a ref on it inbetween allocating and sending a dmsg.
/*
 * The circuit counts as terminated when it has no rcirc_state and is
 * missing either its span_state or its circ_state.
 */
1779 if (msg->circ && msg->circ->rcirc_state == NULL &&
1780 (msg->circ->span_state == NULL || msg->circ->circ_state == NULL)) {
1781 kprintf("kdmsg_msg_write: Attempt to write message to "
1782 "terminated circuit: msg %08x\n", msg->any.head.cmd);
/* msglk is dropped first; kdmsg_state_msgtx() takes it again itself. */
1783 lockmgr(&iocom->msglk, LK_RELEASE);
1784 if (kdmsg_state_msgtx(msg)) {
1785 if (state == NULL || msg != state->msg)
1786 kdmsg_msg_free(msg);
1787 } else if ((msg->state->rxcmd & DMSGF_DELETE) == 0) {
1788 /* XXX SMP races simulating a response here */
/* This declaration shadows the function-level 'state' variable. */
1789 kdmsg_state_t *state = msg->state;
/* The state pointer is saved before cleanuptx, which may free msg. */
1790 kdmsg_state_cleanuptx(msg);
1791 kdmsg_state_abort(state);
1793 kdmsg_state_cleanuptx(msg);
1799 * This flag is not set until after the tx thread has drained
1800 * the txmsgq and simulated responses. After that point the
1801 * txthread is dead and can no longer simulate responses.
1803 * Device drivers should never try to send a message once this
1804 * flag is set. They should have detected (through the state
1805 * closures) that the link is in trouble.
1807 if (iocom->flags & KDMSG_IOCOMF_EXITNOACC) {
1808 lockmgr(&iocom->msglk, LK_RELEASE);
1809 panic("kdmsg_msg_write: Attempt to write message to "
1810 "terminated iocom\n");
1814 * Finish up the msg fields. Note that msg->aux_size and the
1815 * aux_bytes stored in the message header represent the unaligned
1816 * (actual) bytes of data, but the buffer is sized to an aligned
1817 * size and the CRC is generated over the aligned length.
/* Only the low 8 bits of msg_seq are used; the random part is disabled. */
1819 msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255);
1822 if (msg->aux_data && msg->aux_size) {
1823 uint32_t abytes = DMSG_DOALIGN(msg->aux_size);
1825 msg->any.head.aux_bytes = msg->aux_size;
1826 msg->any.head.aux_crc = iscsi_crc32(msg->aux_data, abytes);
/* hdr_crc is zeroed first so the CRC is computed over a zero CRC field. */
1828 msg->any.head.hdr_crc = 0;
1829 msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size);
1831 TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry);
/* Clear the SLEEPING flag and wake whoever sleeps on msg_ctl. */
1833 if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) {
1834 atomic_clear_int(&iocom->msg_ctl,
1835 KDMSG_CLUSTERCTL_SLEEPING);
1836 wakeup(&iocom->msg_ctl);
1839 lockmgr(&iocom->msglk, LK_RELEASE);
1843 * Reply to a message and terminate our side of the transaction.
1845 * If msg->state is NULL we are replying to a one-way message.
/*
 * Sends a DMSG_LNK_ERROR reply carrying 'error' for the transaction that
 * msg belongs to, and closes our side of it by setting DMSGF_DELETE.
 * DMSGF_CREATE is added when our transmit direction has not been opened
 * yet.  The reply is built with kdmsg_msg_alloc_state() and queued with
 * kdmsg_msg_write().
 */
1848 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error)
1850 kdmsg_state_t *state = msg->state;
1855 * Reply with a simple error code and terminate the transaction.
1857 cmd = DMSG_LNK_ERROR;
1860 * Check if our direction has even been initiated yet, set CREATE.
1862 * Check what direction this is (command or reply direction). Note
1863 * that txcmd might not have been initiated yet.
1865 * If our direction has already been closed we just return without
1869 if (state->txcmd & DMSGF_DELETE)
1871 if ((state->txcmd & DMSGF_CREATE) == 0)
1872 cmd |= DMSGF_CREATE;
1873 if (state->txcmd & DMSGF_REPLY)
/* Terminating reply: our side of the transaction closes with this one. */
1875 cmd |= DMSGF_DELETE;
/* This test uses the received message's own flags, not the state's. */
1877 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1881 /* XXX messy mask cmd to avoid allocating state */
1882 nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
1883 nmsg->any.head.error = error;
1884 kdmsg_msg_write(nmsg);
1888 * Reply to a message and continue our side of the transaction.
1890 * If msg->state is NULL we are replying to a one-way message and this
1891 * function degenerates into the same as kdmsg_msg_reply().
/*
 * Sends a DMSG_LNK_ERROR result carrying 'error' for the transaction that
 * msg belongs to, leaving our side of the transaction open (DMSGF_DELETE
 * is not added).  DMSGF_CREATE is added when our transmit direction has
 * not been opened yet.  The message is built with kdmsg_msg_alloc_state()
 * and queued with kdmsg_msg_write().
 */
1894 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error)
1896 kdmsg_state_t *state = msg->state;
1901 * Return a simple result code, do NOT terminate the transaction.
1903 cmd = DMSG_LNK_ERROR;
1906 * Check if our direction has even been initiated yet, set CREATE.
1908 * Check what direction this is (command or reply direction). Note
1909 * that txcmd might not have been initiated yet.
1911 * If our direction has already been closed we just return without
1915 if (state->txcmd & DMSGF_DELETE)
1917 if ((state->txcmd & DMSGF_CREATE) == 0)
1918 cmd |= DMSGF_CREATE;
1919 if (state->txcmd & DMSGF_REPLY)
1921 /* continuing transaction, do not set MSGF_DELETE */
/* This test uses the received message's own flags, not the state's. */
1923 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1927 /* XXX messy mask cmd to avoid allocating state */
1928 nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
1929 nmsg->any.head.error = error;
1930 kdmsg_msg_write(nmsg);
1934 * Reply to a message and terminate our side of the transaction.
1936 * If msg->state is non-NULL we are replying to a one-way message.
/*
 * State-based variant of kdmsg_msg_reply(): sends a DMSG_LNK_ERROR reply
 * carrying 'error' on the given state and closes our side of the
 * transaction by setting DMSGF_DELETE.  DMSGF_CREATE is added when our
 * transmit direction has not been opened yet.  Every flag decision here
 * is taken from state->txcmd; no received message is consulted.
 */
1939 kdmsg_state_reply(kdmsg_state_t *state, uint32_t error)
1945 * Reply with a simple error code and terminate the transaction.
1947 cmd = DMSG_LNK_ERROR;
1950 * Check if our direction has even been initiated yet, set CREATE.
1952 * Check what direction this is (command or reply direction). Note
1953 * that txcmd might not have been initiated yet.
1955 * If our direction has already been closed we just return without
1959 if (state->txcmd & DMSGF_DELETE)
1961 if ((state->txcmd & DMSGF_CREATE) == 0)
1962 cmd |= DMSGF_CREATE;
1963 if (state->txcmd & DMSGF_REPLY)
/* Terminating reply: our side of the transaction closes with this one. */
1965 cmd |= DMSGF_DELETE;
1967 if ((state->txcmd & DMSGF_REPLY) == 0)
1971 /* XXX messy mask cmd to avoid allocating state */
1972 nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
1973 nmsg->any.head.error = error;
1974 kdmsg_msg_write(nmsg);
1978 * Reply to a message and continue our side of the transaction.
1980 * If msg->state is non-NULL we are replying to a one-way message and this
1981 * function degenerates into the same as kdmsg_msg_reply().
/*
 * State-based variant of kdmsg_msg_result(): sends a DMSG_LNK_ERROR result
 * carrying 'error' on the given state while leaving our side of the
 * transaction open (DMSGF_DELETE is not added).  DMSGF_CREATE is added
 * when our transmit direction has not been opened yet.  Every flag
 * decision here is taken from state->txcmd.
 */
1984 kdmsg_state_result(kdmsg_state_t *state, uint32_t error)
1990 * Return a simple result code, do NOT terminate the transaction.
1992 cmd = DMSG_LNK_ERROR;
1995 * Check if our direction has even been initiated yet, set CREATE.
1997 * Check what direction this is (command or reply direction). Note
1998 * that txcmd might not have been initiated yet.
2000 * If our direction has already been closed we just return without
2004 if (state->txcmd & DMSGF_DELETE)
2006 if ((state->txcmd & DMSGF_CREATE) == 0)
2007 cmd |= DMSGF_CREATE;
2008 if (state->txcmd & DMSGF_REPLY)
2010 /* continuing transaction, do not set MSGF_DELETE */
2012 if ((state->txcmd & DMSGF_REPLY) == 0)
2016 /* XXX messy mask cmd to avoid allocating state */
2017 nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
2018 nmsg->any.head.error = error;
2019 kdmsg_msg_write(nmsg);