d65ee1029ae45de7f804acbc2b85bc24caa1a6d3
[dragonfly.git] / sys / kern / kern_dmsg.c
1 /*-
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/kernel.h>
37 #include <sys/conf.h>
38 #include <sys/systm.h>
39 #include <sys/queue.h>
40 #include <sys/tree.h>
41 #include <sys/malloc.h>
42 #include <sys/mount.h>
43 #include <sys/socket.h>
44 #include <sys/vnode.h>
45 #include <sys/file.h>
46 #include <sys/proc.h>
47 #include <sys/priv.h>
48 #include <sys/thread.h>
49 #include <sys/globaldata.h>
50 #include <sys/limits.h>
51
52 #include <sys/dmsg.h>
53
54 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp);
55
56 static void kdmsg_iocom_thread_rd(void *arg);
57 static void kdmsg_iocom_thread_wr(void *arg);
58
59 /*
60  * Initialize the roll-up communications structure for a network
61  * messaging session.  This function does not install the socket.
62  */
63 void
64 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle,
65                  struct malloc_type *mmsg,
66                  void (*cctl_wakeup)(kdmsg_iocom_t *),
67                  int (*lnk_rcvmsg)(kdmsg_msg_t *msg),
68                  int (*dbg_rcvmsg)(kdmsg_msg_t *msg),
69                  int (*misc_rcvmsg)(kdmsg_msg_t *msg))
70 {
71         bzero(iocom, sizeof(*iocom));
72         iocom->handle = handle;
73         iocom->mmsg = mmsg;
74         iocom->clusterctl_wakeup = cctl_wakeup;
75         iocom->lnk_rcvmsg = lnk_rcvmsg;
76         iocom->dbg_rcvmsg = dbg_rcvmsg;
77         iocom->misc_rcvmsg = misc_rcvmsg;
78         iocom->router.iocom = iocom;
79         lockinit(&iocom->msglk, "h2msg", 0, 0);
80         TAILQ_INIT(&iocom->msgq);
81         RB_INIT(&iocom->staterd_tree);
82         RB_INIT(&iocom->statewr_tree);
83 }
84
85 /*
86  * [Re]connect using the passed file pointer.  The caller must ref the
87  * fp for us.  We own that ref now.
88  */
89 void
90 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
91                       const char *subsysname)
92 {
93         /*
94          * Destroy the current connection
95          */
96         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
97         while (iocom->msgrd_td || iocom->msgwr_td) {
98                 wakeup(&iocom->msg_ctl);
99                 tsleep(iocom, 0, "clstrkl", hz);
100         }
101
102         /*
103          * Drop communications descriptor
104          */
105         if (iocom->msg_fp) {
106                 fdrop(iocom->msg_fp);
107                 iocom->msg_fp = NULL;
108         }
109         kprintf("RESTART CONNECTION\n");
110
111         /*
112          * Setup new communications descriptor
113          */
114         iocom->msg_ctl = 0;
115         iocom->msg_fp = fp;
116         iocom->msg_seq = 0;
117
118         lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td,
119                     NULL, 0, -1, "%s-msgrd", subsysname);
120         lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td,
121                     NULL, 0, -1, "%s-msgwr", subsysname);
122 }
123
124 /*
125  * Cluster controller thread.  Perform messaging functions.  We have one
126  * thread for the reader and one for the writer.  The writer handles
127  * shutdown requests (which should break the reader thread).
128  */
129 static
130 void
131 kdmsg_iocom_thread_rd(void *arg)
132 {
133         kdmsg_iocom_t *iocom = arg;
134         dmsg_hdr_t hdr;
135         kdmsg_msg_t *msg;
136         kdmsg_state_t *state;
137         size_t hbytes;
138         int error = 0;
139
140         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) {
141                 /*
142                  * Retrieve the message from the pipe or socket.
143                  */
144                 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr),
145                                 NULL, 1, UIO_SYSSPACE);
146                 if (error)
147                         break;
148                 if (hdr.magic != DMSG_HDR_MAGIC) {
149                         kprintf("kdmsg: bad magic: %04x\n", hdr.magic);
150                         error = EINVAL;
151                         break;
152                 }
153                 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN;
154                 if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) {
155                         kprintf("kdmsg: bad header size %zd\n", hbytes);
156                         error = EINVAL;
157                         break;
158                 }
159                 /* XXX messy: mask cmd to avoid allocating state */
160                 msg = kdmsg_msg_alloc(&iocom->router,
161                                         hdr.cmd & DMSGF_BASECMDMASK,
162                                         NULL, NULL);
163                 msg->any.head = hdr;
164                 msg->hdr_size = hbytes;
165                 if (hbytes > sizeof(hdr)) {
166                         error = fp_read(iocom->msg_fp, &msg->any.head + 1,
167                                         hbytes - sizeof(hdr),
168                                         NULL, 1, UIO_SYSSPACE);
169                         if (error) {
170                                 kprintf("kdmsg: short msg received\n");
171                                 error = EINVAL;
172                                 break;
173                         }
174                 }
175                 msg->aux_size = hdr.aux_bytes * DMSG_ALIGN;
176                 if (msg->aux_size > DMSG_AUX_MAX) {
177                         kprintf("kdmsg: illegal msg payload size %zd\n",
178                                 msg->aux_size);
179                         error = EINVAL;
180                         break;
181                 }
182                 if (msg->aux_size) {
183                         msg->aux_data = kmalloc(msg->aux_size, iocom->mmsg,
184                                                 M_WAITOK | M_ZERO);
185                         error = fp_read(iocom->msg_fp, msg->aux_data,
186                                         msg->aux_size,
187                                         NULL, 1, UIO_SYSSPACE);
188                         if (error) {
189                                 kprintf("kdmsg: short msg payload received\n");
190                                 break;
191                         }
192                 }
193
194                 /*
195                  * State machine tracking, state assignment for msg,
196                  * returns error and discard status.  Errors are fatal
197                  * to the connection except for EALREADY which forces
198                  * a discard without execution.
199                  */
200                 error = kdmsg_state_msgrx(msg);
201                 if (error) {
202                         /*
203                          * Raw protocol or connection error
204                          */
205                         kdmsg_msg_free(msg);
206                         if (error == EALREADY)
207                                 error = 0;
208                 } else if (msg->state && msg->state->func) {
209                         /*
210                          * Message related to state which already has a
211                          * handling function installed for it.
212                          */
213                         error = msg->state->func(msg->state, msg);
214                         kdmsg_state_cleanuprx(msg);
215                 } else if ((msg->any.head.cmd & DMSGF_PROTOS) ==
216                            DMSG_PROTO_LNK) {
217                         /*
218                          * Message related to the LNK protocol set
219                          */
220                         error = iocom->lnk_rcvmsg(msg);
221                         kdmsg_state_cleanuprx(msg);
222                 } else if ((msg->any.head.cmd & DMSGF_PROTOS) ==
223                            DMSG_PROTO_DBG) {
224                         /*
225                          * Message related to the DBG protocol set
226                          */
227                         error = iocom->dbg_rcvmsg(msg);
228                         kdmsg_state_cleanuprx(msg);
229                 } else {
230                         /*
231                          * Other higher-level messages (e.g. vnops)
232                          */
233                         error = iocom->misc_rcvmsg(msg);
234                         kdmsg_state_cleanuprx(msg);
235                 }
236                 msg = NULL;
237         }
238
239         if (error)
240                 kprintf("kdmsg: read failed error %d\n", error);
241
242         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
243         if (msg) {
244                 if (msg->state && msg->state->msg == msg)
245                         msg->state->msg = NULL;
246                 kdmsg_msg_free(msg);
247         }
248
249         if ((state = iocom->freerd_state) != NULL) {
250                 iocom->freerd_state = NULL;
251                 kdmsg_state_free(state);
252         }
253
254         /*
255          * Shutdown the socket before waiting for the transmit side.
256          *
257          * If we are dying due to e.g. a socket disconnect verses being
258          * killed explicity we have to set KILL in order to kick the tx
259          * side when it might not have any other work to do.  KILL might
260          * already be set if we are in an unmount or reconnect.
261          */
262         fp_shutdown(iocom->msg_fp, SHUT_RDWR);
263
264         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
265         wakeup(&iocom->msg_ctl);
266
267         /*
268          * Wait for the transmit side to drain remaining messages
269          * before cleaning up the rx state.  The transmit side will
270          * set KILLTX and wait for the rx side to completely finish
271          * (set msgrd_td to NULL) before cleaning up any remaining
272          * tx states.
273          */
274         lockmgr(&iocom->msglk, LK_RELEASE);
275         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX);
276         wakeup(&iocom->msg_ctl);
277         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) {
278                 wakeup(&iocom->msg_ctl);
279                 tsleep(iocom, 0, "clstrkw", hz);
280         }
281
282         iocom->msgrd_td = NULL;
283
284         /*
285          * iocom can be ripped out from under us at this point but
286          * wakeup() is safe.
287          */
288         wakeup(iocom);
289         lwkt_exit();
290 }
291
292 static
293 void
294 kdmsg_iocom_thread_wr(void *arg)
295 {
296         kdmsg_iocom_t *iocom = arg;
297         kdmsg_msg_t *msg;
298         kdmsg_state_t *state;
299         ssize_t res;
300         int error = 0;
301         int retries = 20;
302
303         /*
304          * Transmit loop
305          */
306         msg = NULL;
307         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
308
309         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) {
310                 /*
311                  * Sleep if no messages pending.  Interlock with flag while
312                  * holding msglk.
313                  */
314                 if (TAILQ_EMPTY(&iocom->msgq)) {
315                         atomic_set_int(&iocom->msg_ctl,
316                                        KDMSG_CLUSTERCTL_SLEEPING);
317                         lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz);
318                         atomic_clear_int(&iocom->msg_ctl,
319                                          KDMSG_CLUSTERCTL_SLEEPING);
320                 }
321
322                 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
323                         /*
324                          * Remove msg from the transmit queue and do
325                          * persist and half-closed state handling.
326                          */
327                         TAILQ_REMOVE(&iocom->msgq, msg, qentry);
328                         lockmgr(&iocom->msglk, LK_RELEASE);
329
330                         error = kdmsg_state_msgtx(msg);
331                         if (error == EALREADY) {
332                                 error = 0;
333                                 kdmsg_msg_free(msg);
334                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
335                                 continue;
336                         }
337                         if (error) {
338                                 kdmsg_msg_free(msg);
339                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
340                                 break;
341                         }
342
343                         /*
344                          * Dump the message to the pipe or socket.
345                          */
346                         error = fp_write(iocom->msg_fp, &msg->any,
347                                          msg->hdr_size, &res, UIO_SYSSPACE);
348                         if (error || res != msg->hdr_size) {
349                                 if (error == 0)
350                                         error = EINVAL;
351                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
352                                 break;
353                         }
354                         if (msg->aux_size) {
355                                 error = fp_write(iocom->msg_fp,
356                                                  msg->aux_data, msg->aux_size,
357                                                  &res, UIO_SYSSPACE);
358                                 if (error || res != msg->aux_size) {
359                                         if (error == 0)
360                                                 error = EINVAL;
361                                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
362                                         break;
363                                 }
364                         }
365                         kdmsg_state_cleanuptx(msg);
366                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
367                 }
368         }
369
370         /*
371          * Cleanup messages pending transmission and release msgq lock.
372          */
373         if (error)
374                 kprintf("kdmsg: write failed error %d\n", error);
375
376         if (msg) {
377                 if (msg->state && msg->state->msg == msg)
378                         msg->state->msg = NULL;
379                 kdmsg_msg_free(msg);
380         }
381
382         /*
383          * Shutdown the socket.  This will cause the rx thread to get an
384          * EOF and ensure that both threads get to a termination state.
385          */
386         fp_shutdown(iocom->msg_fp, SHUT_RDWR);
387
388         /*
389          * Set KILLTX (which the rx side waits for), then wait for the RX
390          * side to completely finish before we clean out any remaining
391          * command states.
392          */
393         lockmgr(&iocom->msglk, LK_RELEASE);
394         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX);
395         wakeup(&iocom->msg_ctl);
396         while (iocom->msgrd_td) {
397                 wakeup(&iocom->msg_ctl);
398                 tsleep(iocom, 0, "clstrkw", hz);
399         }
400         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
401
402         /*
403          * Simulate received MSGF_DELETE's for any remaining states.
404          */
405 cleanuprd:
406         RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) {
407                 if (state->func &&
408                     (state->rxcmd & DMSGF_DELETE) == 0) {
409                         lockmgr(&iocom->msglk, LK_RELEASE);
410                         msg = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_ERROR,
411                                               NULL, NULL);
412                         if ((state->rxcmd & DMSGF_CREATE) == 0)
413                                 msg->any.head.cmd |= DMSGF_CREATE;
414                         msg->any.head.cmd |= DMSGF_DELETE;
415                         msg->state = state;
416                         state->rxcmd = msg->any.head.cmd &
417                                        ~DMSGF_DELETE;
418                         msg->state->func(state, msg);
419                         kdmsg_state_cleanuprx(msg);
420                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
421                         goto cleanuprd;
422                 }
423                 if (state->func == NULL) {
424                         state->flags &= ~KDMSG_STATE_INSERTED;
425                         RB_REMOVE(kdmsg_state_tree,
426                                   &iocom->staterd_tree, state);
427                         kdmsg_state_free(state);
428                         goto cleanuprd;
429                 }
430         }
431
432         /*
433          * NOTE: We have to drain the msgq to handle situations
434          *       where received states have built up output
435          *       messages, to avoid creating messages with
436          *       duplicate CREATE/DELETE flags.
437          */
438 cleanupwr:
439         kdmsg_drain_msgq(iocom);
440         RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) {
441                 if (state->func &&
442                     (state->rxcmd & DMSGF_DELETE) == 0) {
443                         lockmgr(&iocom->msglk, LK_RELEASE);
444                         msg = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_ERROR,
445                                               NULL, NULL);
446                         if ((state->rxcmd & DMSGF_CREATE) == 0)
447                                 msg->any.head.cmd |= DMSGF_CREATE;
448                         msg->any.head.cmd |= DMSGF_DELETE |
449                                              DMSGF_REPLY;
450                         msg->state = state;
451                         state->rxcmd = msg->any.head.cmd &
452                                        ~DMSGF_DELETE;
453                         msg->state->func(state, msg);
454                         kdmsg_state_cleanuprx(msg);
455                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
456                         goto cleanupwr;
457                 }
458                 if (state->func == NULL) {
459                         state->flags &= ~KDMSG_STATE_INSERTED;
460                         RB_REMOVE(kdmsg_state_tree,
461                                   &iocom->statewr_tree, state);
462                         kdmsg_state_free(state);
463                         goto cleanupwr;
464                 }
465         }
466
467         kdmsg_drain_msgq(iocom);
468         if (--retries == 0)
469                 panic("kdmsg: comm thread shutdown couldn't drain");
470         if (RB_ROOT(&iocom->statewr_tree))
471                 goto cleanupwr;
472
473         if ((state = iocom->freewr_state) != NULL) {
474                 iocom->freewr_state = NULL;
475                 kdmsg_state_free(state);
476         }
477
478         lockmgr(&iocom->msglk, LK_RELEASE);
479
480         /*
481          * The state trees had better be empty now
482          */
483         KKASSERT(RB_EMPTY(&iocom->staterd_tree));
484         KKASSERT(RB_EMPTY(&iocom->statewr_tree));
485         KKASSERT(iocom->conn_state == NULL);
486
487         /*
488          * iocom can be ripped out from under us once msgwr_td is set to NULL.
489          * The wakeup is safe.
490          */
491         iocom->msgwr_td = NULL;
492         wakeup(iocom);
493         lwkt_exit();
494 }
495
496 /*
497  * This cleans out the pending transmit message queue, adjusting any
498  * persistent states properly in the process.
499  *
500  * Caller must hold pmp->iocom.msglk
501  */
502 void
503 kdmsg_drain_msgq(kdmsg_iocom_t *iocom)
504 {
505         kdmsg_msg_t *msg;
506
507         /*
508          * Clean out our pending transmit queue, executing the
509          * appropriate state adjustments.  If this tries to open
510          * any new outgoing transactions we have to loop up and
511          * clean them out.
512          */
513         while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
514                 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
515                 lockmgr(&iocom->msglk, LK_RELEASE);
516                 if (msg->state && msg->state->msg == msg)
517                         msg->state->msg = NULL;
518                 if (kdmsg_state_msgtx(msg))
519                         kdmsg_msg_free(msg);
520                 else
521                         kdmsg_state_cleanuptx(msg);
522                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
523         }
524 }
525
526 /*
527  * Process state tracking for a message after reception, prior to
528  * execution.
529  *
530  * Called with the msg dequeued; msglk is acquired internally, not by the caller.
531  *
532  * All messages are called with dummy state and return actual state.
533  * (One-off messages often just return the same dummy state).
534  *
535  * May request that the caller discard the message by returning EALREADY.
536  * msg->state is not used in this case and is allowed to be NULL.
537  *
538  * --
539  *
540  * These routines handle persistent and command/reply message state via the
541  * CREATE and DELETE flags.  The first message in a command or reply sequence
542  * sets CREATE, the last message in a command or reply sequence sets DELETE.
543  *
544  * There can be any number of intermediate messages belonging to the same
545  * sequence sent in between the CREATE message and the DELETE message,
546  * which set neither flag.  This represents a streaming command or reply.
547  *
548  * Any command message received with CREATE set expects a reply sequence to
549  * be returned.  Reply sequences work the same as command sequences except the
550  * REPLY bit is also sent.  Both the command side and reply side can
551  * degenerate into a single message with both CREATE and DELETE set.  Note
552  * that one side can be streaming and the other side not, or neither, or both.
553  *
554  * The msgid is unique for the initiator.  That is, two sides sending a new
555  * message can use the same msgid without colliding.
556  *
557  * --
558  *
559  * ABORT sequences work by setting the ABORT flag along with normal message
560  * state.  However, ABORTs can also be sent on half-closed messages, that is
561  * even if the command or reply side has already sent a DELETE, as long as
562  * the message has not been fully closed it can still send an ABORT+DELETE
563  * to terminate the half-closed message state.
564  *
565  * Since ABORT+DELETEs can race we silently discard ABORT's for message
566  * state which has already been fully closed.  REPLY+ABORT+DELETEs can
567  * also race, and in this situation the other side might have already
568  * initiated a new unrelated command with the same message id.  Since
569  * the abort has not set the CREATE flag the situation can be detected
570  * and the message will also be discarded.
571  *
572  * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
573  * The ABORT request is essentially integrated into the command instead
574  * of being sent later on.  In this situation the command implementation
575  * detects that CREATE and ABORT are both set (vs ABORT alone) and can
576  * special-case non-blocking operation for the command.
577  *
578  * NOTE!  Messages with ABORT set without CREATE or DELETE are considered
579  *        to be mid-stream aborts for command/reply sequences.  ABORTs on
580  *        one-way messages are not supported.
581  *
582  * NOTE!  If a command sequence does not support aborts the ABORT flag is
583  *        simply ignored.
584  *
585  * --
586  *
587  * One-off messages (no reply expected) are sent with neither CREATE or DELETE
588  * set.  One-off messages cannot be aborted and typically aren't processed
589  * by these routines.  The REPLY bit can be used to distinguish whether a
590  * one-off message is a command or reply.  For example, one-off replies
591  * will typically just contain status updates.
592  */
593 int
594 kdmsg_state_msgrx(kdmsg_msg_t *msg)
595 {
596         kdmsg_iocom_t *iocom;
597         kdmsg_state_t *state;
598         int error;
599
600         iocom = msg->router->iocom;
601
602         /*
603          * XXX resolve msg->any.head.source and msg->any.head.target
604          *     into LNK_SPAN references.
605          *
606          * XXX replace msg->router
607          */
608
609         /*
610          * Make sure a state structure is ready to go in case we need a new
611          * one.  This is the only routine which uses freerd_state so no
612          * races are possible.
613          */
614         if ((state = iocom->freerd_state) == NULL) {
615                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
616                 state->flags = KDMSG_STATE_DYNAMIC;
617                 iocom->freerd_state = state;
618         }
619
620         /*
621          * Lock RB tree and locate existing persistent state, if any.
622          *
623          * If received msg is a command state is on staterd_tree.
624          * If received msg is a reply state is on statewr_tree.
625          */
626         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
627
628         state->msgid = msg->any.head.msgid;
629         state->router = &iocom->router;
630         kprintf("received msg %08x msgid %jx source=%jx target=%jx\n",
631                 msg->any.head.cmd,
632                 (intmax_t)msg->any.head.msgid,
633                 (intmax_t)msg->any.head.source,
634                 (intmax_t)msg->any.head.target);
635         if (msg->any.head.cmd & DMSGF_REPLY)
636                 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, state);
637         else
638                 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, state);
639         msg->state = state;
640
641         /*
642          * Short-cut one-off or mid-stream messages (state may be NULL).
643          */
644         if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
645                                   DMSGF_ABORT)) == 0) {
646                 lockmgr(&iocom->msglk, LK_RELEASE);
647                 return(0);
648         }
649
650         /*
651          * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
652          * inside the case statements.
653          */
654         switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY)) {
655         case DMSGF_CREATE:
656         case DMSGF_CREATE | DMSGF_DELETE:
657                 /*
658                  * New persistant command received.
659                  */
660                 if (state) {
661                         kprintf("kdmsg_state_msgrx: duplicate transaction\n");
662                         error = EINVAL;
663                         break;
664                 }
665                 state = iocom->freerd_state;
666                 iocom->freerd_state = NULL;
667                 msg->state = state;
668                 state->router = msg->router;
669                 state->msg = msg;
670                 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
671                 state->txcmd = DMSGF_REPLY;
672                 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state);
673                 state->flags |= KDMSG_STATE_INSERTED;
674                 error = 0;
675                 break;
676         case DMSGF_DELETE:
677                 /*
678                  * Persistent state is expected but might not exist if an
679                  * ABORT+DELETE races the close.
680                  */
681                 if (state == NULL) {
682                         if (msg->any.head.cmd & DMSGF_ABORT) {
683                                 error = EALREADY;
684                         } else {
685                                 kprintf("kdmsg_state_msgrx: no state "
686                                         "for DELETE\n");
687                                 error = EINVAL;
688                         }
689                         break;
690                 }
691
692                 /*
693                  * Handle another ABORT+DELETE case if the msgid has already
694                  * been reused.
695                  */
696                 if ((state->rxcmd & DMSGF_CREATE) == 0) {
697                         if (msg->any.head.cmd & DMSGF_ABORT) {
698                                 error = EALREADY;
699                         } else {
700                                 kprintf("kdmsg_state_msgrx: state reused "
701                                         "for DELETE\n");
702                                 error = EINVAL;
703                         }
704                         break;
705                 }
706                 error = 0;
707                 break;
708         default:
709                 /*
710                  * Check for mid-stream ABORT command received, otherwise
711                  * allow.
712                  */
713                 if (msg->any.head.cmd & DMSGF_ABORT) {
714                         if (state == NULL ||
715                             (state->rxcmd & DMSGF_CREATE) == 0) {
716                                 error = EALREADY;
717                                 break;
718                         }
719                 }
720                 error = 0;
721                 break;
722         case DMSGF_REPLY | DMSGF_CREATE:
723         case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
724                 /*
725                  * When receiving a reply with CREATE set the original
726                  * persistent state message should already exist.
727                  */
728                 if (state == NULL) {
729                         kprintf("kdmsg_state_msgrx: no state match for "
730                                 "REPLY cmd=%08x msgid=%016jx\n",
731                                 msg->any.head.cmd,
732                                 (intmax_t)msg->any.head.msgid);
733                         error = EINVAL;
734                         break;
735                 }
736                 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
737                 error = 0;
738                 break;
739         case DMSGF_REPLY | DMSGF_DELETE:
740                 /*
741                  * Received REPLY+ABORT+DELETE in case where msgid has
742                  * already been fully closed, ignore the message.
743                  */
744                 if (state == NULL) {
745                         if (msg->any.head.cmd & DMSGF_ABORT) {
746                                 error = EALREADY;
747                         } else {
748                                 kprintf("kdmsg_state_msgrx: no state match "
749                                         "for REPLY|DELETE\n");
750                                 error = EINVAL;
751                         }
752                         break;
753                 }
754
755                 /*
756                  * Received REPLY+ABORT+DELETE in case where msgid has
757                  * already been reused for an unrelated message,
758                  * ignore the message.
759                  */
760                 if ((state->rxcmd & DMSGF_CREATE) == 0) {
761                         if (msg->any.head.cmd & DMSGF_ABORT) {
762                                 error = EALREADY;
763                         } else {
764                                 kprintf("kdmsg_state_msgrx: state reused "
765                                         "for REPLY|DELETE\n");
766                                 error = EINVAL;
767                         }
768                         break;
769                 }
770                 error = 0;
771                 break;
772         case DMSGF_REPLY:
773                 /*
774                  * Check for mid-stream ABORT reply received to sent command.
775                  */
776                 if (msg->any.head.cmd & DMSGF_ABORT) {
777                         if (state == NULL ||
778                             (state->rxcmd & DMSGF_CREATE) == 0) {
779                                 error = EALREADY;
780                                 break;
781                         }
782                 }
783                 error = 0;
784                 break;
785         }
786         lockmgr(&iocom->msglk, LK_RELEASE);
787         return (error);
788 }
789
790 void
791 kdmsg_state_cleanuprx(kdmsg_msg_t *msg)
792 {
793         kdmsg_iocom_t *iocom;
794         kdmsg_state_t *state;
795
796         iocom = msg->router->iocom;
797
798         if ((state = msg->state) == NULL) {
799                 kdmsg_msg_free(msg);
800         } else if (msg->any.head.cmd & DMSGF_DELETE) {
801                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
802                 state->rxcmd |= DMSGF_DELETE;
803                 if (state->txcmd & DMSGF_DELETE) {
804                         if (state->msg == msg)
805                                 state->msg = NULL;
806                         KKASSERT(state->flags & KDMSG_STATE_INSERTED);
807                         if (state->rxcmd & DMSGF_REPLY) {
808                                 KKASSERT(msg->any.head.cmd &
809                                          DMSGF_REPLY);
810                                 RB_REMOVE(kdmsg_state_tree,
811                                           &iocom->statewr_tree, state);
812                         } else {
813                                 KKASSERT((msg->any.head.cmd &
814                                           DMSGF_REPLY) == 0);
815                                 RB_REMOVE(kdmsg_state_tree,
816                                           &iocom->staterd_tree, state);
817                         }
818                         state->flags &= ~KDMSG_STATE_INSERTED;
819                         lockmgr(&iocom->msglk, LK_RELEASE);
820                         kdmsg_state_free(state);
821                 } else {
822                         lockmgr(&iocom->msglk, LK_RELEASE);
823                 }
824                 kdmsg_msg_free(msg);
825         } else if (state->msg != msg) {
826                 kdmsg_msg_free(msg);
827         }
828 }
829
830 /*
831  * Process state tracking for a message prior to transmission.
832  *
833  * Called with msglk held and the msg dequeued.
834  *
835  * One-off messages are usually with dummy state and msg->state may be NULL
836  * in this situation.
837  *
838  * New transactions (when CREATE is set) will insert the state.
839  *
840  * May request that caller discard the message by setting *discardp to 1.
841  * A NULL state may be returned in this case.
842  */
843 int
844 kdmsg_state_msgtx(kdmsg_msg_t *msg)
845 {
846         kdmsg_iocom_t *iocom;
847         kdmsg_state_t *state;
848         int error;
849
850         iocom = msg->router->iocom;
851
852         /*
853          * Make sure a state structure is ready to go in case we need a new
854          * one.  This is the only routine which uses freewr_state so no
855          * races are possible.
856          */
857         if ((state = iocom->freewr_state) == NULL) {
858                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
859                 state->flags = KDMSG_STATE_DYNAMIC;
860                 iocom->freewr_state = state;
861         }
862
863         /*
864          * Lock RB tree.  If persistent state is present it will have already
865          * been assigned to msg.
866          */
867         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
868         state = msg->state;
869
870         /*
871          * Short-cut one-off or mid-stream messages (state may be NULL).
872          */
873         if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
874                                   DMSGF_ABORT)) == 0) {
875                 lockmgr(&iocom->msglk, LK_RELEASE);
876                 return(0);
877         }
878
879
880         /*
881          * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
882          * inside the case statements.
883          */
884         switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
885                                     DMSGF_REPLY)) {
886         case DMSGF_CREATE:
887         case DMSGF_CREATE | DMSGF_DELETE:
888                 /*
889                  * Insert the new persistent message state and mark
890                  * half-closed if DELETE is set.  Since this is a new
891                  * message it isn't possible to transition into the fully
892                  * closed state here.
893                  *
894                  * XXX state must be assigned and inserted by
895                  *     kdmsg_msg_write().  txcmd is assigned by us
896                  *     on-transmit.
897                  */
898                 KKASSERT(state != NULL);
899                 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
900                 state->rxcmd = DMSGF_REPLY;
901                 error = 0;
902                 break;
903         case DMSGF_DELETE:
904                 /*
905                  * Sent ABORT+DELETE in case where msgid has already
906                  * been fully closed, ignore the message.
907                  */
908                 if (state == NULL) {
909                         if (msg->any.head.cmd & DMSGF_ABORT) {
910                                 error = EALREADY;
911                         } else {
912                                 kprintf("kdmsg_state_msgtx: no state match "
913                                         "for DELETE cmd=%08x msgid=%016jx\n",
914                                         msg->any.head.cmd,
915                                         (intmax_t)msg->any.head.msgid);
916                                 error = EINVAL;
917                         }
918                         break;
919                 }
920
921                 /*
922                  * Sent ABORT+DELETE in case where msgid has
923                  * already been reused for an unrelated message,
924                  * ignore the message.
925                  */
926                 if ((state->txcmd & DMSGF_CREATE) == 0) {
927                         if (msg->any.head.cmd & DMSGF_ABORT) {
928                                 error = EALREADY;
929                         } else {
930                                 kprintf("kdmsg_state_msgtx: state reused "
931                                         "for DELETE\n");
932                                 error = EINVAL;
933                         }
934                         break;
935                 }
936                 error = 0;
937                 break;
938         default:
939                 /*
940                  * Check for mid-stream ABORT command sent
941                  */
942                 if (msg->any.head.cmd & DMSGF_ABORT) {
943                         if (state == NULL ||
944                             (state->txcmd & DMSGF_CREATE) == 0) {
945                                 error = EALREADY;
946                                 break;
947                         }
948                 }
949                 error = 0;
950                 break;
951         case DMSGF_REPLY | DMSGF_CREATE:
952         case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
953                 /*
954                  * When transmitting a reply with CREATE set the original
955                  * persistent state message should already exist.
956                  */
957                 if (state == NULL) {
958                         kprintf("kdmsg_state_msgtx: no state match "
959                                 "for REPLY | CREATE\n");
960                         error = EINVAL;
961                         break;
962                 }
963                 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
964                 error = 0;
965                 break;
966         case DMSGF_REPLY | DMSGF_DELETE:
967                 /*
968                  * When transmitting a reply with DELETE set the original
969                  * persistent state message should already exist.
970                  *
971                  * This is very similar to the REPLY|CREATE|* case except
972                  * txcmd is already stored, so we just add the DELETE flag.
973                  *
974                  * Sent REPLY+ABORT+DELETE in case where msgid has
975                  * already been fully closed, ignore the message.
976                  */
977                 if (state == NULL) {
978                         if (msg->any.head.cmd & DMSGF_ABORT) {
979                                 error = EALREADY;
980                         } else {
981                                 kprintf("kdmsg_state_msgtx: no state match "
982                                         "for REPLY | DELETE\n");
983                                 error = EINVAL;
984                         }
985                         break;
986                 }
987
988                 /*
989                  * Sent REPLY+ABORT+DELETE in case where msgid has already
990                  * been reused for an unrelated message, ignore the message.
991                  */
992                 if ((state->txcmd & DMSGF_CREATE) == 0) {
993                         if (msg->any.head.cmd & DMSGF_ABORT) {
994                                 error = EALREADY;
995                         } else {
996                                 kprintf("kdmsg_state_msgtx: state reused "
997                                         "for REPLY | DELETE\n");
998                                 error = EINVAL;
999                         }
1000                         break;
1001                 }
1002                 error = 0;
1003                 break;
1004         case DMSGF_REPLY:
1005                 /*
1006                  * Check for mid-stream ABORT reply sent.
1007                  *
1008                  * One-off REPLY messages are allowed for e.g. status updates.
1009                  */
1010                 if (msg->any.head.cmd & DMSGF_ABORT) {
1011                         if (state == NULL ||
1012                             (state->txcmd & DMSGF_CREATE) == 0) {
1013                                 error = EALREADY;
1014                                 break;
1015                         }
1016                 }
1017                 error = 0;
1018                 break;
1019         }
1020         lockmgr(&iocom->msglk, LK_RELEASE);
1021         return (error);
1022 }
1023
1024 void
1025 kdmsg_state_cleanuptx(kdmsg_msg_t *msg)
1026 {
1027         kdmsg_iocom_t *iocom;
1028         kdmsg_state_t *state;
1029
1030         iocom = msg->router->iocom;
1031
1032         if ((state = msg->state) == NULL) {
1033                 kdmsg_msg_free(msg);
1034         } else if (msg->any.head.cmd & DMSGF_DELETE) {
1035                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1036                 state->txcmd |= DMSGF_DELETE;
1037                 if (state->rxcmd & DMSGF_DELETE) {
1038                         if (state->msg == msg)
1039                                 state->msg = NULL;
1040                         KKASSERT(state->flags & KDMSG_STATE_INSERTED);
1041                         if (state->txcmd & DMSGF_REPLY) {
1042                                 KKASSERT(msg->any.head.cmd &
1043                                          DMSGF_REPLY);
1044                                 RB_REMOVE(kdmsg_state_tree,
1045                                           &iocom->staterd_tree, state);
1046                         } else {
1047                                 KKASSERT((msg->any.head.cmd &
1048                                           DMSGF_REPLY) == 0);
1049                                 RB_REMOVE(kdmsg_state_tree,
1050                                           &iocom->statewr_tree, state);
1051                         }
1052                         state->flags &= ~KDMSG_STATE_INSERTED;
1053                         lockmgr(&iocom->msglk, LK_RELEASE);
1054                         kdmsg_state_free(state);
1055                 } else {
1056                         lockmgr(&iocom->msglk, LK_RELEASE);
1057                 }
1058                 kdmsg_msg_free(msg);
1059         } else if (state->msg != msg) {
1060                 kdmsg_msg_free(msg);
1061         }
1062 }
1063
1064 void
1065 kdmsg_state_free(kdmsg_state_t *state)
1066 {
1067         kdmsg_iocom_t *iocom;
1068         kdmsg_msg_t *msg;
1069
1070         iocom = state->router->iocom;
1071
1072         KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0);
1073         msg = state->msg;
1074         state->msg = NULL;
1075         kfree(state, iocom->mmsg);
1076         if (msg)
1077                 kdmsg_msg_free(msg);
1078 }
1079
1080 kdmsg_msg_t *
1081 kdmsg_msg_alloc(kdmsg_router_t *router, uint32_t cmd,
1082                 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
1083 {
1084         kdmsg_iocom_t *iocom;
1085         kdmsg_msg_t *msg;
1086         kdmsg_state_t *state;
1087         size_t hbytes;
1088
1089         iocom = router->iocom;
1090         hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
1091         msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
1092                       iocom->mmsg, M_WAITOK | M_ZERO);
1093         msg->hdr_size = hbytes;
1094         msg->router = router;
1095         KKASSERT(router != NULL);
1096         msg->any.head.magic = DMSG_HDR_MAGIC;
1097         msg->any.head.source = 0;
1098         msg->any.head.target = router->target;
1099         msg->any.head.cmd = cmd;
1100
1101         if (cmd & DMSGF_CREATE) {
1102                 /*
1103                  * New transaction, requires tracking state and a unique
1104                  * msgid to be allocated.
1105                  */
1106                 KKASSERT(msg->state == NULL);
1107                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
1108                 state->flags = KDMSG_STATE_DYNAMIC;
1109                 state->func = func;
1110                 state->any.any = data;
1111                 state->msg = msg;
1112                 state->msgid = (uint64_t)(uintptr_t)state;
1113                 state->router = msg->router;
1114                 msg->state = state;
1115                 msg->any.head.source = 0;
1116                 msg->any.head.target = state->router->target;
1117                 msg->any.head.msgid = state->msgid;
1118
1119                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1120                 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state))
1121                         panic("duplicate msgid allocated");
1122                 state->flags |= KDMSG_STATE_INSERTED;
1123                 msg->any.head.msgid = state->msgid;
1124                 lockmgr(&iocom->msglk, LK_RELEASE);
1125         }
1126
1127         return (msg);
1128 }
1129
1130 void
1131 kdmsg_msg_free(kdmsg_msg_t *msg)
1132 {
1133         kdmsg_iocom_t *iocom;
1134
1135         iocom = msg->router->iocom;
1136
1137         if (msg->aux_data && msg->aux_size) {
1138                 kfree(msg->aux_data, iocom->mmsg);
1139                 msg->aux_data = NULL;
1140                 msg->aux_size = 0;
1141                 msg->router = NULL;
1142         }
1143         kfree(msg, iocom->mmsg);
1144 }
1145
1146 /*
1147  * Indexed messages are stored in a red-black tree indexed by their
1148  * msgid.  Only persistent messages are indexed.
1149  */
1150 int
1151 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2)
1152 {
1153         if (state1->router < state2->router)
1154                 return(-1);
1155         if (state1->router > state2->router)
1156                 return(1);
1157         if (state1->msgid < state2->msgid)
1158                 return(-1);
1159         if (state1->msgid > state2->msgid)
1160                 return(1);
1161         return(0);
1162 }
1163
1164 /*
1165  * Write a message.  All requisit command flags have been set.
1166  *
1167  * If msg->state is non-NULL the message is written to the existing
1168  * transaction.  msgid will be set accordingly.
1169  *
1170  * If msg->state is NULL and CREATE is set new state is allocated and
1171  * (func, data) is installed.  A msgid is assigned.
1172  *
1173  * If msg->state is NULL and CREATE is not set the message is assumed
1174  * to be a one-way message.  The originator must assign the msgid
1175  * (or leave it 0, which is typical.
1176  *
1177  * This function merely queues the message to the management thread, it
1178  * does not write to the message socket/pipe.
1179  */
1180 void
1181 kdmsg_msg_write(kdmsg_msg_t *msg)
1182 {
1183         kdmsg_iocom_t *iocom;
1184         kdmsg_state_t *state;
1185
1186         iocom = msg->router->iocom;
1187
1188         if (msg->state) {
1189                 /*
1190                  * Continuance or termination of existing transaction.
1191                  * The transaction could have been initiated by either end.
1192                  *
1193                  * (Function callback and aux data for the receive side can
1194                  * be replaced or left alone).
1195                  */
1196                 state = msg->state;
1197                 msg->any.head.msgid = state->msgid;
1198                 msg->any.head.source = 0;
1199                 msg->any.head.target = state->router->target;
1200                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1201         } else {
1202                 /*
1203                  * One-off message (always uses msgid 0 to distinguish
1204                  * between a possibly lost in-transaction message due to
1205                  * competing aborts and a real one-off message?)
1206                  */
1207                 msg->any.head.msgid = 0;
1208                 msg->any.head.source = 0;
1209                 msg->any.head.target = msg->router->target;
1210                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1211         }
1212
1213         /*
1214          * Finish up the msg fields
1215          */
1216         msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255);
1217         ++iocom->msg_seq;
1218
1219         msg->any.head.hdr_crc = 0;
1220         msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size);
1221
1222         TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry);
1223         iocom->clusterctl_wakeup(iocom);
1224         lockmgr(&iocom->msglk, LK_RELEASE);
1225 }
1226
1227 /*
1228  * Reply to a message and terminate our side of the transaction.
1229  *
1230  * If msg->state is non-NULL we are replying to a one-way message.
1231  */
1232 void
1233 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error)
1234 {
1235         kdmsg_state_t *state = msg->state;
1236         kdmsg_msg_t *nmsg;
1237         uint32_t cmd;
1238
1239         /*
1240          * Reply with a simple error code and terminate the transaction.
1241          */
1242         cmd = DMSG_LNK_ERROR;
1243
1244         /*
1245          * Check if our direction has even been initiated yet, set CREATE.
1246          *
1247          * Check what direction this is (command or reply direction).  Note
1248          * that txcmd might not have been initiated yet.
1249          *
1250          * If our direction has already been closed we just return without
1251          * doing anything.
1252          */
1253         if (state) {
1254                 if (state->txcmd & DMSGF_DELETE)
1255                         return;
1256                 if ((state->txcmd & DMSGF_CREATE) == 0)
1257                         cmd |= DMSGF_CREATE;
1258                 if (state->txcmd & DMSGF_REPLY)
1259                         cmd |= DMSGF_REPLY;
1260                 cmd |= DMSGF_DELETE;
1261         } else {
1262                 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1263                         cmd |= DMSGF_REPLY;
1264         }
1265         kprintf("MSG_REPLY state=%p msg %08x\n", state, cmd);
1266
1267         /* XXX messy mask cmd to avoid allocating state */
1268         nmsg = kdmsg_msg_alloc(msg->router, cmd & DMSGF_BASECMDMASK,
1269                                NULL, NULL);
1270         nmsg->any.head.cmd = cmd;
1271         nmsg->any.head.error = error;
1272         nmsg->state = state;
1273         kdmsg_msg_write(nmsg);
1274 }
1275
1276 /*
1277  * Reply to a message and continue our side of the transaction.
1278  *
1279  * If msg->state is non-NULL we are replying to a one-way message and this
1280  * function degenerates into the same as kdmsg_msg_reply().
1281  */
1282 void
1283 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error)
1284 {
1285         kdmsg_state_t *state = msg->state;
1286         kdmsg_msg_t *nmsg;
1287         uint32_t cmd;
1288
1289         /*
1290          * Return a simple result code, do NOT terminate the transaction.
1291          */
1292         cmd = DMSG_LNK_ERROR;
1293
1294         /*
1295          * Check if our direction has even been initiated yet, set CREATE.
1296          *
1297          * Check what direction this is (command or reply direction).  Note
1298          * that txcmd might not have been initiated yet.
1299          *
1300          * If our direction has already been closed we just return without
1301          * doing anything.
1302          */
1303         if (state) {
1304                 if (state->txcmd & DMSGF_DELETE)
1305                         return;
1306                 if ((state->txcmd & DMSGF_CREATE) == 0)
1307                         cmd |= DMSGF_CREATE;
1308                 if (state->txcmd & DMSGF_REPLY)
1309                         cmd |= DMSGF_REPLY;
1310                 /* continuing transaction, do not set MSGF_DELETE */
1311         } else {
1312                 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1313                         cmd |= DMSGF_REPLY;
1314         }
1315
1316         /* XXX messy mask cmd to avoid allocating state */
1317         nmsg = kdmsg_msg_alloc(msg->router, cmd & DMSGF_BASECMDMASK,
1318                                NULL, NULL);
1319         nmsg->any.head.cmd = cmd;
1320         nmsg->any.head.error = error;
1321         nmsg->state = state;
1322         kdmsg_msg_write(nmsg);
1323 }