kern - More work replacing %D
[dragonfly.git] / sys / kern / kern_dmsg.c
1 /*-
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * TODO: txcmd CREATE state is deferred by txmsgq, need to calculate
36  *       a streaming response.  See subr_diskiocom()'s diskiodone().
37  */
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/kernel.h>
41 #include <sys/conf.h>
42 #include <sys/systm.h>
43 #include <sys/queue.h>
44 #include <sys/tree.h>
45 #include <sys/malloc.h>
46 #include <sys/mount.h>
47 #include <sys/socket.h>
48 #include <sys/vnode.h>
49 #include <sys/file.h>
50 #include <sys/proc.h>
51 #include <sys/priv.h>
52 #include <sys/thread.h>
53 #include <sys/globaldata.h>
54 #include <sys/limits.h>
55
56 #include <sys/dmsg.h>
57
58 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp);
59 RB_GENERATE(kdmsg_circuit_tree, kdmsg_circuit, rbnode, kdmsg_circuit_cmp);
60
61 static int kdmsg_msg_receive_handling(kdmsg_msg_t *msg);
62 static int kdmsg_circ_msgrx(kdmsg_msg_t *msg);
63 static int kdmsg_state_msgrx(kdmsg_msg_t *msg);
64 static int kdmsg_state_msgtx(kdmsg_msg_t *msg);
65 static void kdmsg_state_cleanuprx(kdmsg_msg_t *msg);
66 static void kdmsg_state_cleanuptx(kdmsg_msg_t *msg);
67 static void kdmsg_state_abort(kdmsg_state_t *state);
68 static void kdmsg_state_free(kdmsg_state_t *state);
69
70 static void kdmsg_iocom_thread_rd(void *arg);
71 static void kdmsg_iocom_thread_wr(void *arg);
72 static int kdmsg_autorxmsg(kdmsg_msg_t *msg);
73 static void kdmsg_autocirc(kdmsg_msg_t *msg);
74 static int kdmsg_autocirc_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
75
76 static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);
77
78 void
79 kdmsg_circ_hold(kdmsg_circuit_t *circ)
80 {
81         atomic_add_int(&circ->refs, 1);
82 }
83
84 void
85 kdmsg_circ_drop(kdmsg_circuit_t *circ)
86 {
87         kdmsg_iocom_t *iocom;
88
89         if (atomic_fetchadd_int(&circ->refs, -1) == 1) {
90                 KKASSERT(circ->span_state == NULL &&
91                          circ->circ_state == NULL &&
92                          circ->rcirc_state == NULL &&
93                          circ->recorded == 0);
94                 iocom = circ->iocom;
95                 circ->iocom = NULL;
96                 kfree(circ, iocom->mmsg);
97         }
98 }
99
100
101 /*
102  * Initialize the roll-up communications structure for a network
103  * messaging session.  This function does not install the socket.
104  */
105 void
106 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, uint32_t flags,
107                  struct malloc_type *mmsg,
108                  int (*rcvmsg)(kdmsg_msg_t *msg))
109 {
110         bzero(iocom, sizeof(*iocom));
111         iocom->handle = handle;
112         iocom->mmsg = mmsg;
113         iocom->rcvmsg = rcvmsg;
114         iocom->flags = flags;
115         lockinit(&iocom->msglk, "h2msg", 0, 0);
116         TAILQ_INIT(&iocom->msgq);
117         RB_INIT(&iocom->circ_tree);
118         RB_INIT(&iocom->staterd_tree);
119         RB_INIT(&iocom->statewr_tree);
120 }
121
122 /*
123  * [Re]connect using the passed file pointer.  The caller must ref the
124  * fp for us.  We own that ref now.
125  */
126 void
127 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
128                       const char *subsysname)
129 {
130         /*
131          * Destroy the current connection
132          */
133         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
134         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
135         while (iocom->msgrd_td || iocom->msgwr_td) {
136                 wakeup(&iocom->msg_ctl);
137                 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz);
138         }
139
140         /*
141          * Drop communications descriptor
142          */
143         if (iocom->msg_fp) {
144                 fdrop(iocom->msg_fp);
145                 iocom->msg_fp = NULL;
146         }
147
148         /*
149          * Setup new communications descriptor
150          */
151         iocom->msg_ctl = 0;
152         iocom->msg_fp = fp;
153         iocom->msg_seq = 0;
154         iocom->flags &= ~KDMSG_IOCOMF_EXITNOACC;
155
156         lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td,
157                     NULL, 0, -1, "%s-msgrd", subsysname);
158         lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td,
159                     NULL, 0, -1, "%s-msgwr", subsysname);
160         lockmgr(&iocom->msglk, LK_RELEASE);
161 }
162
163 /*
164  * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls
165  * this function to handle the state machine for LNK_CONN and LNK_SPAN.
166  *
167  * NOTE: Caller typically also sets the IOCOMF_AUTOCONN, IOCOMF_AUTOSPAN,
168  *       and IOCOMF_AUTOCIRC in the kdmsg_iocom_init() call.  Clients
169  *       typically set IOCOMF_AUTOFORGE to automatically forge circuits
170  *       for received SPANs.
171  */
172 static int kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
173 static int kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
174
175 void
176 kdmsg_iocom_autoinitiate(kdmsg_iocom_t *iocom,
177                          void (*auto_callback)(kdmsg_msg_t *msg))
178 {
179         kdmsg_msg_t *msg;
180
181         iocom->auto_callback = auto_callback;
182
183         msg = kdmsg_msg_alloc(iocom, NULL,
184                               DMSG_LNK_CONN | DMSGF_CREATE,
185                               kdmsg_lnk_conn_reply, NULL);
186         iocom->auto_lnk_conn.head = msg->any.head;
187         msg->any.lnk_conn = iocom->auto_lnk_conn;
188         iocom->conn_state = msg->state;
189         kdmsg_msg_write(msg);
190 }
191
192 static
193 int
194 kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
195 {
196         kdmsg_iocom_t *iocom = state->iocom;
197         kdmsg_msg_t *rmsg;
198
199         if (msg->any.head.cmd & DMSGF_CREATE) {
200                 rmsg = kdmsg_msg_alloc(iocom, NULL,
201                                        DMSG_LNK_SPAN | DMSGF_CREATE,
202                                        kdmsg_lnk_span_reply, NULL);
203                 iocom->auto_lnk_span.head = rmsg->any.head;
204                 rmsg->any.lnk_span = iocom->auto_lnk_span;
205                 kdmsg_msg_write(rmsg);
206         }
207
208         /*
209          * Process shim after the CONN is acknowledged and before the CONN
210          * transaction is deleted.  For deletions this gives device drivers
211          * the ability to interlock new operations on the circuit before
212          * it becomes illegal and panics.
213          */
214         if (iocom->auto_callback)
215                 iocom->auto_callback(msg);
216
217         if ((state->txcmd & DMSGF_DELETE) == 0 &&
218             (msg->any.head.cmd & DMSGF_DELETE)) {
219                 iocom->conn_state = NULL;
220                 kdmsg_msg_reply(msg, 0);
221         }
222
223         return (0);
224 }
225
226 static
227 int
228 kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
229 {
230         /*
231          * Be sure to process shim before terminating the SPAN
232          * transaction.  Gives device drivers the ability to
233          * interlock new operations on the circuit before it
234          * becomes illegal and panics.
235          */
236         if (state->iocom->auto_callback)
237                 state->iocom->auto_callback(msg);
238
239         if ((state->txcmd & DMSGF_DELETE) == 0 &&
240             (msg->any.head.cmd & DMSGF_DELETE)) {
241                 kdmsg_msg_reply(msg, 0);
242         }
243         return (0);
244 }
245
246 /*
247  * Disconnect and clean up
248  */
249 void
250 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom)
251 {
252         /*
253          * Ask the cluster controller to go away
254          */
255         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
256         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
257
258         while (iocom->msgrd_td || iocom->msgwr_td) {
259                 wakeup(&iocom->msg_ctl);
260                 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz);
261         }
262
263         /*
264          * Drop communications descriptor
265          */
266         if (iocom->msg_fp) {
267                 fdrop(iocom->msg_fp);
268                 iocom->msg_fp = NULL;
269         }
270         lockmgr(&iocom->msglk, LK_RELEASE);
271 }
272
273 /*
274  * Cluster controller thread.  Perform messaging functions.  We have one
275  * thread for the reader and one for the writer.  The writer handles
276  * shutdown requests (which should break the reader thread).
277  */
278 static
279 void
280 kdmsg_iocom_thread_rd(void *arg)
281 {
282         kdmsg_iocom_t *iocom = arg;
283         dmsg_hdr_t hdr;
284         kdmsg_msg_t *msg = NULL;
285         kdmsg_state_t *state;
286         size_t hbytes;
287         size_t abytes;
288         int error = 0;
289
290         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) {
291                 /*
292                  * Retrieve the message from the pipe or socket.
293                  */
294                 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr),
295                                 NULL, 1, UIO_SYSSPACE);
296                 if (error)
297                         break;
298                 if (hdr.magic != DMSG_HDR_MAGIC) {
299                         kprintf("kdmsg: bad magic: %04x\n", hdr.magic);
300                         error = EINVAL;
301                         break;
302                 }
303                 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN;
304                 if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) {
305                         kprintf("kdmsg: bad header size %zd\n", hbytes);
306                         error = EINVAL;
307                         break;
308                 }
309                 /* XXX messy: mask cmd to avoid allocating state */
310                 msg = kdmsg_msg_alloc(iocom, NULL,
311                                       hdr.cmd & DMSGF_BASECMDMASK,
312                                       NULL, NULL);
313                 msg->any.head = hdr;
314                 msg->hdr_size = hbytes;
315                 if (hbytes > sizeof(hdr)) {
316                         error = fp_read(iocom->msg_fp, &msg->any.head + 1,
317                                         hbytes - sizeof(hdr),
318                                         NULL, 1, UIO_SYSSPACE);
319                         if (error) {
320                                 kprintf("kdmsg: short msg received\n");
321                                 error = EINVAL;
322                                 break;
323                         }
324                 }
325                 msg->aux_size = hdr.aux_bytes;
326                 if (msg->aux_size > DMSG_AUX_MAX) {
327                         kprintf("kdmsg: illegal msg payload size %zd\n",
328                                 msg->aux_size);
329                         error = EINVAL;
330                         break;
331                 }
332                 if (msg->aux_size) {
333                         abytes = DMSG_DOALIGN(msg->aux_size);
334                         msg->aux_data = kmalloc(abytes, iocom->mmsg, M_WAITOK);
335                         msg->flags |= KDMSG_FLAG_AUXALLOC;
336                         error = fp_read(iocom->msg_fp, msg->aux_data,
337                                         abytes, NULL, 1, UIO_SYSSPACE);
338                         if (error) {
339                                 kprintf("kdmsg: short msg payload received\n");
340                                 break;
341                         }
342                 }
343
344                 (void)kdmsg_circ_msgrx(msg);
345                 error = kdmsg_msg_receive_handling(msg);
346                 msg = NULL;
347         }
348
349         if (error)
350                 kprintf("kdmsg: read failed error %d\n", error);
351
352         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
353         if (msg)
354                 kdmsg_msg_free(msg);
355
356         if ((state = iocom->freerd_state) != NULL) {
357                 iocom->freerd_state = NULL;
358                 kdmsg_state_free(state);
359         }
360
361         /*
362          * Shutdown the socket before waiting for the transmit side.
363          *
364          * If we are dying due to e.g. a socket disconnect verses being
365          * killed explicity we have to set KILL in order to kick the tx
366          * side when it might not have any other work to do.  KILL might
367          * already be set if we are in an unmount or reconnect.
368          */
369         fp_shutdown(iocom->msg_fp, SHUT_RDWR);
370
371         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
372         wakeup(&iocom->msg_ctl);
373
374         /*
375          * Wait for the transmit side to drain remaining messages
376          * before cleaning up the rx state.  The transmit side will
377          * set KILLTX and wait for the rx side to completely finish
378          * (set msgrd_td to NULL) before cleaning up any remaining
379          * tx states.
380          */
381         lockmgr(&iocom->msglk, LK_RELEASE);
382         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX);
383         wakeup(&iocom->msg_ctl);
384         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) {
385                 wakeup(&iocom->msg_ctl);
386                 tsleep(iocom, 0, "clstrkw", hz);
387         }
388
389         iocom->msgrd_td = NULL;
390
391         /*
392          * iocom can be ripped out from under us at this point but
393          * wakeup() is safe.
394          */
395         wakeup(iocom);
396         lwkt_exit();
397 }
398
399 static
400 void
401 kdmsg_iocom_thread_wr(void *arg)
402 {
403         kdmsg_iocom_t *iocom = arg;
404         kdmsg_msg_t *msg;
405         kdmsg_state_t *state;
406         ssize_t res;
407         size_t abytes;
408         int error = 0;
409         int retries = 20;
410
411         /*
412          * Transmit loop
413          */
414         msg = NULL;
415         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
416
417         while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) {
418                 /*
419                  * Sleep if no messages pending.  Interlock with flag while
420                  * holding msglk.
421                  */
422                 if (TAILQ_EMPTY(&iocom->msgq)) {
423                         atomic_set_int(&iocom->msg_ctl,
424                                        KDMSG_CLUSTERCTL_SLEEPING);
425                         lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz);
426                         atomic_clear_int(&iocom->msg_ctl,
427                                          KDMSG_CLUSTERCTL_SLEEPING);
428                 }
429
430                 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
431                         /*
432                          * Remove msg from the transmit queue and do
433                          * persist and half-closed state handling.
434                          */
435                         TAILQ_REMOVE(&iocom->msgq, msg, qentry);
436                         lockmgr(&iocom->msglk, LK_RELEASE);
437
438                         error = kdmsg_state_msgtx(msg);
439                         if (error == EALREADY) {
440                                 error = 0;
441                                 kdmsg_msg_free(msg);
442                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
443                                 continue;
444                         }
445                         if (error) {
446                                 kdmsg_msg_free(msg);
447                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
448                                 break;
449                         }
450
451                         /*
452                          * Dump the message to the pipe or socket.
453                          *
454                          * We have to clean up the message as if the transmit
455                          * succeeded even if it failed.
456                          */
457                         error = fp_write(iocom->msg_fp, &msg->any,
458                                          msg->hdr_size, &res, UIO_SYSSPACE);
459                         if (error || res != msg->hdr_size) {
460                                 if (error == 0)
461                                         error = EINVAL;
462                                 kdmsg_state_cleanuptx(msg);
463                                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
464                                 break;
465                         }
466                         if (msg->aux_size) {
467                                 abytes = DMSG_DOALIGN(msg->aux_size);
468                                 error = fp_write(iocom->msg_fp,
469                                                  msg->aux_data, abytes,
470                                                  &res, UIO_SYSSPACE);
471                                 if (error || res != abytes) {
472                                         if (error == 0)
473                                                 error = EINVAL;
474                                         kdmsg_state_cleanuptx(msg);
475                                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
476                                         break;
477                                 }
478                         }
479                         kdmsg_state_cleanuptx(msg);
480                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
481                 }
482         }
483
484         /*
485          * Cleanup messages pending transmission and release msgq lock.
486          */
487         if (error)
488                 kprintf("kdmsg: write failed error %d\n", error);
489         kprintf("thread_wr: Terminating iocom\n");
490
491         /*
492          * Shutdown the socket.  This will cause the rx thread to get an
493          * EOF and ensure that both threads get to a termination state.
494          */
495         fp_shutdown(iocom->msg_fp, SHUT_RDWR);
496
497         /*
498          * Set KILLTX (which the rx side waits for), then wait for the RX
499          * side to completely finish before we clean out any remaining
500          * command states.
501          */
502         lockmgr(&iocom->msglk, LK_RELEASE);
503         atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX);
504         wakeup(&iocom->msg_ctl);
505         while (iocom->msgrd_td) {
506                 wakeup(&iocom->msg_ctl);
507                 tsleep(iocom, 0, "clstrkw", hz);
508         }
509         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
510
511         /*
512          * Simulate received MSGF_DELETE's for any remaining states.
513          * (For remote masters).
514          *
515          * Drain the message queue to handle any device initiated writes
516          * due to state callbacks.
517          */
518 cleanuprd:
519         kdmsg_drain_msgq(iocom);
520         RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) {
521                 if ((state->rxcmd & DMSGF_DELETE) == 0) {
522                         lockmgr(&iocom->msglk, LK_RELEASE);
523                         kdmsg_state_abort(state);
524                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
525                         goto cleanuprd;
526                 }
527         }
528
529         /*
530          * Simulate received MSGF_DELETE's for any remaining states.
531          * (For local masters).
532          */
533 cleanupwr:
534         kdmsg_drain_msgq(iocom);
535         RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) {
536                 if ((state->rxcmd & DMSGF_DELETE) == 0) {
537                         lockmgr(&iocom->msglk, LK_RELEASE);
538                         kdmsg_state_abort(state);
539                         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
540                         goto cleanupwr;
541                 }
542         }
543
544         /*
545          * Retry until all work is done
546          */
547         if (--retries == 0)
548                 panic("kdmsg: comm thread shutdown couldn't drain");
549         if (TAILQ_FIRST(&iocom->msgq) ||
550             RB_ROOT(&iocom->staterd_tree) ||
551             RB_ROOT(&iocom->statewr_tree)) {
552                 goto cleanuprd;
553         }
554         iocom->flags |= KDMSG_IOCOMF_EXITNOACC;
555
556         if ((state = iocom->freewr_state) != NULL) {
557                 iocom->freewr_state = NULL;
558                 kdmsg_state_free(state);
559         }
560
561         lockmgr(&iocom->msglk, LK_RELEASE);
562
563         /*
564          * The state trees had better be empty now
565          */
566         KKASSERT(RB_EMPTY(&iocom->staterd_tree));
567         KKASSERT(RB_EMPTY(&iocom->statewr_tree));
568         KKASSERT(iocom->conn_state == NULL);
569
570         if (iocom->exit_func) {
571                 /*
572                  * iocom is invalid after we call the exit function.
573                  */
574                 iocom->msgwr_td = NULL;
575                 iocom->exit_func(iocom);
576         } else {
577                 /*
578                  * iocom can be ripped out from under us once msgwr_td is
579                  * set to NULL.  The wakeup is safe.
580                  */
581                 iocom->msgwr_td = NULL;
582                 wakeup(iocom);
583         }
584         lwkt_exit();
585 }
586
587 /*
588  * This cleans out the pending transmit message queue, adjusting any
589  * persistent states properly in the process.
590  *
591  * Caller must hold pmp->iocom.msglk
592  */
593 void
594 kdmsg_drain_msgq(kdmsg_iocom_t *iocom)
595 {
596         kdmsg_msg_t *msg;
597
598         /*
599          * Clean out our pending transmit queue, executing the
600          * appropriate state adjustments.  If this tries to open
601          * any new outgoing transactions we have to loop up and
602          * clean them out.
603          */
604         while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
605                 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
606                 lockmgr(&iocom->msglk, LK_RELEASE);
607                 if (kdmsg_state_msgtx(msg))
608                         kdmsg_msg_free(msg);
609                 else
610                         kdmsg_state_cleanuptx(msg);
611                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
612         }
613 }
614
615 /*
616  * Do all processing required to handle a freshly received message
617  * after its low level header has been validated.
618  */
619 static
620 int
621 kdmsg_msg_receive_handling(kdmsg_msg_t *msg)
622 {
623         kdmsg_iocom_t *iocom = msg->iocom;
624         int error;
625
626         /*
627          * State machine tracking, state assignment for msg,
628          * returns error and discard status.  Errors are fatal
629          * to the connection except for EALREADY which forces
630          * a discard without execution.
631          */
632         error = kdmsg_state_msgrx(msg);
633         if (error) {
634                 /*
635                  * Raw protocol or connection error
636                  */
637                 kdmsg_msg_free(msg);
638                 if (error == EALREADY)
639                         error = 0;
640         } else if (msg->state && msg->state->func) {
641                 /*
642                  * Message related to state which already has a
643                  * handling function installed for it.
644                  */
645                 error = msg->state->func(msg->state, msg);
646                 kdmsg_state_cleanuprx(msg);
647         } else if (iocom->flags & KDMSG_IOCOMF_AUTOANY) {
648                 error = kdmsg_autorxmsg(msg);
649                 kdmsg_state_cleanuprx(msg);
650         } else {
651                 error = iocom->rcvmsg(msg);
652                 kdmsg_state_cleanuprx(msg);
653         }
654         return error;
655 }
656
657 /*
658  * Process circuit tracking (NEEDS WORK)
659  */
660 static
661 int
662 kdmsg_circ_msgrx(kdmsg_msg_t *msg)
663 {
664         kdmsg_circuit_t dummy;
665         kdmsg_circuit_t *circ;
666         int error = 0;
667
668         if (msg->any.head.circuit) {
669                 dummy.msgid = msg->any.head.circuit;
670                 lwkt_gettoken(&kdmsg_token);
671                 circ = RB_FIND(kdmsg_circuit_tree, &msg->iocom->circ_tree,
672                                &dummy);
673                 if (circ) {
674                         msg->circ = circ;
675                         kdmsg_circ_hold(circ);
676                 }
677                 if (circ == NULL) {
678                         kprintf("KDMSG_CIRC_MSGRX CMD %08x: IOCOM %p "
679                                 "Bad circuit %016jx\n",
680                                 msg->any.head.cmd,
681                                 msg->iocom,
682                                 (intmax_t)msg->any.head.circuit);
683                         kprintf("KDMSG_CIRC_MSGRX: Avail circuits: ");
684                         RB_FOREACH(circ, kdmsg_circuit_tree,
685                                    &msg->iocom->circ_tree) {
686                                 kprintf(" %016jx", (intmax_t)circ->msgid);
687                         }
688                         kprintf("\n");
689                         error = EINVAL;
690                 }
691                 lwkt_reltoken(&kdmsg_token);
692         }
693         return (error);
694 }
695
696 /*
697  * Process state tracking for a message after reception, prior to
698  * execution.
699  *
700  * Called with msglk held and the msg dequeued.
701  *
702  * All messages are called with dummy state and return actual state.
703  * (One-off messages often just return the same dummy state).
704  *
705  * May request that caller discard the message by setting *discardp to 1.
706  * The returned state is not used in this case and is allowed to be NULL.
707  *
708  * --
709  *
710  * These routines handle persistent and command/reply message state via the
711  * CREATE and DELETE flags.  The first message in a command or reply sequence
712  * sets CREATE, the last message in a command or reply sequence sets DELETE.
713  *
714  * There can be any number of intermediate messages belonging to the same
715  * sequence sent inbetween the CREATE message and the DELETE message,
716  * which set neither flag.  This represents a streaming command or reply.
717  *
718  * Any command message received with CREATE set expects a reply sequence to
719  * be returned.  Reply sequences work the same as command sequences except the
720  * REPLY bit is also sent.  Both the command side and reply side can
721  * degenerate into a single message with both CREATE and DELETE set.  Note
722  * that one side can be streaming and the other side not, or neither, or both.
723  *
724  * The msgid is unique for the initiator.  That is, two sides sending a new
725  * message can use the same msgid without colliding.
726  *
727  * --
728  *
729  * ABORT sequences work by setting the ABORT flag along with normal message
730  * state.  However, ABORTs can also be sent on half-closed messages, that is
731  * even if the command or reply side has already sent a DELETE, as long as
732  * the message has not been fully closed it can still send an ABORT+DELETE
733  * to terminate the half-closed message state.
734  *
735  * Since ABORT+DELETEs can race we silently discard ABORT's for message
736  * state which has already been fully closed.  REPLY+ABORT+DELETEs can
737  * also race, and in this situation the other side might have already
738  * initiated a new unrelated command with the same message id.  Since
739  * the abort has not set the CREATE flag the situation can be detected
740  * and the message will also be discarded.
741  *
742  * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
743  * The ABORT request is essentially integrated into the command instead
744  * of being sent later on.  In this situation the command implementation
745  * detects that CREATE and ABORT are both set (vs ABORT alone) and can
746  * special-case non-blocking operation for the command.
747  *
748  * NOTE!  Messages with ABORT set without CREATE or DELETE are considered
749  *        to be mid-stream aborts for command/reply sequences.  ABORTs on
750  *        one-way messages are not supported.
751  *
752  * NOTE!  If a command sequence does not support aborts the ABORT flag is
753  *        simply ignored.
754  *
755  * --
756  *
757  * One-off messages (no reply expected) are sent with neither CREATE or DELETE
758  * set.  One-off messages cannot be aborted and typically aren't processed
759  * by these routines.  The REPLY bit can be used to distinguish whether a
760  * one-off message is a command or reply.  For example, one-off replies
761  * will typically just contain status updates.
762  */
763 static
764 int
765 kdmsg_state_msgrx(kdmsg_msg_t *msg)
766 {
767         kdmsg_iocom_t *iocom = msg->iocom;
768         kdmsg_state_t *state;
769         int error;
770
771         /*
772          * Make sure a state structure is ready to go in case we need a new
773          * one.  This is the only routine which uses freerd_state so no
774          * races are possible.
775          */
776         if ((state = iocom->freerd_state) == NULL) {
777                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
778                 state->flags = KDMSG_STATE_DYNAMIC;
779                 iocom->freerd_state = state;
780         }
781
782         /*
783          * Lock RB tree and locate existing persistent state, if any.
784          *
785          * If received msg is a command state is on staterd_tree.
786          * If received msg is a reply state is on statewr_tree.
787          */
788         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
789
790         state->msgid = msg->any.head.msgid;
791         state->circ = msg->circ;
792         state->iocom = iocom;
793         if (msg->any.head.cmd & DMSGF_REPLY)
794                 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, state);
795         else
796                 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, state);
797         msg->state = state;
798
799         /*
800          * Short-cut one-off or mid-stream messages (state may be NULL).
801          */
802         if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
803                                   DMSGF_ABORT)) == 0) {
804                 lockmgr(&iocom->msglk, LK_RELEASE);
805                 return(0);
806         }
807
808         /*
809          * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
810          * inside the case statements.
811          */
812         switch(msg->any.head.cmd & (DMSGF_CREATE|DMSGF_DELETE|DMSGF_REPLY)) {
813         case DMSGF_CREATE:
814         case DMSGF_CREATE | DMSGF_DELETE:
815                 /*
816                  * New persistant command received.
817                  */
818                 if (state) {
819                         kprintf("kdmsg_state_msgrx: duplicate transaction\n");
820                         error = EINVAL;
821                         break;
822                 }
823                 state = iocom->freerd_state;
824                 iocom->freerd_state = NULL;
825                 msg->state = state;
826                 state->msg = msg;
827                 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK;
828                 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
829                 state->txcmd = DMSGF_REPLY;
830                 state->msgid = msg->any.head.msgid;
831                 if ((state->circ = msg->circ) != NULL)
832                         kdmsg_circ_hold(state->circ);
833                 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state);
834                 state->flags |= KDMSG_STATE_INSERTED;
835                 error = 0;
836                 break;
837         case DMSGF_DELETE:
838                 /*
839                  * Persistent state is expected but might not exist if an
840                  * ABORT+DELETE races the close.
841                  */
842                 if (state == NULL) {
843                         if (msg->any.head.cmd & DMSGF_ABORT) {
844                                 error = EALREADY;
845                         } else {
846                                 kprintf("kdmsg_state_msgrx: "
847                                         "no state for DELETE\n");
848                                 error = EINVAL;
849                         }
850                         break;
851                 }
852
853                 /*
854                  * Handle another ABORT+DELETE case if the msgid has already
855                  * been reused.
856                  */
857                 if ((state->rxcmd & DMSGF_CREATE) == 0) {
858                         if (msg->any.head.cmd & DMSGF_ABORT) {
859                                 error = EALREADY;
860                         } else {
861                                 kprintf("kdmsg_state_msgrx: "
862                                         "state reused for DELETE\n");
863                                 error = EINVAL;
864                         }
865                         break;
866                 }
867                 error = 0;
868                 break;
869         default:
870                 /*
871                  * Check for mid-stream ABORT command received, otherwise
872                  * allow.
873                  */
874                 if (msg->any.head.cmd & DMSGF_ABORT) {
875                         if (state == NULL ||
876                             (state->rxcmd & DMSGF_CREATE) == 0) {
877                                 error = EALREADY;
878                                 break;
879                         }
880                 }
881                 error = 0;
882                 break;
883         case DMSGF_REPLY | DMSGF_CREATE:
884         case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
885                 /*
886                  * When receiving a reply with CREATE set the original
887                  * persistent state message should already exist.
888                  */
889                 if (state == NULL) {
890                         kprintf("kdmsg_state_msgrx: no state match for "
891                                 "REPLY cmd=%08x msgid=%016jx\n",
892                                 msg->any.head.cmd,
893                                 (intmax_t)msg->any.head.msgid);
894                         error = EINVAL;
895                         break;
896                 }
897                 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
898                 error = 0;
899                 break;
900         case DMSGF_REPLY | DMSGF_DELETE:
901                 /*
902                  * Received REPLY+ABORT+DELETE in case where msgid has
903                  * already been fully closed, ignore the message.
904                  */
905                 if (state == NULL) {
906                         if (msg->any.head.cmd & DMSGF_ABORT) {
907                                 error = EALREADY;
908                         } else {
909                                 kprintf("kdmsg_state_msgrx: no state match "
910                                         "for REPLY|DELETE\n");
911                                 error = EINVAL;
912                         }
913                         break;
914                 }
915
916                 /*
917                  * Received REPLY+ABORT+DELETE in case where msgid has
918                  * already been reused for an unrelated message,
919                  * ignore the message.
920                  */
921                 if ((state->rxcmd & DMSGF_CREATE) == 0) {
922                         if (msg->any.head.cmd & DMSGF_ABORT) {
923                                 error = EALREADY;
924                         } else {
925                                 kprintf("kdmsg_state_msgrx: state reused "
926                                         "for REPLY|DELETE\n");
927                                 error = EINVAL;
928                         }
929                         break;
930                 }
931                 error = 0;
932                 break;
933         case DMSGF_REPLY:
934                 /*
935                  * Check for mid-stream ABORT reply received to sent command.
936                  */
937                 if (msg->any.head.cmd & DMSGF_ABORT) {
938                         if (state == NULL ||
939                             (state->rxcmd & DMSGF_CREATE) == 0) {
940                                 error = EALREADY;
941                                 break;
942                         }
943                 }
944                 error = 0;
945                 break;
946         }
947         lockmgr(&iocom->msglk, LK_RELEASE);
948         return (error);
949 }
950
951 /*
952  * Called instead of iocom->rcvmsg() if any of the AUTO flags are set.
953  * This routine must call iocom->rcvmsg() for anything not automatically
954  * handled.
955  */
956 static int
957 kdmsg_autorxmsg(kdmsg_msg_t *msg)
958 {
959         kdmsg_iocom_t *iocom = msg->iocom;
960         kdmsg_circuit_t *circ;
961         int error = 0;
962         uint32_t cmd;
963
964         /*
965          * Process a combination of the transaction command and the message
966          * flags.  For the purposes of this routine, the message command is
967          * only relevant when it initiates a transaction (where it is
968          * recorded in icmd).
969          */
970         cmd = (msg->state ? msg->state->icmd : msg->any.head.cmd) &
971               DMSGF_BASECMDMASK;
972         cmd |= msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY);
973
974         switch(cmd) {
975         case DMSG_LNK_CONN | DMSGF_CREATE:
976         case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE:
977                 /*
978                  * Received LNK_CONN transaction.  Transmit response and
979                  * leave transaction open, which allows the other end to
980                  * start to the SPAN protocol.
981                  *
982                  * Handle shim after acknowledging the CONN.
983                  */
984                 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
985                         if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) {
986                                 kdmsg_msg_result(msg, 0);
987                                 if (iocom->auto_callback)
988                                         iocom->auto_callback(msg);
989                         } else {
990                                 error = iocom->rcvmsg(msg);
991                         }
992                         break;
993                 }
994                 /* fall through */
995         case DMSG_LNK_CONN | DMSGF_DELETE:
996                 /*
997                  * This message is usually simulated after a link is lost
998                  * to clean up the transaction.
999                  */
1000                 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) {
1001                         if (iocom->auto_callback)
1002                                 iocom->auto_callback(msg);
1003                         kdmsg_msg_reply(msg, 0);
1004                 } else {
1005                         error = iocom->rcvmsg(msg);
1006                 }
1007                 break;
1008         case DMSG_LNK_SPAN | DMSGF_CREATE:
1009         case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE:
1010                 /*
1011                  * Received LNK_SPAN transaction.  We do not have to respond
1012                  * but we must leave the transaction open.
1013                  *
1014                  * If AUTOCIRC is set automatically initiate a virtual circuit
1015                  * to the received span.  This will attach a kdmsg_circuit
1016                  * to the SPAN state.  The circuit is lost when the span is
1017                  * lost.
1018                  *
1019                  * Handle shim after acknowledging the SPAN.
1020                  */
1021                 if (iocom->flags & KDMSG_IOCOMF_AUTOSPAN) {
1022                         if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
1023                                 if (iocom->flags & KDMSG_IOCOMF_AUTOFORGE)
1024                                         kdmsg_autocirc(msg);
1025                                 if (iocom->auto_callback)
1026                                         iocom->auto_callback(msg);
1027                                 break;
1028                         }
1029                         /* fall through */
1030                 } else {
1031                         error = iocom->rcvmsg(msg);
1032                         break;
1033                 }
1034                 /* fall through */
1035         case DMSG_LNK_SPAN | DMSGF_DELETE:
1036                 /*
1037                  * Process shims (auto_callback) before cleaning up the
1038                  * circuit structure and closing the transactions.  Device
1039                  * driver should ensure that the circuit is not used after
1040                  * the auto_callback() returns.
1041                  *
1042                  * Handle shim before closing the SPAN transaction.
1043                  */
1044                 if (iocom->flags & KDMSG_IOCOMF_AUTOSPAN) {
1045                         if (iocom->auto_callback)
1046                                 iocom->auto_callback(msg);
1047                         if (iocom->flags & KDMSG_IOCOMF_AUTOFORGE)
1048                                 kdmsg_autocirc(msg);
1049                         kdmsg_msg_reply(msg, 0);
1050                 } else {
1051                         error = iocom->rcvmsg(msg);
1052                 }
1053                 break;
1054         case DMSG_LNK_CIRC | DMSGF_CREATE:
1055         case DMSG_LNK_CIRC | DMSGF_CREATE | DMSGF_DELETE:
1056                 /*
1057                  * Received LNK_CIRC transaction.  We must respond and should
1058                  * leave the transaction open, allowing the circuit.  The
1059                  * remote can start issuing commands to us over the circuit
1060                  * even before we respond.
1061                  */
1062                 if (iocom->flags & KDMSG_IOCOMF_AUTOCIRC) {
1063                         if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
1064                                 circ = kmalloc(sizeof(*circ), iocom->mmsg,
1065                                                M_WAITOK | M_ZERO);
1066                                 lwkt_gettoken(&kdmsg_token);
1067                                 msg->state->any.circ = circ;
1068                                 circ->iocom = iocom;
1069                                 circ->rcirc_state = msg->state;
1070                                 kdmsg_circ_hold(circ);  /* for rcirc_state */
1071                                 circ->weight = 0;
1072                                 circ->msgid = circ->rcirc_state->msgid;
1073                                 /* XXX no span link for received circuits */
1074                                 kdmsg_circ_hold(circ);  /* for circ_state */
1075
1076                                 if (RB_INSERT(kdmsg_circuit_tree,
1077                                               &iocom->circ_tree, circ)) {
1078                                         panic("duplicate circuitid allocated");
1079                                 }
1080                                 lwkt_reltoken(&kdmsg_token);
1081                                 kdmsg_msg_result(msg, 0);
1082
1083                                 /*
1084                                  * Handle shim after adding the circuit and
1085                                  * after acknowledging the CIRC.
1086                                  */
1087                                 if (iocom->auto_callback)
1088                                         iocom->auto_callback(msg);
1089                                 break;
1090                         }
1091                         /* fall through */
1092                 } else {
1093                         error = iocom->rcvmsg(msg);
1094                         break;
1095                 }
1096                 /* fall through */
1097         case DMSG_LNK_CIRC | DMSGF_DELETE:
1098                 if (iocom->flags & KDMSG_IOCOMF_AUTOCIRC) {
1099                         circ = msg->state->any.circ;
1100                         if (circ == NULL)
1101                                 break;
1102
1103                         /*
1104                          * Handle shim before terminating the circuit.
1105                          */
1106 #if 0
1107                         kprintf("KDMSG VC: RECEIVE CIRC DELETE "
1108                                 "IOCOM %p MSGID %016jx\n",
1109                                 msg->iocom, circ->msgid);
1110 #endif
1111                         if (iocom->auto_callback)
1112                                 iocom->auto_callback(msg);
1113
1114                         KKASSERT(circ->rcirc_state == msg->state);
1115                         lwkt_gettoken(&kdmsg_token);
1116                         circ->rcirc_state = NULL;
1117                         msg->state->any.circ = NULL;
1118                         RB_REMOVE(kdmsg_circuit_tree, &iocom->circ_tree, circ);
1119                         lwkt_reltoken(&kdmsg_token);
1120                         kdmsg_circ_drop(circ);  /* for rcirc_state */
1121                         kdmsg_msg_reply(msg, 0);
1122                 } else {
1123                         error = iocom->rcvmsg(msg);
1124                 }
1125                 break;
1126         default:
1127                 /*
1128                  * Anything unhandled goes into rcvmsg.
1129                  *
1130                  * NOTE: Replies to link-level messages initiated by our side
1131                  *       are handled by the state callback, they are NOT
1132                  *       handled here.
1133                  */
1134                 error = iocom->rcvmsg(msg);
1135                 break;
1136         }
1137         return (error);
1138 }
1139
1140 /*
1141  * Handle automatic forging of virtual circuits based on received SPANs.
1142  * (AUTOFORGE).  Note that other code handles tracking received circuit
1143  * transactions (AUTOCIRC).
1144  *
1145  * We can ignore non-transactions here.  Use trans->icmd to test the
1146  * transactional command (once past the CREATE the individual message
1147  * commands are not usually the icmd).
1148  *
1149  * XXX locks
1150  */
1151 static
1152 void
1153 kdmsg_autocirc(kdmsg_msg_t *msg)
1154 {
1155         kdmsg_iocom_t *iocom = msg->iocom;
1156         kdmsg_circuit_t *circ;
1157         kdmsg_msg_t *xmsg;      /* CIRC */
1158
1159         if (msg->state == NULL)
1160                 return;
1161
1162         /*
1163          * Gaining the SPAN, automatically forge a circuit to the target.
1164          *
1165          * NOTE!! The shim is not executed until we receive an acknowlegement
1166          *        to our forged LNK_CIRC (see kdmsg_autocirc_reply()).
1167          */
1168         if (msg->state->icmd == DMSG_LNK_SPAN &&
1169             (msg->any.head.cmd & DMSGF_CREATE)) {
1170                 circ = kmalloc(sizeof(*circ), iocom->mmsg, M_WAITOK | M_ZERO);
1171                 lwkt_gettoken(&kdmsg_token);
1172                 msg->state->any.circ = circ;
1173                 circ->iocom = iocom;
1174                 circ->span_state = msg->state;
1175                 kdmsg_circ_hold(circ);  /* for span_state */
1176                 xmsg = kdmsg_msg_alloc(iocom, NULL,
1177                                        DMSG_LNK_CIRC | DMSGF_CREATE,
1178                                        kdmsg_autocirc_reply, circ);
1179                 circ->circ_state = xmsg->state;
1180                 circ->weight = msg->any.lnk_span.dist;
1181                 circ->msgid = circ->circ_state->msgid;
1182                 kdmsg_circ_hold(circ);  /* for circ_state */
1183 #if 0
1184                 kprintf("KDMSG VC: CREATE SPAN->CIRC IOCOM %p MSGID %016jx\n",
1185                         msg->iocom, circ->msgid);
1186 #endif
1187
1188                 if (RB_INSERT(kdmsg_circuit_tree, &iocom->circ_tree, circ))
1189                         panic("duplicate circuitid allocated");
1190                 lwkt_reltoken(&kdmsg_token);
1191
1192                 xmsg->any.lnk_circ.target = msg->any.head.msgid;
1193                 kdmsg_msg_write(xmsg);
1194         }
1195
1196         /*
1197          * Losing the SPAN
1198          *
1199          * NOTE: When losing a SPAN, any circuits using the span should be
1200          *       deleted by the remote end first.  XXX might not be ordered
1201          *       on actual loss of connection.
1202          */
1203         if (msg->state->icmd == DMSG_LNK_SPAN &&
1204             (msg->any.head.cmd & DMSGF_DELETE) &&
1205             msg->state->any.circ) {
1206                 circ = msg->state->any.circ;
1207                 lwkt_gettoken(&kdmsg_token);
1208                 circ->span_state = NULL;
1209                 msg->state->any.circ = NULL;
1210                 RB_REMOVE(kdmsg_circuit_tree, &iocom->circ_tree, circ);
1211 #if 0
1212                 kprintf("KDMSG VC: DELETE SPAN->CIRC IOCOM %p MSGID %016jx\n",
1213                         msg->iocom, (intmax_t)circ->msgid);
1214 #endif
1215                 kdmsg_circ_drop(circ);  /* for span_state */
1216                 lwkt_reltoken(&kdmsg_token);
1217         }
1218 }
1219
1220 static
1221 int
1222 kdmsg_autocirc_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
1223 {
1224         kdmsg_iocom_t *iocom = state->iocom;
1225         kdmsg_circuit_t *circ = state->any.circ;
1226
1227         /*
1228          * Call shim after receiving an acknowlegement to our forged
1229          * circuit and before processing a received termination.
1230          */
1231         if (iocom->auto_callback)
1232                 iocom->auto_callback(msg);
1233
1234         /*
1235          * If the remote is terminating the VC we terminate our side
1236          */
1237         if ((state->txcmd & DMSGF_DELETE) == 0 &&
1238             (msg->any.head.cmd & DMSGF_DELETE)) {
1239 #if 0
1240                 kprintf("KDMSG VC: DELETE CIRC FROM REMOTE\n");
1241 #endif
1242                 lwkt_gettoken(&kdmsg_token);
1243                 circ->circ_state = NULL;
1244                 state->any.circ = NULL;
1245                 kdmsg_circ_drop(circ);          /* for circ_state */
1246                 lwkt_reltoken(&kdmsg_token);
1247                 kdmsg_msg_reply(msg, 0);
1248         }
1249         return (0);
1250 }
1251
1252 /*
1253  * Post-receive-handling message and state cleanup.  This routine is called
1254  * after the state function handling/callback to properly dispose of the
1255  * message and update or dispose of the state.
1256  */
1257 static
1258 void
1259 kdmsg_state_cleanuprx(kdmsg_msg_t *msg)
1260 {
1261         kdmsg_iocom_t *iocom = msg->iocom;
1262         kdmsg_state_t *state;
1263
1264         if ((state = msg->state) == NULL) {
1265                 kdmsg_msg_free(msg);
1266         } else if (msg->any.head.cmd & DMSGF_DELETE) {
1267                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1268                 KKASSERT((state->rxcmd & DMSGF_DELETE) == 0);
1269                 state->rxcmd |= DMSGF_DELETE;
1270                 if (state->txcmd & DMSGF_DELETE) {
1271                         KKASSERT(state->flags & KDMSG_STATE_INSERTED);
1272                         if (state->rxcmd & DMSGF_REPLY) {
1273                                 KKASSERT(msg->any.head.cmd &
1274                                          DMSGF_REPLY);
1275                                 RB_REMOVE(kdmsg_state_tree,
1276                                           &iocom->statewr_tree, state);
1277                         } else {
1278                                 KKASSERT((msg->any.head.cmd &
1279                                           DMSGF_REPLY) == 0);
1280                                 RB_REMOVE(kdmsg_state_tree,
1281                                           &iocom->staterd_tree, state);
1282                         }
1283                         state->flags &= ~KDMSG_STATE_INSERTED;
1284                         if (msg != state->msg)
1285                                 kdmsg_msg_free(msg);
1286                         lockmgr(&iocom->msglk, LK_RELEASE);
1287                         kdmsg_state_free(state);
1288                 } else {
1289                         if (msg != state->msg)
1290                                 kdmsg_msg_free(msg);
1291                         lockmgr(&iocom->msglk, LK_RELEASE);
1292                 }
1293         } else if (msg != state->msg) {
1294                 kdmsg_msg_free(msg);
1295         }
1296 }
1297
1298 /*
1299  * Simulate receiving a message which terminates an active transaction
1300  * state.  Our simulated received message must set DELETE and may also
1301  * have to set CREATE.  It must also ensure that all fields are set such
1302  * that the receive handling code can find the state (kdmsg_state_msgrx())
1303  * or an endless loop will ensue.
1304  *
1305  * This is used when the other end of the link or virtual circuit is dead
1306  * so the device driver gets a completed transaction for all pending states.
1307  */
1308 static
1309 void
1310 kdmsg_state_abort(kdmsg_state_t *state)
1311 {
1312         kdmsg_iocom_t *iocom = state->iocom;
1313         kdmsg_msg_t *msg;
1314
1315         /*
1316          * Prevent recursive aborts which could otherwise occur if the
1317          * simulated message reception runs state->func which then turns
1318          * around and tries to reply to a broken circuit when then calls
1319          * the state abort code again.
1320          */
1321         if (state->flags & KDMSG_STATE_ABORTING)
1322                 return;
1323         state->flags |= KDMSG_STATE_ABORTING;
1324
1325         /*
1326          * Simulatem essage reception
1327          */
1328         msg = kdmsg_msg_alloc(iocom, state->circ,
1329                               DMSG_LNK_ERROR,
1330                               NULL, NULL);
1331         if ((state->rxcmd & DMSGF_CREATE) == 0)
1332                 msg->any.head.cmd |= DMSGF_CREATE;
1333         msg->any.head.cmd |= DMSGF_DELETE | (state->rxcmd & DMSGF_REPLY);
1334         msg->any.head.error = DMSG_ERR_LOSTLINK;
1335         msg->any.head.msgid = state->msgid;
1336         msg->state = state;
1337         kdmsg_msg_receive_handling(msg);
1338 }
1339
1340 /*
1341  * Process state tracking for a message prior to transmission.
1342  *
1343  * Called with msglk held and the msg dequeued.  Returns non-zero if
1344  * the message is bad and should be deleted by the caller.
1345  *
1346  * One-off messages are usually with dummy state and msg->state may be NULL
1347  * in this situation.
1348  *
1349  * New transactions (when CREATE is set) will insert the state.
1350  *
1351  * May request that caller discard the message by setting *discardp to 1.
1352  * A NULL state may be returned in this case.
1353  */
1354 static
1355 int
1356 kdmsg_state_msgtx(kdmsg_msg_t *msg)
1357 {
1358         kdmsg_iocom_t *iocom = msg->iocom;
1359         kdmsg_state_t *state;
1360         int error;
1361
1362         /*
1363          * Make sure a state structure is ready to go in case we need a new
1364          * one.  This is the only routine which uses freewr_state so no
1365          * races are possible.
1366          */
1367         if ((state = iocom->freewr_state) == NULL) {
1368                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
1369                 state->flags = KDMSG_STATE_DYNAMIC;
1370                 state->iocom = iocom;
1371                 iocom->freewr_state = state;
1372         }
1373
1374         /*
1375          * Lock RB tree.  If persistent state is present it will have already
1376          * been assigned to msg.
1377          */
1378         lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1379         state = msg->state;
1380
1381         /*
1382          * Short-cut one-off or mid-stream messages (state may be NULL).
1383          */
1384         if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
1385                                   DMSGF_ABORT)) == 0) {
1386                 lockmgr(&iocom->msglk, LK_RELEASE);
1387                 return(0);
1388         }
1389
1390
1391         /*
1392          * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
1393          * inside the case statements.
1394          */
1395         switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
1396                                     DMSGF_REPLY)) {
1397         case DMSGF_CREATE:
1398         case DMSGF_CREATE | DMSGF_DELETE:
1399                 /*
1400                  * Insert the new persistent message state and mark
1401                  * half-closed if DELETE is set.  Since this is a new
1402                  * message it isn't possible to transition into the fully
1403                  * closed state here.
1404                  *
1405                  * XXX state must be assigned and inserted by
1406                  *     kdmsg_msg_write().  txcmd is assigned by us
1407                  *     on-transmit.
1408                  */
1409                 KKASSERT(state != NULL);
1410                 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK;
1411                 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
1412                 state->rxcmd = DMSGF_REPLY;
1413                 error = 0;
1414                 break;
1415         case DMSGF_DELETE:
1416                 /*
1417                  * Sent ABORT+DELETE in case where msgid has already
1418                  * been fully closed, ignore the message.
1419                  */
1420                 if (state == NULL) {
1421                         if (msg->any.head.cmd & DMSGF_ABORT) {
1422                                 error = EALREADY;
1423                         } else {
1424                                 kprintf("kdmsg_state_msgtx: no state match "
1425                                         "for DELETE cmd=%08x msgid=%016jx\n",
1426                                         msg->any.head.cmd,
1427                                         (intmax_t)msg->any.head.msgid);
1428                                 error = EINVAL;
1429                         }
1430                         break;
1431                 }
1432
1433                 /*
1434                  * Sent ABORT+DELETE in case where msgid has
1435                  * already been reused for an unrelated message,
1436                  * ignore the message.
1437                  */
1438                 if ((state->txcmd & DMSGF_CREATE) == 0) {
1439                         if (msg->any.head.cmd & DMSGF_ABORT) {
1440                                 error = EALREADY;
1441                         } else {
1442                                 kprintf("kdmsg_state_msgtx: state reused "
1443                                         "for DELETE\n");
1444                                 error = EINVAL;
1445                         }
1446                         break;
1447                 }
1448                 error = 0;
1449                 break;
1450         default:
1451                 /*
1452                  * Check for mid-stream ABORT command sent
1453                  */
1454                 if (msg->any.head.cmd & DMSGF_ABORT) {
1455                         if (state == NULL ||
1456                             (state->txcmd & DMSGF_CREATE) == 0) {
1457                                 error = EALREADY;
1458                                 break;
1459                         }
1460                 }
1461                 error = 0;
1462                 break;
1463         case DMSGF_REPLY | DMSGF_CREATE:
1464         case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
1465                 /*
1466                  * When transmitting a reply with CREATE set the original
1467                  * persistent state message should already exist.
1468                  */
1469                 if (state == NULL) {
1470                         kprintf("kdmsg_state_msgtx: no state match "
1471                                 "for REPLY | CREATE\n");
1472                         error = EINVAL;
1473                         break;
1474                 }
1475                 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
1476                 error = 0;
1477                 break;
1478         case DMSGF_REPLY | DMSGF_DELETE:
1479                 /*
1480                  * When transmitting a reply with DELETE set the original
1481                  * persistent state message should already exist.
1482                  *
1483                  * This is very similar to the REPLY|CREATE|* case except
1484                  * txcmd is already stored, so we just add the DELETE flag.
1485                  *
1486                  * Sent REPLY+ABORT+DELETE in case where msgid has
1487                  * already been fully closed, ignore the message.
1488                  */
1489                 if (state == NULL) {
1490                         if (msg->any.head.cmd & DMSGF_ABORT) {
1491                                 error = EALREADY;
1492                         } else {
1493                                 kprintf("kdmsg_state_msgtx: no state match "
1494                                         "for REPLY | DELETE\n");
1495                                 error = EINVAL;
1496                         }
1497                         break;
1498                 }
1499
1500                 /*
1501                  * Sent REPLY+ABORT+DELETE in case where msgid has already
1502                  * been reused for an unrelated message, ignore the message.
1503                  */
1504                 if ((state->txcmd & DMSGF_CREATE) == 0) {
1505                         if (msg->any.head.cmd & DMSGF_ABORT) {
1506                                 error = EALREADY;
1507                         } else {
1508                                 kprintf("kdmsg_state_msgtx: state reused "
1509                                         "for REPLY | DELETE\n");
1510                                 error = EINVAL;
1511                         }
1512                         break;
1513                 }
1514                 error = 0;
1515                 break;
1516         case DMSGF_REPLY:
1517                 /*
1518                  * Check for mid-stream ABORT reply sent.
1519                  *
1520                  * One-off REPLY messages are allowed for e.g. status updates.
1521                  */
1522                 if (msg->any.head.cmd & DMSGF_ABORT) {
1523                         if (state == NULL ||
1524                             (state->txcmd & DMSGF_CREATE) == 0) {
1525                                 error = EALREADY;
1526                                 break;
1527                         }
1528                 }
1529                 error = 0;
1530                 break;
1531         }
1532         lockmgr(&iocom->msglk, LK_RELEASE);
1533         return (error);
1534 }
1535
1536 static
1537 void
1538 kdmsg_state_cleanuptx(kdmsg_msg_t *msg)
1539 {
1540         kdmsg_iocom_t *iocom = msg->iocom;
1541         kdmsg_state_t *state;
1542
1543         if ((state = msg->state) == NULL) {
1544                 kdmsg_msg_free(msg);
1545         } else if (msg->any.head.cmd & DMSGF_DELETE) {
1546                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1547                 KKASSERT((state->txcmd & DMSGF_DELETE) == 0);
1548                 state->txcmd |= DMSGF_DELETE;
1549                 if (state->rxcmd & DMSGF_DELETE) {
1550                         KKASSERT(state->flags & KDMSG_STATE_INSERTED);
1551                         if (state->txcmd & DMSGF_REPLY) {
1552                                 KKASSERT(msg->any.head.cmd &
1553                                          DMSGF_REPLY);
1554                                 RB_REMOVE(kdmsg_state_tree,
1555                                           &iocom->staterd_tree, state);
1556                         } else {
1557                                 KKASSERT((msg->any.head.cmd &
1558                                           DMSGF_REPLY) == 0);
1559                                 RB_REMOVE(kdmsg_state_tree,
1560                                           &iocom->statewr_tree, state);
1561                         }
1562                         state->flags &= ~KDMSG_STATE_INSERTED;
1563                         if (msg != state->msg)
1564                                 kdmsg_msg_free(msg);
1565                         lockmgr(&iocom->msglk, LK_RELEASE);
1566                         kdmsg_state_free(state);
1567                 } else {
1568                         if (msg != state->msg)
1569                                 kdmsg_msg_free(msg);
1570                         lockmgr(&iocom->msglk, LK_RELEASE);
1571                 }
1572         } else if (msg != state->msg) {
1573                 kdmsg_msg_free(msg);
1574         }
1575 }
1576
1577 static
1578 void
1579 kdmsg_state_free(kdmsg_state_t *state)
1580 {
1581         kdmsg_iocom_t *iocom = state->iocom;
1582         kdmsg_msg_t *msg;
1583
1584         KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0);
1585         msg = state->msg;
1586         state->msg = NULL;
1587         kfree(state, iocom->mmsg);
1588         if (msg) {
1589                 msg->state = NULL;
1590                 kdmsg_msg_free(msg);
1591         }
1592 }
1593
1594 kdmsg_msg_t *
1595 kdmsg_msg_alloc(kdmsg_iocom_t *iocom, kdmsg_circuit_t *circ, uint32_t cmd,
1596                 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
1597 {
1598         kdmsg_msg_t *msg;
1599         kdmsg_state_t *state;
1600         size_t hbytes;
1601
1602         KKASSERT(iocom != NULL);
1603         hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
1604         msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
1605                       iocom->mmsg, M_WAITOK | M_ZERO);
1606         msg->hdr_size = hbytes;
1607         msg->iocom = iocom;
1608         msg->any.head.magic = DMSG_HDR_MAGIC;
1609         msg->any.head.cmd = cmd;
1610         if (circ) {
1611                 kdmsg_circ_hold(circ);
1612                 msg->circ = circ;
1613                 msg->any.head.circuit = circ->msgid;
1614         }
1615
1616         if (cmd & DMSGF_CREATE) {
1617                 /*
1618                  * New transaction, requires tracking state and a unique
1619                  * msgid to be allocated.
1620                  */
1621                 KKASSERT(msg->state == NULL);
1622                 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
1623                 state->flags = KDMSG_STATE_DYNAMIC;
1624                 state->func = func;
1625                 state->any.any = data;
1626                 state->msg = msg;
1627                 state->msgid = (uint64_t)(uintptr_t)state;
1628                 state->circ = circ;
1629                 state->iocom = iocom;
1630                 msg->state = state;
1631                 if (circ)
1632                         kdmsg_circ_hold(circ);
1633                 /*msg->any.head.msgid = state->msgid;XXX*/
1634
1635                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1636                 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state))
1637                         panic("duplicate msgid allocated");
1638                 state->flags |= KDMSG_STATE_INSERTED;
1639                 msg->any.head.msgid = state->msgid;
1640                 lockmgr(&iocom->msglk, LK_RELEASE);
1641         }
1642         return (msg);
1643 }
1644
1645 kdmsg_msg_t *
1646 kdmsg_msg_alloc_state(kdmsg_state_t *state, uint32_t cmd,
1647                       int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
1648 {
1649         kdmsg_iocom_t *iocom = state->iocom;
1650         kdmsg_msg_t *msg;
1651         size_t hbytes;
1652
1653         KKASSERT(iocom != NULL);
1654         hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
1655         msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
1656                       iocom->mmsg, M_WAITOK | M_ZERO);
1657         msg->hdr_size = hbytes;
1658         msg->iocom = iocom;
1659         msg->any.head.magic = DMSG_HDR_MAGIC;
1660         msg->any.head.cmd = cmd;
1661         msg->state = state;
1662         if (state->circ) {
1663                 kdmsg_circ_hold(state->circ);
1664                 msg->circ = state->circ;
1665                 msg->any.head.circuit = state->circ->msgid;
1666         }
1667         return(msg);
1668 }
1669
1670 void
1671 kdmsg_msg_free(kdmsg_msg_t *msg)
1672 {
1673         kdmsg_iocom_t *iocom = msg->iocom;
1674
1675         if ((msg->flags & KDMSG_FLAG_AUXALLOC) &&
1676             msg->aux_data && msg->aux_size) {
1677                 kfree(msg->aux_data, iocom->mmsg);
1678                 msg->flags &= ~KDMSG_FLAG_AUXALLOC;
1679         }
1680         if (msg->circ) {
1681                 kdmsg_circ_drop(msg->circ);
1682                 msg->circ = NULL;
1683         }
1684         if (msg->state) {
1685                 if (msg->state->msg == msg)
1686                         msg->state->msg = NULL;
1687                 msg->state = NULL;
1688         }
1689         msg->aux_data = NULL;
1690         msg->aux_size = 0;
1691         msg->iocom = NULL;
1692         kfree(msg, iocom->mmsg);
1693 }
1694
1695 /*
1696  * Circuits are tracked in a red-black tree by their circuit id (msgid).
1697  */
1698 int
1699 kdmsg_circuit_cmp(kdmsg_circuit_t *circ1, kdmsg_circuit_t *circ2)
1700 {
1701         if (circ1->msgid < circ2->msgid)
1702                 return(-1);
1703         if (circ1->msgid > circ2->msgid)
1704                 return(1);
1705         return (0);
1706 }
1707
1708 /*
1709  * Indexed messages are stored in a red-black tree indexed by their
1710  * msgid.  Only persistent messages are indexed.
1711  */
1712 int
1713 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2)
1714 {
1715         if (state1->iocom < state2->iocom)
1716                 return(-1);
1717         if (state1->iocom > state2->iocom)
1718                 return(1);
1719         if (state1->circ < state2->circ)
1720                 return(-1);
1721         if (state1->circ > state2->circ)
1722                 return(1);
1723         if (state1->msgid < state2->msgid)
1724                 return(-1);
1725         if (state1->msgid > state2->msgid)
1726                 return(1);
1727         return(0);
1728 }
1729
1730 /*
1731  * Write a message.  All requisit command flags have been set.
1732  *
1733  * If msg->state is non-NULL the message is written to the existing
1734  * transaction.  msgid will be set accordingly.
1735  *
1736  * If msg->state is NULL and CREATE is set new state is allocated and
1737  * (func, data) is installed.  A msgid is assigned.
1738  *
1739  * If msg->state is NULL and CREATE is not set the message is assumed
1740  * to be a one-way message.  The originator must assign the msgid
1741  * (or leave it 0, which is typical.
1742  *
1743  * This function merely queues the message to the management thread, it
1744  * does not write to the message socket/pipe.
1745  */
1746 void
1747 kdmsg_msg_write(kdmsg_msg_t *msg)
1748 {
1749         kdmsg_iocom_t *iocom = msg->iocom;
1750         kdmsg_state_t *state;
1751
1752         if (msg->state) {
1753                 /*
1754                  * Continuance or termination of existing transaction.
1755                  * The transaction could have been initiated by either end.
1756                  *
1757                  * (Function callback and aux data for the receive side can
1758                  * be replaced or left alone).
1759                  */
1760                 state = msg->state;
1761                 msg->any.head.msgid = state->msgid;
1762                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1763         } else {
1764                 /*
1765                  * One-off message (always uses msgid 0 to distinguish
1766                  * between a possibly lost in-transaction message due to
1767                  * competing aborts and a real one-off message?)
1768                  */
1769                 state = NULL;
1770                 msg->any.head.msgid = 0;
1771                 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1772         }
1773
1774         /*
1775          * With AUTOCIRC and AUTOFORGE it is possible for the circuit to
1776          * get ripped out in the rxthread while some other thread is
1777          * holding a ref on it inbetween allocating and sending a dmsg.
1778          */
1779         if (msg->circ && msg->circ->rcirc_state == NULL &&
1780             (msg->circ->span_state == NULL || msg->circ->circ_state == NULL)) {
1781                 kprintf("kdmsg_msg_write: Attempt to write message to "
1782                         "terminated circuit: msg %08x\n", msg->any.head.cmd);
1783                 lockmgr(&iocom->msglk, LK_RELEASE);
1784                 if (kdmsg_state_msgtx(msg)) {
1785                         if (state == NULL || msg != state->msg)
1786                                 kdmsg_msg_free(msg);
1787                 } else if ((msg->state->rxcmd & DMSGF_DELETE) == 0) {
1788                         /* XXX SMP races simulating a response here */
1789                         kdmsg_state_t *state = msg->state;
1790                         kdmsg_state_cleanuptx(msg);
1791                         kdmsg_state_abort(state);
1792                 } else {
1793                         kdmsg_state_cleanuptx(msg);
1794                 }
1795                 return;
1796         }
1797
1798         /*
1799          * This flag is not set until after the tx thread has drained
1800          * the txmsgq and simulated responses.  After that point the
1801          * txthread is dead and can no longer simulate responses.
1802          *
1803          * Device drivers should never try to send a message once this
1804          * flag is set.  They should have detected (through the state
1805          * closures) that the link is in trouble.
1806          */
1807         if (iocom->flags & KDMSG_IOCOMF_EXITNOACC) {
1808                 lockmgr(&iocom->msglk, LK_RELEASE);
1809                 panic("kdmsg_msg_write: Attempt to write message to "
1810                       "terminated iocom\n");
1811         }
1812
1813         /*
1814          * Finish up the msg fields.  Note that msg->aux_size and the
1815          * aux_bytes stored in the message header represent the unaligned
1816          * (actual) bytes of data, but the buffer is sized to an aligned
1817          * size and the CRC is generated over the aligned length.
1818          */
1819         msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255);
1820         ++iocom->msg_seq;
1821
1822         if (msg->aux_data && msg->aux_size) {
1823                 uint32_t abytes = DMSG_DOALIGN(msg->aux_size);
1824
1825                 msg->any.head.aux_bytes = msg->aux_size;
1826                 msg->any.head.aux_crc = iscsi_crc32(msg->aux_data, abytes);
1827         }
1828         msg->any.head.hdr_crc = 0;
1829         msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size);
1830
1831         TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry);
1832
1833         if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) {
1834                 atomic_clear_int(&iocom->msg_ctl,
1835                                  KDMSG_CLUSTERCTL_SLEEPING);
1836                 wakeup(&iocom->msg_ctl);
1837         }
1838
1839         lockmgr(&iocom->msglk, LK_RELEASE);
1840 }
1841
1842 /*
1843  * Reply to a message and terminate our side of the transaction.
1844  *
1845  * If msg->state is non-NULL we are replying to a one-way message.
1846  */
1847 void
1848 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error)
1849 {
1850         kdmsg_state_t *state = msg->state;
1851         kdmsg_msg_t *nmsg;
1852         uint32_t cmd;
1853
1854         /*
1855          * Reply with a simple error code and terminate the transaction.
1856          */
1857         cmd = DMSG_LNK_ERROR;
1858
1859         /*
1860          * Check if our direction has even been initiated yet, set CREATE.
1861          *
1862          * Check what direction this is (command or reply direction).  Note
1863          * that txcmd might not have been initiated yet.
1864          *
1865          * If our direction has already been closed we just return without
1866          * doing anything.
1867          */
1868         if (state) {
1869                 if (state->txcmd & DMSGF_DELETE)
1870                         return;
1871                 if ((state->txcmd & DMSGF_CREATE) == 0)
1872                         cmd |= DMSGF_CREATE;
1873                 if (state->txcmd & DMSGF_REPLY)
1874                         cmd |= DMSGF_REPLY;
1875                 cmd |= DMSGF_DELETE;
1876         } else {
1877                 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1878                         cmd |= DMSGF_REPLY;
1879         }
1880
1881         /* XXX messy mask cmd to avoid allocating state */
1882         nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
1883         nmsg->any.head.error = error;
1884         kdmsg_msg_write(nmsg);
1885 }
1886
1887 /*
1888  * Reply to a message and continue our side of the transaction.
1889  *
1890  * If msg->state is non-NULL we are replying to a one-way message and this
1891  * function degenerates into the same as kdmsg_msg_reply().
1892  */
1893 void
1894 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error)
1895 {
1896         kdmsg_state_t *state = msg->state;
1897         kdmsg_msg_t *nmsg;
1898         uint32_t cmd;
1899
1900         /*
1901          * Return a simple result code, do NOT terminate the transaction.
1902          */
1903         cmd = DMSG_LNK_ERROR;
1904
1905         /*
1906          * Check if our direction has even been initiated yet, set CREATE.
1907          *
1908          * Check what direction this is (command or reply direction).  Note
1909          * that txcmd might not have been initiated yet.
1910          *
1911          * If our direction has already been closed we just return without
1912          * doing anything.
1913          */
1914         if (state) {
1915                 if (state->txcmd & DMSGF_DELETE)
1916                         return;
1917                 if ((state->txcmd & DMSGF_CREATE) == 0)
1918                         cmd |= DMSGF_CREATE;
1919                 if (state->txcmd & DMSGF_REPLY)
1920                         cmd |= DMSGF_REPLY;
1921                 /* continuing transaction, do not set MSGF_DELETE */
1922         } else {
1923                 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1924                         cmd |= DMSGF_REPLY;
1925         }
1926
1927         /* XXX messy mask cmd to avoid allocating state */
1928         nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
1929         nmsg->any.head.error = error;
1930         kdmsg_msg_write(nmsg);
1931 }
1932
1933 /*
1934  * Reply to a message and terminate our side of the transaction.
1935  *
1936  * If msg->state is non-NULL we are replying to a one-way message.
1937  */
1938 void
1939 kdmsg_state_reply(kdmsg_state_t *state, uint32_t error)
1940 {
1941         kdmsg_msg_t *nmsg;
1942         uint32_t cmd;
1943
1944         /*
1945          * Reply with a simple error code and terminate the transaction.
1946          */
1947         cmd = DMSG_LNK_ERROR;
1948
1949         /*
1950          * Check if our direction has even been initiated yet, set CREATE.
1951          *
1952          * Check what direction this is (command or reply direction).  Note
1953          * that txcmd might not have been initiated yet.
1954          *
1955          * If our direction has already been closed we just return without
1956          * doing anything.
1957          */
1958         if (state) {
1959                 if (state->txcmd & DMSGF_DELETE)
1960                         return;
1961                 if ((state->txcmd & DMSGF_CREATE) == 0)
1962                         cmd |= DMSGF_CREATE;
1963                 if (state->txcmd & DMSGF_REPLY)
1964                         cmd |= DMSGF_REPLY;
1965                 cmd |= DMSGF_DELETE;
1966         } else {
1967                 if ((state->txcmd & DMSGF_REPLY) == 0)
1968                         cmd |= DMSGF_REPLY;
1969         }
1970
1971         /* XXX messy mask cmd to avoid allocating state */
1972         nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
1973         nmsg->any.head.error = error;
1974         kdmsg_msg_write(nmsg);
1975 }
1976
1977 /*
1978  * Reply to a message and continue our side of the transaction.
1979  *
1980  * If msg->state is non-NULL we are replying to a one-way message and this
1981  * function degenerates into the same as kdmsg_msg_reply().
1982  */
1983 void
1984 kdmsg_state_result(kdmsg_state_t *state, uint32_t error)
1985 {
1986         kdmsg_msg_t *nmsg;
1987         uint32_t cmd;
1988
1989         /*
1990          * Return a simple result code, do NOT terminate the transaction.
1991          */
1992         cmd = DMSG_LNK_ERROR;
1993
1994         /*
1995          * Check if our direction has even been initiated yet, set CREATE.
1996          *
1997          * Check what direction this is (command or reply direction).  Note
1998          * that txcmd might not have been initiated yet.
1999          *
2000          * If our direction has already been closed we just return without
2001          * doing anything.
2002          */
2003         if (state) {
2004                 if (state->txcmd & DMSGF_DELETE)
2005                         return;
2006                 if ((state->txcmd & DMSGF_CREATE) == 0)
2007                         cmd |= DMSGF_CREATE;
2008                 if (state->txcmd & DMSGF_REPLY)
2009                         cmd |= DMSGF_REPLY;
2010                 /* continuing transaction, do not set MSGF_DELETE */
2011         } else {
2012                 if ((state->txcmd & DMSGF_REPLY) == 0)
2013                         cmd |= DMSGF_REPLY;
2014         }
2015
2016         /* XXX messy mask cmd to avoid allocating state */
2017         nmsg = kdmsg_msg_alloc_state(state, cmd, NULL, NULL);
2018         nmsg->any.head.error = error;
2019         kdmsg_msg_write(nmsg);
2020 }