hammer2 - dmsg blockdev work
[dragonfly.git] / sys / kern / kern_dmsg.c
CommitLineData
26bf1a36
MD
1/*-
2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
3a5aa68f
MD
34#include <sys/param.h>
35#include <sys/types.h>
36#include <sys/kernel.h>
37#include <sys/conf.h>
38#include <sys/systm.h>
39#include <sys/queue.h>
40#include <sys/tree.h>
41#include <sys/malloc.h>
42#include <sys/mount.h>
43#include <sys/socket.h>
44#include <sys/vnode.h>
45#include <sys/file.h>
46#include <sys/proc.h>
47#include <sys/priv.h>
48#include <sys/thread.h>
49#include <sys/globaldata.h>
50#include <sys/limits.h>
26bf1a36 51
3a5aa68f 52#include <sys/dmsg.h>
26bf1a36 53
3a5aa68f
MD
/*
 * Forward declarations for the per-iocom receive and transmit thread
 * bodies, which are handed to lwkt_create() further down.
 */
static void kdmsg_iocom_thread_wr(void *arg);
static void kdmsg_iocom_thread_rd(void *arg);

/*
 * Instantiate the red-black tree routines for kdmsg_state_tree: nodes are
 * kdmsg_state structures linked through their rbnode field and compared
 * with kdmsg_state_cmp().
 */
RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp);
58
59/*
60 * Initialize the roll-up communications structure for a network
61 * messaging session. This function does not install the socket.
62 */
63void
64kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle,
65 struct malloc_type *mmsg,
3a5aa68f
MD
66 int (*lnk_rcvmsg)(kdmsg_msg_t *msg),
67 int (*dbg_rcvmsg)(kdmsg_msg_t *msg),
68 int (*misc_rcvmsg)(kdmsg_msg_t *msg))
69{
70 bzero(iocom, sizeof(*iocom));
71 iocom->handle = handle;
72 iocom->mmsg = mmsg;
3a5aa68f
MD
73 iocom->lnk_rcvmsg = lnk_rcvmsg;
74 iocom->dbg_rcvmsg = dbg_rcvmsg;
75 iocom->misc_rcvmsg = misc_rcvmsg;
76 iocom->router.iocom = iocom;
77 lockinit(&iocom->msglk, "h2msg", 0, 0);
78 TAILQ_INIT(&iocom->msgq);
79 RB_INIT(&iocom->staterd_tree);
80 RB_INIT(&iocom->statewr_tree);
81}
82
83/*
84 * [Re]connect using the passed file pointer. The caller must ref the
85 * fp for us. We own that ref now.
86 */
87void
88kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
89 const char *subsysname)
90{
91 /*
92 * Destroy the current connection
93 */
94 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
95 while (iocom->msgrd_td || iocom->msgwr_td) {
96 wakeup(&iocom->msg_ctl);
97 tsleep(iocom, 0, "clstrkl", hz);
98 }
99
100 /*
101 * Drop communications descriptor
102 */
103 if (iocom->msg_fp) {
104 fdrop(iocom->msg_fp);
105 iocom->msg_fp = NULL;
106 }
107 kprintf("RESTART CONNECTION\n");
108
109 /*
110 * Setup new communications descriptor
111 */
112 iocom->msg_ctl = 0;
113 iocom->msg_fp = fp;
114 iocom->msg_seq = 0;
115
116 lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td,
117 NULL, 0, -1, "%s-msgrd", subsysname);
118 lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td,
119 NULL, 0, -1, "%s-msgwr", subsysname);
120}
121
185ace93
MD
122/*
123 * Disconnect and clean up
124 */
125void
126kdmsg_iocom_uninit(kdmsg_iocom_t *iocom)
127{
128 /*
129 * Ask the cluster controller to go away
130 */
131 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
132
133 while (iocom->msgrd_td || iocom->msgwr_td) {
134 wakeup(&iocom->msg_ctl);
135 tsleep(iocom, 0, "clstrkl", hz);
136 }
137
138 /*
139 * Drop communications descriptor
140 */
141 if (iocom->msg_fp) {
142 fdrop(iocom->msg_fp);
143 iocom->msg_fp = NULL;
144 }
145}
146
3a5aa68f
MD
147/*
148 * Cluster controller thread. Perform messaging functions. We have one
149 * thread for the reader and one for the writer. The writer handles
150 * shutdown requests (which should break the reader thread).
151 */
152static
153void
154kdmsg_iocom_thread_rd(void *arg)
155{
156 kdmsg_iocom_t *iocom = arg;
157 dmsg_hdr_t hdr;
158 kdmsg_msg_t *msg;
159 kdmsg_state_t *state;
160 size_t hbytes;
161 int error = 0;
162
163 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) {
164 /*
165 * Retrieve the message from the pipe or socket.
166 */
167 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr),
168 NULL, 1, UIO_SYSSPACE);
169 if (error)
170 break;
171 if (hdr.magic != DMSG_HDR_MAGIC) {
172 kprintf("kdmsg: bad magic: %04x\n", hdr.magic);
173 error = EINVAL;
174 break;
175 }
176 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN;
177 if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) {
178 kprintf("kdmsg: bad header size %zd\n", hbytes);
179 error = EINVAL;
180 break;
181 }
182 /* XXX messy: mask cmd to avoid allocating state */
183 msg = kdmsg_msg_alloc(&iocom->router,
184 hdr.cmd & DMSGF_BASECMDMASK,
185 NULL, NULL);
186 msg->any.head = hdr;
187 msg->hdr_size = hbytes;
188 if (hbytes > sizeof(hdr)) {
189 error = fp_read(iocom->msg_fp, &msg->any.head + 1,
190 hbytes - sizeof(hdr),
191 NULL, 1, UIO_SYSSPACE);
192 if (error) {
193 kprintf("kdmsg: short msg received\n");
194 error = EINVAL;
195 break;
196 }
197 }
198 msg->aux_size = hdr.aux_bytes * DMSG_ALIGN;
199 if (msg->aux_size > DMSG_AUX_MAX) {
200 kprintf("kdmsg: illegal msg payload size %zd\n",
201 msg->aux_size);
202 error = EINVAL;
203 break;
204 }
205 if (msg->aux_size) {
206 msg->aux_data = kmalloc(msg->aux_size, iocom->mmsg,
207 M_WAITOK | M_ZERO);
208 error = fp_read(iocom->msg_fp, msg->aux_data,
209 msg->aux_size,
210 NULL, 1, UIO_SYSSPACE);
211 if (error) {
212 kprintf("kdmsg: short msg payload received\n");
213 break;
214 }
215 }
216
217 /*
218 * State machine tracking, state assignment for msg,
219 * returns error and discard status. Errors are fatal
220 * to the connection except for EALREADY which forces
221 * a discard without execution.
222 */
223 error = kdmsg_state_msgrx(msg);
224 if (error) {
225 /*
226 * Raw protocol or connection error
227 */
228 kdmsg_msg_free(msg);
229 if (error == EALREADY)
230 error = 0;
231 } else if (msg->state && msg->state->func) {
232 /*
233 * Message related to state which already has a
234 * handling function installed for it.
235 */
236 error = msg->state->func(msg->state, msg);
237 kdmsg_state_cleanuprx(msg);
238 } else if ((msg->any.head.cmd & DMSGF_PROTOS) ==
239 DMSG_PROTO_LNK) {
240 /*
241 * Message related to the LNK protocol set
242 */
243 error = iocom->lnk_rcvmsg(msg);
244 kdmsg_state_cleanuprx(msg);
245 } else if ((msg->any.head.cmd & DMSGF_PROTOS) ==
246 DMSG_PROTO_DBG) {
247 /*
248 * Message related to the DBG protocol set
249 */
250 error = iocom->dbg_rcvmsg(msg);
251 kdmsg_state_cleanuprx(msg);
252 } else {
253 /*
254 * Other higher-level messages (e.g. vnops)
255 */
256 error = iocom->misc_rcvmsg(msg);
257 kdmsg_state_cleanuprx(msg);
258 }
259 msg = NULL;
260 }
261
262 if (error)
263 kprintf("kdmsg: read failed error %d\n", error);
264
265 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
266 if (msg) {
267 if (msg->state && msg->state->msg == msg)
268 msg->state->msg = NULL;
269 kdmsg_msg_free(msg);
270 }
271
272 if ((state = iocom->freerd_state) != NULL) {
273 iocom->freerd_state = NULL;
274 kdmsg_state_free(state);
275 }
276
277 /*
278 * Shutdown the socket before waiting for the transmit side.
279 *
280 * If we are dying due to e.g. a socket disconnect verses being
281 * killed explicity we have to set KILL in order to kick the tx
282 * side when it might not have any other work to do. KILL might
283 * already be set if we are in an unmount or reconnect.
284 */
285 fp_shutdown(iocom->msg_fp, SHUT_RDWR);
286
287 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
288 wakeup(&iocom->msg_ctl);
289
290 /*
291 * Wait for the transmit side to drain remaining messages
292 * before cleaning up the rx state. The transmit side will
293 * set KILLTX and wait for the rx side to completely finish
294 * (set msgrd_td to NULL) before cleaning up any remaining
295 * tx states.
296 */
297 lockmgr(&iocom->msglk, LK_RELEASE);
298 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX);
299 wakeup(&iocom->msg_ctl);
300 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) {
301 wakeup(&iocom->msg_ctl);
302 tsleep(iocom, 0, "clstrkw", hz);
303 }
304
305 iocom->msgrd_td = NULL;
306
307 /*
308 * iocom can be ripped out from under us at this point but
309 * wakeup() is safe.
310 */
311 wakeup(iocom);
312 lwkt_exit();
313}
314
315static
316void
317kdmsg_iocom_thread_wr(void *arg)
318{
319 kdmsg_iocom_t *iocom = arg;
320 kdmsg_msg_t *msg;
321 kdmsg_state_t *state;
322 ssize_t res;
323 int error = 0;
324 int retries = 20;
325
326 /*
327 * Transmit loop
328 */
329 msg = NULL;
330 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
331
332 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) {
333 /*
334 * Sleep if no messages pending. Interlock with flag while
335 * holding msglk.
336 */
337 if (TAILQ_EMPTY(&iocom->msgq)) {
338 atomic_set_int(&iocom->msg_ctl,
339 KDMSG_CLUSTERCTL_SLEEPING);
340 lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz);
341 atomic_clear_int(&iocom->msg_ctl,
342 KDMSG_CLUSTERCTL_SLEEPING);
343 }
344
345 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
346 /*
347 * Remove msg from the transmit queue and do
348 * persist and half-closed state handling.
349 */
350 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
351 lockmgr(&iocom->msglk, LK_RELEASE);
352
353 error = kdmsg_state_msgtx(msg);
354 if (error == EALREADY) {
355 error = 0;
356 kdmsg_msg_free(msg);
357 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
358 continue;
359 }
360 if (error) {
361 kdmsg_msg_free(msg);
362 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
363 break;
364 }
365
366 /*
367 * Dump the message to the pipe or socket.
368 */
369 error = fp_write(iocom->msg_fp, &msg->any,
370 msg->hdr_size, &res, UIO_SYSSPACE);
371 if (error || res != msg->hdr_size) {
372 if (error == 0)
373 error = EINVAL;
374 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
375 break;
376 }
377 if (msg->aux_size) {
378 error = fp_write(iocom->msg_fp,
379 msg->aux_data, msg->aux_size,
380 &res, UIO_SYSSPACE);
381 if (error || res != msg->aux_size) {
382 if (error == 0)
383 error = EINVAL;
384 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
385 break;
386 }
387 }
388 kdmsg_state_cleanuptx(msg);
389 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
390 }
391 }
392
393 /*
394 * Cleanup messages pending transmission and release msgq lock.
395 */
396 if (error)
397 kprintf("kdmsg: write failed error %d\n", error);
398
399 if (msg) {
400 if (msg->state && msg->state->msg == msg)
401 msg->state->msg = NULL;
402 kdmsg_msg_free(msg);
403 }
404
405 /*
406 * Shutdown the socket. This will cause the rx thread to get an
407 * EOF and ensure that both threads get to a termination state.
408 */
409 fp_shutdown(iocom->msg_fp, SHUT_RDWR);
410
411 /*
412 * Set KILLTX (which the rx side waits for), then wait for the RX
413 * side to completely finish before we clean out any remaining
414 * command states.
415 */
416 lockmgr(&iocom->msglk, LK_RELEASE);
417 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX);
418 wakeup(&iocom->msg_ctl);
419 while (iocom->msgrd_td) {
420 wakeup(&iocom->msg_ctl);
421 tsleep(iocom, 0, "clstrkw", hz);
422 }
423 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
424
425 /*
426 * Simulate received MSGF_DELETE's for any remaining states.
427 */
428cleanuprd:
429 RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) {
430 if (state->func &&
431 (state->rxcmd & DMSGF_DELETE) == 0) {
432 lockmgr(&iocom->msglk, LK_RELEASE);
433 msg = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_ERROR,
434 NULL, NULL);
435 if ((state->rxcmd & DMSGF_CREATE) == 0)
436 msg->any.head.cmd |= DMSGF_CREATE;
437 msg->any.head.cmd |= DMSGF_DELETE;
438 msg->state = state;
439 state->rxcmd = msg->any.head.cmd &
440 ~DMSGF_DELETE;
441 msg->state->func(state, msg);
442 kdmsg_state_cleanuprx(msg);
443 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
444 goto cleanuprd;
445 }
446 if (state->func == NULL) {
447 state->flags &= ~KDMSG_STATE_INSERTED;
448 RB_REMOVE(kdmsg_state_tree,
449 &iocom->staterd_tree, state);
450 kdmsg_state_free(state);
451 goto cleanuprd;
452 }
453 }
454
455 /*
456 * NOTE: We have to drain the msgq to handle situations
457 * where received states have built up output
458 * messages, to avoid creating messages with
459 * duplicate CREATE/DELETE flags.
460 */
461cleanupwr:
462 kdmsg_drain_msgq(iocom);
463 RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) {
464 if (state->func &&
465 (state->rxcmd & DMSGF_DELETE) == 0) {
466 lockmgr(&iocom->msglk, LK_RELEASE);
467 msg = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_ERROR,
468 NULL, NULL);
469 if ((state->rxcmd & DMSGF_CREATE) == 0)
470 msg->any.head.cmd |= DMSGF_CREATE;
471 msg->any.head.cmd |= DMSGF_DELETE |
472 DMSGF_REPLY;
473 msg->state = state;
474 state->rxcmd = msg->any.head.cmd &
475 ~DMSGF_DELETE;
476 msg->state->func(state, msg);
477 kdmsg_state_cleanuprx(msg);
478 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
479 goto cleanupwr;
480 }
481 if (state->func == NULL) {
482 state->flags &= ~KDMSG_STATE_INSERTED;
483 RB_REMOVE(kdmsg_state_tree,
484 &iocom->statewr_tree, state);
485 kdmsg_state_free(state);
486 goto cleanupwr;
487 }
488 }
489
490 kdmsg_drain_msgq(iocom);
491 if (--retries == 0)
492 panic("kdmsg: comm thread shutdown couldn't drain");
493 if (RB_ROOT(&iocom->statewr_tree))
494 goto cleanupwr;
495
496 if ((state = iocom->freewr_state) != NULL) {
497 iocom->freewr_state = NULL;
498 kdmsg_state_free(state);
499 }
500
501 lockmgr(&iocom->msglk, LK_RELEASE);
502
503 /*
504 * The state trees had better be empty now
505 */
506 KKASSERT(RB_EMPTY(&iocom->staterd_tree));
507 KKASSERT(RB_EMPTY(&iocom->statewr_tree));
508 KKASSERT(iocom->conn_state == NULL);
509
ddfbb283
MD
510 if (iocom->exit_func) {
511 /*
512 * iocom is invalid after we call the exit function.
513 */
514 iocom->msgwr_td = NULL;
515 iocom->exit_func(iocom);
516 } else {
517 /*
518 * iocom can be ripped out from under us once msgwr_td is
519 * set to NULL. The wakeup is safe.
520 */
521 iocom->msgwr_td = NULL;
522 wakeup(iocom);
523 }
3a5aa68f
MD
524 lwkt_exit();
525}
526
527/*
528 * This cleans out the pending transmit message queue, adjusting any
529 * persistent states properly in the process.
530 *
531 * Caller must hold iocom->msglk.
532 */
533void
534kdmsg_drain_msgq(kdmsg_iocom_t *iocom)
535{
536 kdmsg_msg_t *msg;
537
538 /*
539 * Clean out our pending transmit queue, executing the
540 * appropriate state adjustments. If this tries to open
541 * any new outgoing transactions we have to loop up and
542 * clean them out.
543 */
544 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
545 TAILQ_REMOVE(&iocom->msgq, msg, qentry);
546 lockmgr(&iocom->msglk, LK_RELEASE);
547 if (msg->state && msg->state->msg == msg)
548 msg->state->msg = NULL;
549 if (kdmsg_state_msgtx(msg))
550 kdmsg_msg_free(msg);
551 else
552 kdmsg_state_cleanuptx(msg);
553 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
554 }
555}
9b8b748f 556
26bf1a36
MD
557/*
558 * Process state tracking for a message after reception, prior to
559 * execution.
560 *
561 * Called without msglk held; the lock is taken and released internally.
562 *
563 * All messages are called with dummy state and return actual state.
564 * (One-off messages often just return the same dummy state).
565 *
566 * May request that the caller discard the message by returning EALREADY.
567 * msg->state is not used in this case and is allowed to be NULL.
568 *
569 * --
570 *
571 * These routines handle persistent and command/reply message state via the
572 * CREATE and DELETE flags. The first message in a command or reply sequence
573 * sets CREATE, the last message in a command or reply sequence sets DELETE.
574 *
575 * There can be any number of intermediate messages belonging to the same
576 * sequence sent in between the CREATE message and the DELETE message,
577 * which set neither flag. This represents a streaming command or reply.
578 *
579 * Any command message received with CREATE set expects a reply sequence to
580 * be returned. Reply sequences work the same as command sequences except the
581 * REPLY bit is also sent. Both the command side and reply side can
582 * degenerate into a single message with both CREATE and DELETE set. Note
583 * that one side can be streaming and the other side not, or neither, or both.
584 *
585 * The msgid is unique for the initiator. That is, two sides sending a new
586 * message can use the same msgid without colliding.
587 *
588 * --
589 *
590 * ABORT sequences work by setting the ABORT flag along with normal message
591 * state. However, ABORTs can also be sent on half-closed messages, that is
592 * even if the command or reply side has already sent a DELETE, as long as
593 * the message has not been fully closed it can still send an ABORT+DELETE
594 * to terminate the half-closed message state.
595 *
596 * Since ABORT+DELETEs can race we silently discard ABORT's for message
597 * state which has already been fully closed. REPLY+ABORT+DELETEs can
598 * also race, and in this situation the other side might have already
599 * initiated a new unrelated command with the same message id. Since
600 * the abort has not set the CREATE flag the situation can be detected
601 * and the message will also be discarded.
602 *
603 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
604 * The ABORT request is essentially integrated into the command instead
605 * of being sent later on. In this situation the command implementation
606 * detects that CREATE and ABORT are both set (vs ABORT alone) and can
607 * special-case non-blocking operation for the command.
608 *
609 * NOTE! Messages with ABORT set without CREATE or DELETE are considered
610 * to be mid-stream aborts for command/reply sequences. ABORTs on
611 * one-way messages are not supported.
612 *
613 * NOTE! If a command sequence does not support aborts the ABORT flag is
614 * simply ignored.
615 *
616 * --
617 *
618 * One-off messages (no reply expected) are sent with neither CREATE or DELETE
619 * set. One-off messages cannot be aborted and typically aren't processed
620 * by these routines. The REPLY bit can be used to distinguish whether a
621 * one-off message is a command or reply. For example, one-off replies
622 * will typically just contain status updates.
623 */
624int
3a5aa68f 625kdmsg_state_msgrx(kdmsg_msg_t *msg)
26bf1a36 626{
3a5aa68f
MD
627 kdmsg_iocom_t *iocom;
628 kdmsg_state_t *state;
26bf1a36
MD
629 int error;
630
3a5aa68f 631 iocom = msg->router->iocom;
10c86c4e
MD
632
633 /*
634 * XXX resolve msg->any.head.source and msg->any.head.target
635 * into LNK_SPAN references.
636 *
637 * XXX replace msg->router
638 */
639
26bf1a36
MD
640 /*
641 * Make sure a state structure is ready to go in case we need a new
642 * one. This is the only routine which uses freerd_state so no
643 * races are possible.
644 */
3a5aa68f
MD
645 if ((state = iocom->freerd_state) == NULL) {
646 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
647 state->flags = KDMSG_STATE_DYNAMIC;
648 iocom->freerd_state = state;
26bf1a36
MD
649 }
650
651 /*
652 * Lock RB tree and locate existing persistent state, if any.
653 *
654 * If received msg is a command state is on staterd_tree.
655 * If received msg is a reply state is on statewr_tree.
656 */
3a5aa68f 657 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
26bf1a36
MD
658
659 state->msgid = msg->any.head.msgid;
3a5aa68f 660 state->router = &iocom->router;
10c86c4e 661 kprintf("received msg %08x msgid %jx source=%jx target=%jx\n",
8c280d5d 662 msg->any.head.cmd,
10c86c4e
MD
663 (intmax_t)msg->any.head.msgid,
664 (intmax_t)msg->any.head.source,
665 (intmax_t)msg->any.head.target);
5bc5bca2 666 if (msg->any.head.cmd & DMSGF_REPLY)
3a5aa68f 667 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, state);
26bf1a36 668 else
3a5aa68f 669 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, state);
26bf1a36
MD
670 msg->state = state;
671
672 /*
673 * Short-cut one-off or mid-stream messages (state may be NULL).
674 */
5bc5bca2
MD
675 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
676 DMSGF_ABORT)) == 0) {
3a5aa68f 677 lockmgr(&iocom->msglk, LK_RELEASE);
26bf1a36
MD
678 return(0);
679 }
680
681 /*
682 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
683 * inside the case statements.
684 */
5bc5bca2
MD
685 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY)) {
686 case DMSGF_CREATE:
687 case DMSGF_CREATE | DMSGF_DELETE:
26bf1a36
MD
688 /*
689 * New persistant command received.
690 */
691 if (state) {
3a5aa68f 692 kprintf("kdmsg_state_msgrx: duplicate transaction\n");
26bf1a36
MD
693 error = EINVAL;
694 break;
695 }
3a5aa68f
MD
696 state = iocom->freerd_state;
697 iocom->freerd_state = NULL;
26bf1a36 698 msg->state = state;
10c86c4e 699 state->router = msg->router;
26bf1a36 700 state->msg = msg;
5bc5bca2
MD
701 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
702 state->txcmd = DMSGF_REPLY;
3a5aa68f
MD
703 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state);
704 state->flags |= KDMSG_STATE_INSERTED;
26bf1a36
MD
705 error = 0;
706 break;
5bc5bca2 707 case DMSGF_DELETE:
26bf1a36
MD
708 /*
709 * Persistent state is expected but might not exist if an
710 * ABORT+DELETE races the close.
711 */
712 if (state == NULL) {
5bc5bca2 713 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36
MD
714 error = EALREADY;
715 } else {
3a5aa68f 716 kprintf("kdmsg_state_msgrx: no state "
26bf1a36
MD
717 "for DELETE\n");
718 error = EINVAL;
719 }
720 break;
721 }
722
723 /*
724 * Handle another ABORT+DELETE case if the msgid has already
725 * been reused.
726 */
5bc5bca2
MD
727 if ((state->rxcmd & DMSGF_CREATE) == 0) {
728 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36
MD
729 error = EALREADY;
730 } else {
3a5aa68f 731 kprintf("kdmsg_state_msgrx: state reused "
26bf1a36
MD
732 "for DELETE\n");
733 error = EINVAL;
734 }
735 break;
736 }
737 error = 0;
738 break;
739 default:
740 /*
741 * Check for mid-stream ABORT command received, otherwise
742 * allow.
743 */
5bc5bca2 744 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36 745 if (state == NULL ||
5bc5bca2 746 (state->rxcmd & DMSGF_CREATE) == 0) {
26bf1a36
MD
747 error = EALREADY;
748 break;
749 }
750 }
751 error = 0;
752 break;
5bc5bca2
MD
753 case DMSGF_REPLY | DMSGF_CREATE:
754 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
26bf1a36
MD
755 /*
756 * When receiving a reply with CREATE set the original
757 * persistent state message should already exist.
758 */
759 if (state == NULL) {
3a5aa68f 760 kprintf("kdmsg_state_msgrx: no state match for "
62e59746
MD
761 "REPLY cmd=%08x msgid=%016jx\n",
762 msg->any.head.cmd,
763 (intmax_t)msg->any.head.msgid);
26bf1a36
MD
764 error = EINVAL;
765 break;
766 }
5bc5bca2 767 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
26bf1a36
MD
768 error = 0;
769 break;
5bc5bca2 770 case DMSGF_REPLY | DMSGF_DELETE:
26bf1a36
MD
771 /*
772 * Received REPLY+ABORT+DELETE in case where msgid has
773 * already been fully closed, ignore the message.
774 */
775 if (state == NULL) {
5bc5bca2 776 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36
MD
777 error = EALREADY;
778 } else {
3a5aa68f 779 kprintf("kdmsg_state_msgrx: no state match "
26bf1a36
MD
780 "for REPLY|DELETE\n");
781 error = EINVAL;
782 }
783 break;
784 }
785
786 /*
787 * Received REPLY+ABORT+DELETE in case where msgid has
788 * already been reused for an unrelated message,
789 * ignore the message.
790 */
5bc5bca2
MD
791 if ((state->rxcmd & DMSGF_CREATE) == 0) {
792 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36
MD
793 error = EALREADY;
794 } else {
3a5aa68f 795 kprintf("kdmsg_state_msgrx: state reused "
26bf1a36
MD
796 "for REPLY|DELETE\n");
797 error = EINVAL;
798 }
799 break;
800 }
801 error = 0;
802 break;
5bc5bca2 803 case DMSGF_REPLY:
26bf1a36
MD
804 /*
805 * Check for mid-stream ABORT reply received to sent command.
806 */
5bc5bca2 807 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36 808 if (state == NULL ||
5bc5bca2 809 (state->rxcmd & DMSGF_CREATE) == 0) {
26bf1a36
MD
810 error = EALREADY;
811 break;
812 }
813 }
814 error = 0;
815 break;
816 }
3a5aa68f 817 lockmgr(&iocom->msglk, LK_RELEASE);
26bf1a36
MD
818 return (error);
819}
820
821void
3a5aa68f 822kdmsg_state_cleanuprx(kdmsg_msg_t *msg)
26bf1a36 823{
3a5aa68f
MD
824 kdmsg_iocom_t *iocom;
825 kdmsg_state_t *state;
826
827 iocom = msg->router->iocom;
26bf1a36
MD
828
829 if ((state = msg->state) == NULL) {
3a5aa68f 830 kdmsg_msg_free(msg);
5bc5bca2 831 } else if (msg->any.head.cmd & DMSGF_DELETE) {
3a5aa68f 832 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
5bc5bca2
MD
833 state->rxcmd |= DMSGF_DELETE;
834 if (state->txcmd & DMSGF_DELETE) {
26bf1a36
MD
835 if (state->msg == msg)
836 state->msg = NULL;
3a5aa68f 837 KKASSERT(state->flags & KDMSG_STATE_INSERTED);
5bc5bca2 838 if (state->rxcmd & DMSGF_REPLY) {
62e59746 839 KKASSERT(msg->any.head.cmd &
5bc5bca2 840 DMSGF_REPLY);
3a5aa68f
MD
841 RB_REMOVE(kdmsg_state_tree,
842 &iocom->statewr_tree, state);
26bf1a36 843 } else {
62e59746 844 KKASSERT((msg->any.head.cmd &
5bc5bca2 845 DMSGF_REPLY) == 0);
3a5aa68f
MD
846 RB_REMOVE(kdmsg_state_tree,
847 &iocom->staterd_tree, state);
26bf1a36 848 }
3a5aa68f
MD
849 state->flags &= ~KDMSG_STATE_INSERTED;
850 lockmgr(&iocom->msglk, LK_RELEASE);
851 kdmsg_state_free(state);
26bf1a36 852 } else {
3a5aa68f 853 lockmgr(&iocom->msglk, LK_RELEASE);
26bf1a36 854 }
3a5aa68f 855 kdmsg_msg_free(msg);
26bf1a36 856 } else if (state->msg != msg) {
3a5aa68f 857 kdmsg_msg_free(msg);
26bf1a36
MD
858 }
859}
860
861/*
862 * Process state tracking for a message prior to transmission.
863 *
864 * Called without msglk held (taken internally) and with the msg dequeued.
865 *
866 * One-off messages usually carry only dummy state, and msg->state may be
867 * NULL in this situation.
868 *
869 * New transactions (CREATE set) must already have their state assigned.
870 *
871 * May request that the caller discard the message by returning EALREADY.
872 * msg->state may be NULL in this case.
873 */
874int
3a5aa68f 875kdmsg_state_msgtx(kdmsg_msg_t *msg)
26bf1a36 876{
3a5aa68f
MD
877 kdmsg_iocom_t *iocom;
878 kdmsg_state_t *state;
26bf1a36
MD
879 int error;
880
3a5aa68f
MD
881 iocom = msg->router->iocom;
882
26bf1a36
MD
883 /*
884 * Make sure a state structure is ready to go in case we need a new
885 * one. This is the only routine which uses freewr_state so no
886 * races are possible.
887 */
3a5aa68f
MD
888 if ((state = iocom->freewr_state) == NULL) {
889 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
890 state->flags = KDMSG_STATE_DYNAMIC;
8a9471c3 891 state->router = &iocom->router;
3a5aa68f 892 iocom->freewr_state = state;
26bf1a36
MD
893 }
894
895 /*
896 * Lock RB tree. If persistent state is present it will have already
897 * been assigned to msg.
898 */
3a5aa68f 899 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
26bf1a36
MD
900 state = msg->state;
901
902 /*
903 * Short-cut one-off or mid-stream messages (state may be NULL).
904 */
5bc5bca2
MD
905 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
906 DMSGF_ABORT)) == 0) {
3a5aa68f 907 lockmgr(&iocom->msglk, LK_RELEASE);
26bf1a36
MD
908 return(0);
909 }
910
911
912 /*
913 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
914 * inside the case statements.
915 */
5bc5bca2
MD
916 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
917 DMSGF_REPLY)) {
918 case DMSGF_CREATE:
919 case DMSGF_CREATE | DMSGF_DELETE:
26bf1a36
MD
920 /*
921 * Insert the new persistent message state and mark
922 * half-closed if DELETE is set. Since this is a new
923 * message it isn't possible to transition into the fully
924 * closed state here.
9b8b748f
MD
925 *
926 * XXX state must be assigned and inserted by
3a5aa68f 927 * kdmsg_msg_write(). txcmd is assigned by us
9b8b748f 928 * on-transmit.
26bf1a36 929 */
9b8b748f 930 KKASSERT(state != NULL);
5bc5bca2
MD
931 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
932 state->rxcmd = DMSGF_REPLY;
26bf1a36
MD
933 error = 0;
934 break;
5bc5bca2 935 case DMSGF_DELETE:
26bf1a36
MD
936 /*
937 * Sent ABORT+DELETE in case where msgid has already
938 * been fully closed, ignore the message.
939 */
940 if (state == NULL) {
5bc5bca2 941 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36
MD
942 error = EALREADY;
943 } else {
3a5aa68f 944 kprintf("kdmsg_state_msgtx: no state match "
62e59746
MD
945 "for DELETE cmd=%08x msgid=%016jx\n",
946 msg->any.head.cmd,
947 (intmax_t)msg->any.head.msgid);
26bf1a36
MD
948 error = EINVAL;
949 }
950 break;
951 }
952
953 /*
954 * Sent ABORT+DELETE in case where msgid has
955 * already been reused for an unrelated message,
956 * ignore the message.
957 */
5bc5bca2
MD
958 if ((state->txcmd & DMSGF_CREATE) == 0) {
959 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36
MD
960 error = EALREADY;
961 } else {
3a5aa68f 962 kprintf("kdmsg_state_msgtx: state reused "
26bf1a36
MD
963 "for DELETE\n");
964 error = EINVAL;
965 }
966 break;
967 }
968 error = 0;
969 break;
970 default:
971 /*
972 * Check for mid-stream ABORT command sent
973 */
5bc5bca2 974 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36 975 if (state == NULL ||
5bc5bca2 976 (state->txcmd & DMSGF_CREATE) == 0) {
26bf1a36
MD
977 error = EALREADY;
978 break;
979 }
980 }
981 error = 0;
982 break;
5bc5bca2
MD
983 case DMSGF_REPLY | DMSGF_CREATE:
984 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
26bf1a36
MD
985 /*
986 * When transmitting a reply with CREATE set the original
987 * persistent state message should already exist.
988 */
989 if (state == NULL) {
3a5aa68f 990 kprintf("kdmsg_state_msgtx: no state match "
26bf1a36
MD
991 "for REPLY | CREATE\n");
992 error = EINVAL;
993 break;
994 }
5bc5bca2 995 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
26bf1a36
MD
996 error = 0;
997 break;
5bc5bca2 998 case DMSGF_REPLY | DMSGF_DELETE:
26bf1a36
MD
999 /*
1000 * When transmitting a reply with DELETE set the original
1001 * persistent state message should already exist.
1002 *
1003 * This is very similar to the REPLY|CREATE|* case except
1004 * txcmd is already stored, so we just add the DELETE flag.
1005 *
1006 * Sent REPLY+ABORT+DELETE in case where msgid has
1007 * already been fully closed, ignore the message.
1008 */
1009 if (state == NULL) {
5bc5bca2 1010 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36
MD
1011 error = EALREADY;
1012 } else {
3a5aa68f 1013 kprintf("kdmsg_state_msgtx: no state match "
26bf1a36
MD
1014 "for REPLY | DELETE\n");
1015 error = EINVAL;
1016 }
1017 break;
1018 }
1019
1020 /*
1021 * Sent REPLY+ABORT+DELETE in case where msgid has already
1022 * been reused for an unrelated message, ignore the message.
1023 */
5bc5bca2
MD
1024 if ((state->txcmd & DMSGF_CREATE) == 0) {
1025 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36
MD
1026 error = EALREADY;
1027 } else {
3a5aa68f 1028 kprintf("kdmsg_state_msgtx: state reused "
26bf1a36
MD
1029 "for REPLY | DELETE\n");
1030 error = EINVAL;
1031 }
1032 break;
1033 }
1034 error = 0;
1035 break;
5bc5bca2 1036 case DMSGF_REPLY:
26bf1a36
MD
1037 /*
1038 * Check for mid-stream ABORT reply sent.
1039 *
1040 * One-off REPLY messages are allowed for e.g. status updates.
1041 */
5bc5bca2 1042 if (msg->any.head.cmd & DMSGF_ABORT) {
26bf1a36 1043 if (state == NULL ||
5bc5bca2 1044 (state->txcmd & DMSGF_CREATE) == 0) {
26bf1a36
MD
1045 error = EALREADY;
1046 break;
1047 }
1048 }
1049 error = 0;
1050 break;
1051 }
3a5aa68f 1052 lockmgr(&iocom->msglk, LK_RELEASE);
26bf1a36
MD
1053 return (error);
1054}
1055
1056void
3a5aa68f 1057kdmsg_state_cleanuptx(kdmsg_msg_t *msg)
26bf1a36 1058{
3a5aa68f
MD
1059 kdmsg_iocom_t *iocom;
1060 kdmsg_state_t *state;
1061
1062 iocom = msg->router->iocom;
26bf1a36
MD
1063
1064 if ((state = msg->state) == NULL) {
3a5aa68f 1065 kdmsg_msg_free(msg);
5bc5bca2 1066 } else if (msg->any.head.cmd & DMSGF_DELETE) {
3a5aa68f 1067 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
5bc5bca2
MD
1068 state->txcmd |= DMSGF_DELETE;
1069 if (state->rxcmd & DMSGF_DELETE) {
26bf1a36
MD
1070 if (state->msg == msg)
1071 state->msg = NULL;
3a5aa68f 1072 KKASSERT(state->flags & KDMSG_STATE_INSERTED);
5bc5bca2 1073 if (state->txcmd & DMSGF_REPLY) {
62e59746 1074 KKASSERT(msg->any.head.cmd &
5bc5bca2 1075 DMSGF_REPLY);
3a5aa68f
MD
1076 RB_REMOVE(kdmsg_state_tree,
1077 &iocom->staterd_tree, state);
26bf1a36 1078 } else {
62e59746 1079 KKASSERT((msg->any.head.cmd &
5bc5bca2 1080 DMSGF_REPLY) == 0);
3a5aa68f
MD
1081 RB_REMOVE(kdmsg_state_tree,
1082 &iocom->statewr_tree, state);
26bf1a36 1083 }
3a5aa68f
MD
1084 state->flags &= ~KDMSG_STATE_INSERTED;
1085 lockmgr(&iocom->msglk, LK_RELEASE);
1086 kdmsg_state_free(state);
26bf1a36 1087 } else {
3a5aa68f 1088 lockmgr(&iocom->msglk, LK_RELEASE);
26bf1a36 1089 }
3a5aa68f 1090 kdmsg_msg_free(msg);
26bf1a36 1091 } else if (state->msg != msg) {
3a5aa68f 1092 kdmsg_msg_free(msg);
26bf1a36
MD
1093 }
1094}
1095
1096void
3a5aa68f 1097kdmsg_state_free(kdmsg_state_t *state)
26bf1a36 1098{
3a5aa68f
MD
1099 kdmsg_iocom_t *iocom;
1100 kdmsg_msg_t *msg;
1101
1102 iocom = state->router->iocom;
26bf1a36 1103
3a5aa68f 1104 KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0);
26bf1a36
MD
1105 msg = state->msg;
1106 state->msg = NULL;
3a5aa68f 1107 kfree(state, iocom->mmsg);
26bf1a36 1108 if (msg)
3a5aa68f 1109 kdmsg_msg_free(msg);
26bf1a36
MD
1110}
1111
3a5aa68f
MD
1112kdmsg_msg_t *
1113kdmsg_msg_alloc(kdmsg_router_t *router, uint32_t cmd,
1114 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
9b8b748f 1115{
3a5aa68f
MD
1116 kdmsg_iocom_t *iocom;
1117 kdmsg_msg_t *msg;
1118 kdmsg_state_t *state;
9b8b748f
MD
1119 size_t hbytes;
1120
3a5aa68f 1121 iocom = router->iocom;
5bc5bca2 1122 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
3a5aa68f
MD
1123 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
1124 iocom->mmsg, M_WAITOK | M_ZERO);
9b8b748f 1125 msg->hdr_size = hbytes;
10c86c4e
MD
1126 msg->router = router;
1127 KKASSERT(router != NULL);
5bc5bca2 1128 msg->any.head.magic = DMSG_HDR_MAGIC;
10c86c4e
MD
1129 msg->any.head.source = 0;
1130 msg->any.head.target = router->target;
9b8b748f
MD
1131 msg->any.head.cmd = cmd;
1132
5bc5bca2 1133 if (cmd & DMSGF_CREATE) {
1a34728c
MD
1134 /*
1135 * New transaction, requires tracking state and a unique
1136 * msgid to be allocated.
1137 */
1138 KKASSERT(msg->state == NULL);
3a5aa68f
MD
1139 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
1140 state->flags = KDMSG_STATE_DYNAMIC;
1a34728c
MD
1141 state->func = func;
1142 state->any.any = data;
1143 state->msg = msg;
1144 state->msgid = (uint64_t)(uintptr_t)state;
1145 state->router = msg->router;
1146 msg->state = state;
1147 msg->any.head.source = 0;
1148 msg->any.head.target = state->router->target;
1149 msg->any.head.msgid = state->msgid;
1150
3a5aa68f
MD
1151 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1152 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state))
1a34728c 1153 panic("duplicate msgid allocated");
3a5aa68f 1154 state->flags |= KDMSG_STATE_INSERTED;
1a34728c 1155 msg->any.head.msgid = state->msgid;
3a5aa68f 1156 lockmgr(&iocom->msglk, LK_RELEASE);
1a34728c
MD
1157 }
1158
9b8b748f
MD
1159 return (msg);
1160}
1161
26bf1a36 1162void
3a5aa68f 1163kdmsg_msg_free(kdmsg_msg_t *msg)
26bf1a36 1164{
3a5aa68f
MD
1165 kdmsg_iocom_t *iocom;
1166
1167 iocom = msg->router->iocom;
10c86c4e 1168
26bf1a36 1169 if (msg->aux_data && msg->aux_size) {
3a5aa68f 1170 kfree(msg->aux_data, iocom->mmsg);
26bf1a36
MD
1171 msg->aux_data = NULL;
1172 msg->aux_size = 0;
10c86c4e 1173 msg->router = NULL;
26bf1a36 1174 }
3a5aa68f 1175 kfree(msg, iocom->mmsg);
26bf1a36
MD
1176}
1177
1178/*
1179 * Indexed messages are stored in a red-black tree indexed by their
1180 * msgid. Only persistent messages are indexed.
1181 */
1182int
3a5aa68f 1183kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2)
26bf1a36 1184{
10c86c4e 1185 if (state1->router < state2->router)
9b8b748f 1186 return(-1);
10c86c4e 1187 if (state1->router > state2->router)
9b8b748f 1188 return(1);
26bf1a36
MD
1189 if (state1->msgid < state2->msgid)
1190 return(-1);
1191 if (state1->msgid > state2->msgid)
1192 return(1);
1193 return(0);
1194}
1195
1196/*
8c280d5d
MD
1197 * Write a message. All requisit command flags have been set.
1198 *
1199 * If msg->state is non-NULL the message is written to the existing
10c86c4e 1200 * transaction. msgid will be set accordingly.
8c280d5d
MD
1201 *
1202 * If msg->state is NULL and CREATE is set new state is allocated and
10c86c4e 1203 * (func, data) is installed. A msgid is assigned.
26bf1a36 1204 *
8c280d5d 1205 * If msg->state is NULL and CREATE is not set the message is assumed
10c86c4e
MD
1206 * to be a one-way message. The originator must assign the msgid
1207 * (or leave it 0, which is typical.
8c280d5d
MD
1208 *
1209 * This function merely queues the message to the management thread, it
1210 * does not write to the message socket/pipe.
26bf1a36 1211 */
8c280d5d 1212void
3a5aa68f 1213kdmsg_msg_write(kdmsg_msg_t *msg)
26bf1a36 1214{
3a5aa68f
MD
1215 kdmsg_iocom_t *iocom;
1216 kdmsg_state_t *state;
1217
1218 iocom = msg->router->iocom;
9b8b748f 1219
8c280d5d
MD
1220 if (msg->state) {
1221 /*
1222 * Continuance or termination of existing transaction.
1223 * The transaction could have been initiated by either end.
1224 *
1225 * (Function callback and aux data for the receive side can
1226 * be replaced or left alone).
1227 */
10c86c4e
MD
1228 state = msg->state;
1229 msg->any.head.msgid = state->msgid;
1230 msg->any.head.source = 0;
1231 msg->any.head.target = state->router->target;
3a5aa68f 1232 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
9b8b748f
MD
1233 } else {
1234 /*
8c280d5d
MD
1235 * One-off message (always uses msgid 0 to distinguish
1236 * between a possibly lost in-transaction message due to
1237 * competing aborts and a real one-off message?)
9b8b748f
MD
1238 */
1239 msg->any.head.msgid = 0;
10c86c4e
MD
1240 msg->any.head.source = 0;
1241 msg->any.head.target = msg->router->target;
3a5aa68f 1242 lockmgr(&iocom->msglk, LK_EXCLUSIVE);
9b8b748f
MD
1243 }
1244
1245 /*
8c280d5d 1246 * Finish up the msg fields
9b8b748f 1247 */
3a5aa68f
MD
1248 msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255);
1249 ++iocom->msg_seq;
8c280d5d
MD
1250
1251 msg->any.head.hdr_crc = 0;
3a5aa68f 1252 msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size);
9b8b748f 1253
3a5aa68f 1254 TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry);
185ace93
MD
1255
1256 if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) {
1257 atomic_clear_int(&iocom->msg_ctl,
1258 KDMSG_CLUSTERCTL_SLEEPING);
1259 wakeup(&iocom->msg_ctl);
1260 }
1261
3a5aa68f 1262 lockmgr(&iocom->msglk, LK_RELEASE);
8c280d5d
MD
1263}
1264
1265/*
1266 * Reply to a message and terminate our side of the transaction.
1267 *
1268 * If msg->state is non-NULL we are replying to a one-way message.
1269 */
1270void
3a5aa68f 1271kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error)
8c280d5d 1272{
3a5aa68f
MD
1273 kdmsg_state_t *state = msg->state;
1274 kdmsg_msg_t *nmsg;
8c280d5d
MD
1275 uint32_t cmd;
1276
1277 /*
1278 * Reply with a simple error code and terminate the transaction.
1279 */
5bc5bca2 1280 cmd = DMSG_LNK_ERROR;
8c280d5d
MD
1281
1282 /*
1283 * Check if our direction has even been initiated yet, set CREATE.
1284 *
1285 * Check what direction this is (command or reply direction). Note
1286 * that txcmd might not have been initiated yet.
1287 *
1288 * If our direction has already been closed we just return without
1289 * doing anything.
1290 */
1291 if (state) {
5bc5bca2 1292 if (state->txcmd & DMSGF_DELETE)
8c280d5d 1293 return;
5bc5bca2
MD
1294 if ((state->txcmd & DMSGF_CREATE) == 0)
1295 cmd |= DMSGF_CREATE;
1296 if (state->txcmd & DMSGF_REPLY)
1297 cmd |= DMSGF_REPLY;
1298 cmd |= DMSGF_DELETE;
8c280d5d 1299 } else {
5bc5bca2
MD
1300 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1301 cmd |= DMSGF_REPLY;
8c280d5d 1302 }
70c3c3b7 1303 kprintf("MSG_REPLY state=%p msg %08x\n", state, cmd);
8c280d5d 1304
1a34728c 1305 /* XXX messy mask cmd to avoid allocating state */
3a5aa68f
MD
1306 nmsg = kdmsg_msg_alloc(msg->router, cmd & DMSGF_BASECMDMASK,
1307 NULL, NULL);
1a34728c 1308 nmsg->any.head.cmd = cmd;
10c86c4e
MD
1309 nmsg->any.head.error = error;
1310 nmsg->state = state;
3a5aa68f 1311 kdmsg_msg_write(nmsg);
8c280d5d
MD
1312}
1313
1314/*
1315 * Reply to a message and continue our side of the transaction.
1316 *
1317 * If msg->state is non-NULL we are replying to a one-way message and this
3a5aa68f 1318 * function degenerates into the same as kdmsg_msg_reply().
8c280d5d
MD
1319 */
1320void
3a5aa68f 1321kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error)
8c280d5d 1322{
3a5aa68f
MD
1323 kdmsg_state_t *state = msg->state;
1324 kdmsg_msg_t *nmsg;
8c280d5d
MD
1325 uint32_t cmd;
1326
1327 /*
1328 * Return a simple result code, do NOT terminate the transaction.
1329 */
5bc5bca2 1330 cmd = DMSG_LNK_ERROR;
8c280d5d
MD
1331
1332 /*
1333 * Check if our direction has even been initiated yet, set CREATE.
1334 *
1335 * Check what direction this is (command or reply direction). Note
1336 * that txcmd might not have been initiated yet.
1337 *
1338 * If our direction has already been closed we just return without
1339 * doing anything.
1340 */
1341 if (state) {
5bc5bca2 1342 if (state->txcmd & DMSGF_DELETE)
8c280d5d 1343 return;
5bc5bca2
MD
1344 if ((state->txcmd & DMSGF_CREATE) == 0)
1345 cmd |= DMSGF_CREATE;
1346 if (state->txcmd & DMSGF_REPLY)
1347 cmd |= DMSGF_REPLY;
8c280d5d
MD
1348 /* continuing transaction, do not set MSGF_DELETE */
1349 } else {
5bc5bca2
MD
1350 if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1351 cmd |= DMSGF_REPLY;
8c280d5d 1352 }
9b8b748f 1353
1a34728c 1354 /* XXX messy mask cmd to avoid allocating state */
3a5aa68f
MD
1355 nmsg = kdmsg_msg_alloc(msg->router, cmd & DMSGF_BASECMDMASK,
1356 NULL, NULL);
1a34728c 1357 nmsg->any.head.cmd = cmd;
10c86c4e
MD
1358 nmsg->any.head.error = error;
1359 nmsg->state = state;
3a5aa68f 1360 kdmsg_msg_write(nmsg);
26bf1a36 1361}