2 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * NOTE! This file may be compiled for userland libraries as well as for
37 * $DragonFly: src/sys/kern/lwkt_msgport.c,v 1.38 2007/02/25 23:17:12 corecode Exp $
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
46 #include <sys/rtprio.h>
47 #include <sys/queue.h>
48 #include <sys/sysctl.h>
49 #include <sys/kthread.h>
50 #include <sys/signalvar.h>
51 #include <sys/signal2.h>
52 #include <machine/cpu.h>
56 #include <vm/vm_param.h>
57 #include <vm/vm_kern.h>
58 #include <vm/vm_object.h>
59 #include <vm/vm_page.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_pager.h>
62 #include <vm/vm_extern.h>
63 #include <vm/vm_zone.h>
65 #include <sys/thread2.h>
66 #include <sys/msgport2.h>
68 #include <machine/stdarg.h>
69 #include <machine/cpufunc.h>
71 #include <machine/smp.h>
74 #include <sys/malloc.h>
/*
 * Defines M_LWKTMSG; both description strings read "lwkt message".
 * NOTE(review): presumably the malloc type for message allocations; no
 * allocation site is visible in this part of the file -- confirm.
 */
75 MALLOC_DEFINE(M_LWKTMSG, "lwkt message", "lwkt message");
79 #include <sys/stdint.h>
80 #include <libcaps/thread.h>
81 #include <sys/thread.h>
82 #include <sys/msgport.h>
83 #include <sys/errno.h>
84 #include <libcaps/globaldata.h>
85 #include <machine/cpufunc.h>
86 #include <sys/thread2.h>
87 #include <sys/msgport2.h>
93 /************************************************************************
95 ************************************************************************/
/*
 * Forward declarations for the file-local remote handlers.  Each one is
 * passed, cast to ipifunc1_t, to lwkt_send_ipiq() further down in this
 * file, with the message as its only argument.
 */
98 static void lwkt_replyport_remote(lwkt_msg_t msg);
99 static void lwkt_putport_remote(lwkt_msg_t msg);
100 static void lwkt_abortmsg_remote(lwkt_msg_t msg);
106 * Send a message asynchronously. This function requests asynchronous
107 * completion and calls lwkt_beginmsg(). If the target port decides to
108 * run the message synchronously this function will automatically queue
109 * the message to the current thread's message queue to present a
110 * consistent interface to the caller.
112 * The message's ms_cmd must be initialized and its ms_flags must
113 * be zero'd out. lwkt_sendmsg() will initialize the ms_abort_port
114 * (abort chasing port). If abort is supported, ms_abort must also be
117 * NOTE: you cannot safely request an abort until lwkt_sendmsg() returns
120 * NOTE: MSGF_DONE is left set. The target port must clear it if the
121 * message is to be handled asynchronously, while the synchronous case
122 * can just ignore it.
/*
 * lwkt_sendmsg: begin an asynchronous message on port.
 *
 * NOTE(review): this listing appears to have dropped short lines (return
 * type, braces, local declarations); error is assigned below but its
 * declaration is not visible here.
 */
125 lwkt_sendmsg(lwkt_port_t port, lwkt_msg_t msg)
/* Request async completion and clear the reply/queue/abort/retrieved bits. */
129 msg->ms_flags |= MSGF_ASYNC;
130 msg->ms_flags &= ~(MSGF_REPLY1 | MSGF_REPLY2 | MSGF_QUEUED | \
131 MSGF_ABORTED | MSGF_RETRIEVED);
/* A reply port is mandatory; the abort port starts out equal to it. */
132 KKASSERT(msg->ms_reply_port != NULL);
133 msg->ms_abort_port = msg->ms_reply_port;
/* Any result other than EASYNC is replied immediately with that code. */
134 if ((error = lwkt_beginmsg(port, msg)) != EASYNC) {
135 lwkt_replymsg(msg, error);
142 * Send a message synchronously. This function requests synchronous
143 * completion and calls lwkt_beginmsg(). If the target port decides to
144 * run the message asynchronously this function will block waiting for
145 * the message to complete. Since MSGF_ASYNC is not set the target
146 * will not attempt to queue the reply to a reply port but will simply
147 * wake up anyone waiting on the message.
149 * A synchronous error code is always returned.
151 * The message's ms_cmd must be initialized, and its ms_flags must be
152 * at least zero'd out. lwkt_domsg() will initialize the message's
153 * ms_abort_port (abort chasing port). If abort is supported, ms_abort
154 * must also be initialized.
156 * NOTE: you cannot safely request an abort until lwkt_domsg() blocks.
157 * XXX this probably needs some work.
159 * NOTE: MSGF_DONE is left set. The target port must clear it if the
160 * message is to be handled asynchronously, while the synchronous case
161 * can just ignore it.
/*
 * lwkt_domsg: begin a message on port and wait for it if the target
 * answers EASYNC.
 *
 * NOTE(review): short lines (return type, braces, the declaration of
 * error and the final return) appear to be missing from this listing.
 */
164 lwkt_domsg(lwkt_port_t port, lwkt_msg_t msg)
/* MSGF_ASYNC is cleared together with the reply/queue/abort/retrieved bits. */
168 msg->ms_flags &= ~(MSGF_ASYNC | MSGF_REPLY1 | MSGF_REPLY2 | \
169 MSGF_QUEUED | MSGF_ABORTED | MSGF_RETRIEVED);
/* A reply port is mandatory; the abort port starts out equal to it. */
170 KKASSERT(msg->ms_reply_port != NULL);
171 msg->ms_abort_port = msg->ms_reply_port;
/* On EASYNC, error is replaced by whatever lwkt_waitmsg() returns. */
172 if ((error = lwkt_beginmsg(port, msg)) == EASYNC) {
173 error = lwkt_waitmsg(msg);
178 /************************************************************************
180 ************************************************************************/
185 * Initialize a port for use and assign it to the specified thread.
186 * The default reply function is to return the message to the originator.
/*
 * lwkt_initport: zero a port, set up its message queue and install the
 * default function vectors.
 *
 * NOTE(review): td is not referenced by any visible line; the statement
 * that records the owning thread is presumably among the lines missing
 * from this listing -- confirm against the full file.
 */
189 lwkt_initport(lwkt_port_t port, thread_t td)
/* Wipe the whole structure before the queue head is initialized. */
191 bzero(port, sizeof(*port));
192 TAILQ_INIT(&port->mp_msgq);
/* Default put / wait / reply / abort handlers defined for this port type. */
194 port->mp_putport = lwkt_default_putport;
195 port->mp_waitport = lwkt_default_waitport;
196 port->mp_replyport = lwkt_default_replyport;
197 port->mp_abortport = lwkt_default_abortport;
201 * Similar to the standard initport, this function simply marks the message
202 * as being done and does not attempt to return it to an originating port.
/*
 * lwkt_initport_null_rport: same setup as lwkt_initport(), except that the
 * reply vector is replaced by lwkt_null_replyport.
 */
205 lwkt_initport_null_rport(lwkt_port_t port, thread_t td)
/* Standard initialization first, then override only mp_replyport. */
207 lwkt_initport(port, td);
208 port->mp_replyport = lwkt_null_replyport;
214 * Retrieve the next message from the port's message queue, return NULL
215 * if no messages are pending. Note that callers CANNOT use the
216 * MSGF_ABORTED flag as a litmus test to determine if a message
217 * was aborted. The flag only indicates that an abort was requested.
218 * The message's error code will indicate whether an abort occurred
219 * (typically by returning EINTR).
221 * Note that once a message has been dequeued it is subject to being
222 * requeued via an IPI based abort request if it is not marked MSGF_DONE.
224 * If the message has been aborted we have to guarantee that abort
225 * semantics are properly followed. The target port will always see
226 * the original message at least once, and if it does not reply the
227 * message before looping on its message port again it will then see
228 * the message again with ms_cmd set to ms_abort.
230 * The calling thread MUST own the port.
/*
 * _lwkt_pullmsg: hand msg, which sits on port->mp_msgq, to the caller
 * while applying the abort rules spelled out in the remnant comments below.
 *
 * NOTE(review): the else keywords and braces separating the three cases
 * are not visible in this listing; the case boundaries are inferred from
 * the embedded comments -- confirm against the full file.
 */
235 _lwkt_pullmsg(lwkt_port_t port, lwkt_msg_t msg)
237 if ((msg->ms_flags & MSGF_ABORTED) == 0) {
239 * normal case, remove and return the message.
/* Dequeue, then swap MSGF_QUEUED for MSGF_RETRIEVED in a single store. */
241 TAILQ_REMOVE(&port->mp_msgq, msg, ms_node);
242 msg->ms_flags = (msg->ms_flags & ~MSGF_QUEUED) | MSGF_RETRIEVED;
/* Abort requested and the message was already handed out once. */
244 if (msg->ms_flags & MSGF_RETRIEVED) {
246 * abort case, message already returned once, remvoe and
247 * return the aborted message a second time after setting
248 * ms_cmd to ms_abort.
250 TAILQ_REMOVE(&port->mp_msgq, msg, ms_node);
251 msg->ms_flags &= ~MSGF_QUEUED;
/* The second delivery carries the abort command, not the original one. */
252 msg->ms_cmd = msg->ms_abort;
255 * abort case, abort races initial message retrieval. The
256 * message is returned normally but not removed from the
257 * queue. On the next loop the 'aborted' message will be
258 * dequeued and returned. Note that if the caller replies
259 * to the message it will be dequeued (the abort becomes a
/* Only the flag changes here; no TAILQ_REMOVE is visible for this case. */
262 msg->ms_flags |= MSGF_RETRIEVED;
/*
 * lwkt_getport: pull the first message queued on port, if there is one.
 * Nothing is pulled when the queue is empty (msg is then NULL).
 *
 * NOTE(review): the declaration of msg and the return statement are not
 * visible in this listing.
 */
268 lwkt_getport(lwkt_port_t port)
/* Only the thread recorded as owner of the port may call this. */
272 KKASSERT(port->mp_td == curthread);
/* Queue inspection and removal happen inside a critical section. */
274 crit_enter_quick(port->mp_td);
275 if ((msg = TAILQ_FIRST(&port->mp_msgq)) != NULL)
276 _lwkt_pullmsg(port, msg);
277 crit_exit_quick(port->mp_td);
282 * This inline helper function completes processing of a reply from an
283 * unknown cpu context.
285 * The message is being returned to the specified port. The port is
286 * owned by the mp_td thread. If we are on the same cpu as the mp_td
287 * thread we can trivially queue the message to the reply port and schedule
288 * the target thread, otherwise we have to send an ipi message to the
291 * This inline must be entered with a critical section already held.
292 * Note that the IPIQ callback function (*_remote) is entered with a
293 * critical section already held, and we obtain one in lwkt_replyport().
/*
 * _lwkt_replyport: queue a replied msg on port, or pass the job to the cpu
 * of the thread owning the port.
 *
 * A nonzero force skips the cpu ownership test; lwkt_replyport_remote()
 * passes 1 and lwkt_default_replyport() passes 0.
 *
 * NOTE(review): several short lines (else branches, the statement guarded
 * by the MSGPORTF_WAITING test, any preprocessor conditionals) are not
 * visible in this listing.
 */
297 _lwkt_replyport(lwkt_port_t port, lwkt_msg_t msg, int force)
299 thread_t td = port->mp_td;
/* Local case: forced, or the owning thread belongs to the current cpu. */
301 if (force || td->td_gd == mycpu) {
303 * We can only reply the message if the abort has caught up with us,
304 * or if no abort was issued (same case).
306 if (msg->ms_abort_port == port) {
/* The message must not already sit on a queue before it is appended. */
307 KKASSERT((msg->ms_flags & MSGF_QUEUED) == 0);
308 TAILQ_INSERT_TAIL(&port->mp_msgq, msg, ms_node);
/* DONE, QUEUED and REPLY2 are all raised by one store. */
309 msg->ms_flags |= MSGF_DONE | MSGF_QUEUED | MSGF_REPLY2;
310 if (port->mp_flags & MSGPORTF_WAITING)
/* Remote case: ask the owning cpu to run lwkt_replyport_remote(msg). */
315 lwkt_send_ipiq(td->td_gd, (ipifunc1_t)lwkt_replyport_remote, msg);
/* NOTE(review): presumably the alternative when no IPI can be sent; the selecting condition is not visible here. */
317 panic("lwkt_replyport: thread %p has bad gd pointer", td);
325 * This function completes reply processing for the default case in the
326 * context of the originating cpu.
/*
 * lwkt_replyport_remote: handler handed to lwkt_send_ipiq() by
 * _lwkt_replyport(); finishes the reply on the port stored in
 * msg->ms_reply_port.
 */
330 lwkt_replyport_remote(lwkt_msg_t msg)
/* force = 1: the cpu ownership test inside _lwkt_replyport() is skipped. */
332 _lwkt_replyport(msg->ms_reply_port, msg, 1);
338 * This function is called in the context of the target to reply a message.
339 * The critical section protects us from IPIs on this CPU.
/*
 * lwkt_default_replyport: default mp_replyport vector (installed by
 * lwkt_initport()).
 *
 * NOTE(review): braces, the else keyword between the async and sync paths,
 * and the critical-section calls mentioned by the comment preceding this
 * function are not visible in this listing.
 */
342 lwkt_default_replyport(lwkt_port_t port, lwkt_msg_t msg)
/* REPLY1 is raised before anything else is examined. */
345 msg->ms_flags |= MSGF_REPLY1;
348 * An abort may have caught up to us while we were processing the
349 * message. If this occured we have to dequeue the message from the
350 * target port in the context of our current cpu before we can finish
353 if (msg->ms_flags & MSGF_QUEUED) {
/* A message still queued at reply time is expected to be an aborted one. */
354 KKASSERT(msg->ms_flags & MSGF_ABORTED);
355 TAILQ_REMOVE(&msg->ms_target_port->mp_msgq, msg, ms_node);
356 msg->ms_flags &= ~MSGF_QUEUED;
360 * Do reply port processing for async messages. Just mark the message
361 * done and wakeup the owner of the reply port for synchronous messages.
363 if (msg->ms_flags & MSGF_ASYNC) {
/* force = 0: _lwkt_replyport() decides between local queueing and an IPI. */
364 _lwkt_replyport(port, msg, 0);
/* Synchronous path per the remnant comment above: flag DONE, wake a waiting owner. */
366 msg->ms_flags |= MSGF_DONE;
367 if (port->mp_flags & MSGPORTF_WAITING)
368 lwkt_schedule(port->mp_td);
374 * You can point a port's reply vector at this function if you just want
375 * the message marked done, without any queueing or signaling. This is
376 * often used for structure-embedded messages.
/*
 * lwkt_null_replyport: reply vector that only flags the message; in the
 * visible lines port is unused and nothing is queued or scheduled.
 */
379 lwkt_null_replyport(lwkt_port_t port, lwkt_msg_t msg)
/* Both DONE and REPLY1 are set by a single store. */
382 msg->ms_flags |= MSGF_DONE|MSGF_REPLY1;
387 * lwkt_default_putport()
389 * This function is typically assigned to the mp_putport port vector.
391 * Queue a message to the target port and wakeup the thread owning it.
392 * This function always returns EASYNC and may be assigned to a
393 * message port's mp_putport function vector. Note that we must set
394 * MSGF_QUEUED prior to sending any IPIs in order to interlock against
395 * ABORT requests and other tests that might be performed.
397 * Note that messages start out as synchronous entities, and as an
398 * optimization MSGF_DONE is usually left set (so in the synchronous path
399 * no modifications to ms_flags are ever required). If a message becomes
400 * async, i.e. you return EASYNC, then MSGF_DONE must be cleared or
401 * lwkt_replymsg() will wind up being a NOP.
403 * The inline must be called from a critical section (the remote function
404 * is called from an IPI and will be in a critical section).
/*
 * _lwkt_putport: append msg to the queue of port, or pass the job to the
 * cpu of the thread owning the port.
 *
 * A nonzero force skips the cpu ownership test; lwkt_putport_remote()
 * passes 1 and lwkt_default_putport() passes 0.
 *
 * NOTE(review): the statement guarded by the MSGPORTF_WAITING test, the
 * else keyword and any preprocessor conditionals are not visible in this
 * listing.
 */
409 _lwkt_putport(lwkt_port_t port, lwkt_msg_t msg, int force)
411 thread_t td = port->mp_td;
/* Local case: forced, or the owning thread belongs to the current cpu. */
413 if (force || td->td_gd == mycpu) {
414 TAILQ_INSERT_TAIL(&port->mp_msgq, msg, ms_node);
415 if (port->mp_flags & MSGPORTF_WAITING)
/* Remote case: ask the owning cpu to run lwkt_putport_remote(msg). */
419 lwkt_send_ipiq(td->td_gd, (ipifunc1_t)lwkt_putport_remote, msg);
/* NOTE(review): presumably the alternative when no IPI can be sent; the selecting condition is not visible here. */
421 panic("lwkt_putport: thread %p has bad gd pointer", td);
/*
 * lwkt_putport_remote: handler handed to lwkt_send_ipiq() by
 * _lwkt_putport(); queues msg on the port stored in msg->ms_target_port.
 *
 * NOTE(review): the declaration of i and the statement guarded by the
 * inner if are not visible in this listing.
 */
430 lwkt_putport_remote(lwkt_msg_t msg)
434 * try to catch a free-after-send issue.
/* The 0xdeadc0de pattern in ms_target_port triggers the diagnostic path. */
436 if (msg->ms_target_port == (void *)0xdeadc0de) {
/* The field is re-read for up to 1000000 iterations. */
438 for (i = 0; i < 1000000; ++i) {
439 if (msg->ms_target_port != (void *)0xdeadc0de)
/* The panic text carries the message, the value last read and the loop count. */
443 panic("msg %p ms_target_port is bogus: reads %p after %d loops\n", msg, msg->ms_target_port, i);
/* force = 1: the cpu ownership test inside _lwkt_putport() is skipped. */
446 _lwkt_putport(msg->ms_target_port, msg, 1);
/*
 * lwkt_default_putport: default mp_putport vector (installed by
 * lwkt_initport()).  Flags the message, records its target port and hands
 * it to _lwkt_putport().
 *
 * NOTE(review): the return statement and the critical-section calls are
 * not visible in this listing; the comment preceding _lwkt_putport() says
 * this function always returns EASYNC.
 */
452 lwkt_default_putport(lwkt_port_t port, lwkt_msg_t msg)
455 msg->ms_flags |= MSGF_QUEUED; /* abort interlock */
/* DONE is cleared so that a later reply is not treated as already complete. */
456 msg->ms_flags &= ~MSGF_DONE;
/* The target port is recorded before the message is queued or sent by IPI. */
457 msg->ms_target_port = port;
458 _lwkt_putport(port, msg, 0);
466 * Forward a message received on one port to another port. The forwarding
467 * function must deal with a pending abort but otherwise essentially just
468 * issues a putport to the target port.
470 * An abort may have two side effects: First, the message may have been
471 * requeued to the current target port. If so, we must dequeue it before
/*
 * lwkt_forwardmsg: pass msg on to another port through that port's
 * mp_putport vector.
 *
 * NOTE(review): the return type, the declaration of error, braces, the
 * return statement and any critical-section calls are not visible in this
 * listing.
 */
475 lwkt_forwardmsg(lwkt_port_t port, lwkt_msg_t msg)
/* A message that is still queued is first taken off its current target port. */
480 if (msg->ms_flags & MSGF_QUEUED) {
481 KKASSERT(msg->ms_flags & MSGF_ABORTED);
482 TAILQ_REMOVE(&msg->ms_target_port->mp_msgq, msg, ms_node);
483 msg->ms_flags &= ~MSGF_QUEUED;
/* RETRIEVED is cleared before the message is put on the new port. */
485 msg->ms_flags &= ~MSGF_RETRIEVED;
/* Any result other than EASYNC is replied immediately with that code. */
486 if ((error = port->mp_putport(port, msg)) != EASYNC)
487 lwkt_replymsg(msg, error);
495 * Aborting a message is a fairly complex task. The first order of
496 * business is to get the message to the cpu that owns the target
497 * port, during which we may have to do some port chasing due to
498 * message forwarding operations.
500 * NOTE! Since an aborted message is requeued all message processing
501 * loops should check the MSGF_ABORTED flag.
/*
 * lwkt_abortmsg: start an abort request for msg by locating the port that
 * currently holds it and invoking that port's mp_abortport vector, locally
 * or through an IPI.
 *
 * NOTE(review): the declarations of port and td, the assignment of td, the
 * else keywords, braces and the critical-section calls mentioned by the
 * remnant comments are not visible in this listing.
 */
505 lwkt_abortmsg(lwkt_msg_t msg)
511 * A critical section protects us from reply IPIs on this cpu. We
512 * can only abort messages that have not yet completed (DONE), are not
513 * in the midst of being replied (REPLY1), and which support the
514 * abort function (ABORTABLE).
/* Proceed only when ABORTABLE is set and both DONE and REPLY1 are clear. */
517 if ((msg->ms_flags & (MSGF_DONE|MSGF_REPLY1|MSGF_ABORTABLE)) == MSGF_ABORTABLE) {
519 * Chase the message. If REPLY1 is set the message has been replied
520 * all the way back to the originator, otherwise it is sitting on
521 * ms_target_port (but we can only complete processing if we are
522 * on the same cpu as the selected port in order to avoid
523 * SMP cache synchronization issues).
525 * When chasing through multiple ports ms_flags may not be
526 * synchronized to the current cpu, but it WILL be synchronized
527 * with regards to testing the MSGF_REPLY1 bit once we reach the
528 * target port that made the reply and since the cpu owning
529 * some port X stores the new port in ms_target_port if the message
530 * is forwarded, the current port will only ever equal the target
531 * port when we are on the correct cpu.
/* Port selection: reply port when REPLY1 is set, target port in the other assignment. */
533 if (msg->ms_flags & MSGF_REPLY1)
534 port = msg->ms_reply_port;
536 port = msg->ms_target_port;
538 cpu_ccfence(); /* don't let the compiler reload ms_*_port */
541 * The chase call must run on the cpu owning the port. Fully
542 * synchronous ports (mp_td == NULL) can run the call on any cpu.
/* A non-NULL owner on another cpu gets the request forwarded by IPI. */
545 if (td && td->td_gd != mycpu) {
547 lwkt_send_ipiq(td->td_gd, (ipifunc1_t)lwkt_abortmsg_remote, msg);
/* NOTE(review): presumably the alternative when no IPI can be sent; the selecting condition is not visible here. */
549 panic("lwkt_abortmsg: thread %p has bad gd pointer", td);
/* Local case: the abort vector of the selected port is called directly. */
552 port->mp_abortport(port, msg);
/*
 * lwkt_abortmsg_remote: handler handed to lwkt_send_ipiq() by
 * lwkt_abortmsg() and by itself; repeats the port selection and either
 * forwards the request again or calls the abort vector.
 *
 * NOTE(review): the declarations of port and td, the assignment of td and
 * the else keywords are not visible in this listing.
 */
562 lwkt_abortmsg_remote(lwkt_msg_t msg)
/* Same selection as in lwkt_abortmsg(): reply port when REPLY1 is set. */
567 if (msg->ms_flags & MSGF_REPLY1)
568 port = msg->ms_reply_port;
570 port = msg->ms_target_port;
571 cpu_ccfence(); /* don't let the compiler reload ms_*_port */
/* Owner lives on a different cpu: send the same handler on to that cpu. */
573 if (td->td_gd != mycpu) {
574 lwkt_send_ipiq(td->td_gd, (ipifunc1_t)lwkt_abortmsg_remote, msg);
/* Local case: the abort vector of the selected port is called directly. */
576 port->mp_abortport(port, msg);
583 * The mp_abortport function is called when the abort has finally caught up
584 * to the target port or (if the message has been replied) the reply port.
/*
 * lwkt_default_abortport: default mp_abortport vector (installed by
 * lwkt_initport()).  Resets ms_abort_port and then acts on one of three
 * flag combinations tested below.
 *
 * NOTE(review): braces and any trailing lines of this function are not
 * visible in this listing.
 */
587 lwkt_default_abortport(lwkt_port_t port, lwkt_msg_t msg)
590 * Set ms_abort_port to ms_reply_port to indicate the completion of
591 * the messaging chasing portion of the abort request. Note that
592 * the passed port is the port that we finally caught up to, not
593 * necessarily the reply port.
595 msg->ms_abort_port = msg->ms_reply_port;
/* Case 1: REPLY2 already set -- the message is queued to the reply port here. */
597 if (msg->ms_flags & MSGF_REPLY2) {
599 * If REPLY2 is set we must have chased it all the way back to
600 * the reply port, but the replyport code has not queued the message
601 * (because it was waiting for the abort to catch up). We become
602 * responsible for queueing the message to the reply port.
/* Sanity checks: not on any queue yet, and port really is the reply port. */
604 KKASSERT((msg->ms_flags & MSGF_QUEUED) == 0);
605 KKASSERT(port == msg->ms_reply_port);
606 TAILQ_INSERT_TAIL(&port->mp_msgq, msg, ms_node);
607 msg->ms_flags |= MSGF_DONE | MSGF_QUEUED;
/* Wake the port owner if it is flagged as waiting. */
608 if (port->mp_flags & MSGPORTF_WAITING)
609 lwkt_schedule(port->mp_td);
/* Case 2: neither QUEUED nor REPLY1 -- flag the abort and requeue on port. */
610 } else if ((msg->ms_flags & (MSGF_QUEUED|MSGF_REPLY1)) == 0) {
612 * Abort on the target port. The message has not yet been replied
613 * and must be requeued to the target port.
615 msg->ms_flags |= MSGF_ABORTED | MSGF_QUEUED;
616 TAILQ_INSERT_TAIL(&port->mp_msgq, msg, ms_node);
617 if (port->mp_flags & MSGPORTF_WAITING)
618 lwkt_schedule(port->mp_td);
/* Case 3: reached with QUEUED set and REPLY1 clear -- only the flag is set. */
619 } else if ((msg->ms_flags & MSGF_REPLY1) == 0) {
621 * The message has not yet been retrieved by the target port, set
622 * MSGF_ABORTED so the target port can requeue the message abort after
625 msg->ms_flags |= MSGF_ABORTED;
630 * lwkt_default_waitport()
632 * If msg is NULL, dequeue the next message from the port's message
633 * queue, block until a message is ready. This function never
636 * If msg is non-NULL, block until the requested message has been returned
637 * to the port then dequeue and return it. DO NOT USE THIS TO WAIT FOR
638 * INCOMING REQUESTS, ONLY USE THIS TO WAIT FOR REPLIES.
640 * Note that the API does not currently support multiple threads waiting
641 * on a port. By virtue of owning the port it is controlled by our
642 * cpu and we can safely manipulate its contents.
645 lwkt_default_waitport(lwkt_port_t port, lwkt_msg_t msg)
647 thread_t td = curthread;
650 KKASSERT(port->mp_td == td);
651 crit_enter_quick(td);
653 if ((msg = TAILQ_FIRST(&port->mp_msgq)) == NULL) {
654 port->mp_flags |= MSGPORTF_WAITING;
655 td->td_flags |= TDF_BLOCKED;
657 lwkt_deschedule_self(td);
659 } while ((msg = TAILQ_FIRST(&port->mp_msgq)) == NULL);
660 td->td_flags &= ~TDF_BLOCKED;
661 port->mp_flags &= ~MSGPORTF_WAITING;
663 _lwkt_pullmsg(port, msg);
666 * If a message is not marked done, or if it is queued, we have work
667 * to do. Note that MSGF_DONE is always set in the context of the
670 if ((msg->ms_flags & (MSGF_DONE|MSGF_QUEUED)) != MSGF_DONE) {
672 * We must own the reply port to safely mess with it's contents.
674 port = msg->ms_reply_port;
675 KKASSERT(port->mp_td == td);
677 if ((msg->ms_flags & MSGF_DONE) == 0) {
678 port->mp_flags |= MSGPORTF_WAITING; /* saved by the BGL */
683 * MSGF_PCATCH is only set by processes which wish to
684 * abort the message they are blocked on when a signal
685 * occurs. Note that we still must wait for message
686 * completion after sending an abort request.
688 if (msg->ms_flags & MSGF_PCATCH) {
689 if (sentabort == 0 && CURSIG(port->mp_td->td_lwp)) {
697 * XXX set TDF_SINTR so 'ps' knows the difference between
698 * an interruptable wait and a disk wait. YYY eventually
699 * move LWP_SINTR to TDF_SINTR to reduce duplication.
701 td->td_flags |= TDF_SINTR | TDF_BLOCKED;
702 lwkt_deschedule_self(td);
704 td->td_flags &= ~(TDF_SINTR | TDF_BLOCKED);
705 } while ((msg->ms_flags & MSGF_DONE) == 0);
706 port->mp_flags &= ~MSGPORTF_WAITING; /* saved by the BGL */
709 * We own the message now.
711 if (msg->ms_flags & MSGF_QUEUED) {
712 msg->ms_flags &= ~MSGF_QUEUED;
713 TAILQ_REMOVE(&port->mp_msgq, msg, ms_node);