From eb67213abec698ffb555ee926f2761bcb7e95f55 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 26 Mar 2019 14:11:56 -0700 Subject: [PATCH] kernel - Rewrite the callout_*() API * Rewrite the entire API from scratch and improve compatibility with FreeBSD. This is not an attempt to achieve full API compatibility, as FreeBSD's API has unnecessary complexity that coders would frequently make mistakes interpreting. * Remove the IPI mechanisms in favor of fine-grained spin-locks instead. * Add some robustness features in an attempt to track down corrupted callwheel lists due to originating subsystems freeing structures out from under an active callout. * The code supports a full-blown type-stable/adhoc-reuse structural separation between the front-end and the back-end, but this feature is currently not operational and may be removed at some future point. Instead we currently just embed the struct _callout inside the struct callout. * Replace callout_stop_sync() with callout_cancel(). * callout_drain() is now implemented as a synchronous cancel instead of an asynchronous stop, which is closer to the FreeBSD API and expected operation for ported code (usb stack in particular). We will just have to fix any deadlocks which we come across. * Retain our callout_terminate() function as the 'better' way to stop using a callout, as it will not only cancel the callout but also de-flag the structure so it can no longer be used. --- share/man/man9/Makefile | 5 +- share/man/man9/callout.9 | 537 ++++-- sys/bus/u4b/usbdi.h | 2 +- sys/dev/crypto/rdrand/rdrand.c | 2 +- sys/dev/disk/ahci/ahci.c | 6 +- sys/dev/disk/dm/delay/dm_target_delay.c | 2 +- sys/dev/disk/mpt/mpt.h | 2 +- sys/dev/disk/nata/ata-all.c | 2 +- sys/dev/disk/nata/ata-lowlevel.c | 2 +- sys/dev/disk/nata/ata-queue.c | 2 +- sys/dev/disk/nata/chipsets/ata-ahci.c | 2 +- sys/dev/disk/nata/chipsets/ata-marvell.c | 2 +- sys/dev/disk/nata/chipsets/ata-siliconimage.c | 2 +- sys/dev/disk/sili/sili.c | 4 +- sys/dev/drm/include/linux/workqueue.h | 3 +- sys/dev/drm/linux_hrtimer.c | 5 +- sys/dev/misc/ecc/ecc_amd8000.c | 2 +- sys/dev/misc/ecc/ecc_x3400.c | 2 +- sys/dev/misc/ipmi/ipmi.c | 2 +- sys/dev/misc/led/led.c | 2 +- sys/dev/misc/psm/psm.c | 4 +- sys/dev/netif/ath/ath/if_ath.c | 8 +- sys/dev/netif/iwn/if_iwn.c | 6 +- sys/dev/netif/oce/oce_if.c | 2 +- sys/dev/raid/aac/aac.c | 2 +- sys/dev/raid/ciss/ciss.c | 2 +- sys/dev/raid/hptrr/hptrr_osm_bsd.c | 2 +- sys/dev/raid/mfi/mfi.c | 2 +- sys/dev/raid/mps/mps.c | 2 +- sys/dev/raid/twa/tw_osl_freebsd.c | 4 +- sys/dev/virtual/virtio/random/virtio_random.c | 2 +- sys/dev/virtual/vmware/vmxnet3/if_vmx.c | 2 +- sys/kern/kern_event.c | 12 +- sys/kern/kern_exit.c | 2 +- sys/kern/kern_synch.c | 2 +- sys/kern/kern_time.c | 2 +- sys/kern/kern_timeout.c | 1502 ++++++++++------- sys/kern/kern_wdog.c | 11 +- sys/kern/subr_log.c | 2 +- sys/kern/subr_taskqueue.c | 3 +- sys/net/altq/altq_rmclass.c | 3 +- sys/net/ipfw/ip_fw2.c | 10 +- sys/netinet/ip_carp.c | 6 +- sys/netinet/tcp_subr.c | 10 +- sys/netinet/tcp_timer2.h | 12 + sys/netproto/802_11/README.DRAGONFLY | 2 +- sys/netproto/802_11/wlan/ieee80211_dfs.c | 4 +- sys/sys/callout.h | 252 +-- 48 files changed, 1512 insertions(+), 949 deletions(-) diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index bfed4232a7..a691a0aaa6 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -370,14 +370,17 @@ MLINKS+=byteorder.9 be16dec.9 \ byteorder.9 le64toh.9 MLINKS+=callout.9 callout_active.9 \ callout.9 callout_deactivate.9 \ + callout.9 callout_drain.9 
\ callout.9 callout_init.9 \ callout.9 callout_init_lk.9 \ callout.9 callout_init_mp.9 \ callout.9 callout_pending.9 \ callout.9 callout_reset.9 \ + callout.9 callout_reset_bycpu.9 \ callout.9 callout_stop.9 \ callout.9 callout_stop_async.9 \ - callout.9 callout_stop_sync.9 + callout.9 callout_cancel.9 \ + callout.9 callout_terminate.9 \ MLINKS+=condvar.9 cv_broadcast.9 \ condvar.9 cv_broadcastpri.9 \ condvar.9 cv_destroy.9 \ diff --git a/share/man/man9/callout.9 b/share/man/man9/callout.9 index 533134ea0c..a7e7731e2b 100644 --- a/share/man/man9/callout.9 +++ b/share/man/man9/callout.9 @@ -14,13 +14,6 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: -.\" This product includes software developed by the NetBSD -.\" Foundation, Inc. and its contributors. -.\" 4. Neither the name of The NetBSD Foundation nor the names of its -.\" contributors may be used to endorse or promote products derived -.\" from this software without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED @@ -34,161 +27,505 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.\" $FreeBSD: src/share/man/man9/timeout.9,v 1.9.2.6 2001/12/17 11:30:19 ru Exp $ -.\" -.Dd November 30, 2014 -.Dt CALLOUT 9 +.Dd July 27, 2016 +.Dt TIMEOUT 9 .Os .Sh NAME +.Nm callout_active , +.Nm callout_deactivate , +.Nm callout_drain , .Nm callout_init , -.Nm callout_init_lk , .Nm callout_init_mp , +.Nm callout_init_lk , +.Nm callout_pending , .Nm callout_reset , +.Nm callout_reset_bycpu , .Nm callout_stop , .Nm callout_stop_async , -.Nm callout_stop_sync , -.Nm callout_active , -.Nm callout_pending , -.Nm callout_deactivate +.Nm callout_cancel , +.Nm callout_terminate .Nd execute a function after a specified length of time .Sh SYNOPSIS .In sys/types.h .In sys/systm.h +.In sys/callout.h .Bd -literal typedef void timeout_t (void *); .Ed +.Ft int +.Fn callout_active "struct callout *c" .Ft void -.Fn callout_init "struct callout *c" +.Fn callout_deactivate "struct callout *c" +.Ft int +.Fn callout_drain "struct callout *c" .Ft void -.Fn callout_init_lk "struct callout *c" "struct lock *lk" +.Fn callout_init "struct callout *c" "int mpsafe" .Ft void -.Fn callout_init_mp "struct callout *c" +.Fn callout_init_lk "struct callout *c" "struct lock *lk" +.Ft int +.Fn callout_pending "struct callout *c" .Ft void .Fn callout_reset "struct callout *c" "int ticks" "timeout_t *func" "void *arg" +.Ft void +.Fo callout_reset_bycpu +.Fa "struct callout *c" +.Fa "int ticks" +.Fa "timeout_t *func" +.Fa "void *arg" +.Fa "int cpuid" +.Fc .Ft int .Fn callout_stop "struct callout *c" -.Ft void +.Ft int .Fn callout_stop_async "struct callout *c" .Ft int -.Fn callout_stop_sync "struct callout *c" +.Fn callout_cancel "struct callout *c" .Ft int -.Fn callout_active "struct callout *c" -.Ft int -.Fn callout_pending "struct callout *c" -.Fn callout_deactivate "struct callout *c" +.Fn callout_terminate "struct callout *c" .Sh DESCRIPTION The .Nm callout -facility provides a mechanism to execute a function at a given time. 
-The timer is based on the hardclock timer which ticks -.Dv hz -times per second. +API is used to schedule a call to an arbitrary function at a specific +time in the future. +Consumers of this API are required to allocate a callout structure +.Pq struct callout +for each pending function invocation. +This structure stores state about the pending function invocation including +the function to be called and the time at which the function should be invoked. +Pending function calls can be cancelled or rescheduled to a different time. +In addition, +a callout structure may be reused to schedule a new function call after a +scheduled call is completed. .Pp -Clients of the -.Nm callout -facility are responsible for providing pre-allocated callout structures, or -.Dq handles . -The -.Nm callout -facility replaces the historic -.Bx -functions -.Fn timeout -and -.Fn untimeout . +Callouts only provide a single-shot mode. +If a consumer requires a periodic timer, +it must explicitly reschedule each function call. +This is normally done by rescheduling the subsequent call within the called +function. +.Pp +In +.Fx +callout functions must not sleep. +They may not acquire sleepable locks, +wait on condition variables, +perform blocking allocation requests, +or invoke any other action that might sleep. +In +.Dx +all callout functions are executed from a common kernel thread on the +target cpu and may block as long as deadlocks are avoided. But generally +speaking, callout functions should run in as short a time as possible +as they can add lag to other unrelated callouts. .Pp +Each callout structure must be initialized by +.Fn callout_init , +.Fn callout_init_mp , +or +.Fn callout_init_lk +before it is passed to any of the other callout functions. The .Fn callout_init -function initializes the callout handle +and +.Fn callout_init_mp +functions initialize a callout structure in .Fa c -so it can be passed to -.Fn callout_stop -or -.Fn callout_reset -without any side effects. -The MP version of this function, -.Fn callout_init_mp , -requires that the callback function installed by -.Fn callout_reset -be MP safe. +that is not associated with a specific lock. +The former will hold the mp_lock across callback. However, it is deprecated +and should not be used in new code. +.Fn callout_init_mp +should be used for any new code. .Pp The .Fn callout_init_lk -function associates the callout handle +function initialize a callout structure in .Fa c -with a lock specified by -.Fa lk . -The -.Nm callout -subsystem acquires the associated lock before calling the callout function -and releases it after the function returns. +that is associated with a specific lock. +In +.Fx +the associated lock should be held while stopping or rescheduling the +callout. +In +.Dx +the same is true, but is not a requirement. .Pp -The -.Fn callout_reset -function resets and starts the timer associated with the callout handle -.Fa c . -When the timer expires after -.Fa ticks Ns No /hz -seconds, the function specified by -.Fa func -will be called with the argument -.Fa arg . +The callout subsystem acquires the associated lock before calling the +callout function and releases it after the function returns. +If the callout was cancelled while the callout subsystem waited for the +associated lock, +the callout function is not called, +and the associated lock is released. +This ensures that stopping or rescheduling the callout will abort any +previously scheduled invocation. 
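+.Pp
+For example, a driver might associate its callout with the lock that
+protects its private data and arm the callout from its attach routine.
+The following sketch is purely illustrative; the softc layout and the
+.Fn xx_tick
+function are hypothetical:
+.Bd -literal -offset indent
+struct xx_softc {
+	struct lock	sc_lock;
+	struct callout	sc_timer;
+};
+
+lockinit(&sc->sc_lock, "xxlk", 0, 0);
+callout_init_lk(&sc->sc_timer, &sc->sc_lock);
+
+/* xx_tick() will be called with sc_lock held */
+callout_reset(&sc->sc_timer, hz, xx_tick, sc);
+.Ed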
.Pp The function .Fn callout_stop -cancels the callout associated with the callout handle +cancels a callout .Fa c if it is currently pending. -It is safe to call +If the callout is pending and successfully stopped, then .Fn callout_stop -on a callout that is not pending, so long as it is initialized. -If the callout is not set, has already been serviced or is currently -being serviced, then zero will be returned. -The +returns a value of one. +In +.Fx +if the callout is not set, or +has already been serviced, then +negative one is returned. +In +.Dx +if the callout is not set, or +has already been serviced, then +zero is returned. +If the callout is currently being serviced and cannot be stopped, +then zero will be returned. +If the callout is currently being serviced and cannot be stopped, and at the +same time a next invocation of the same callout is also scheduled, then +.Fn callout_stop +unschedules the next run and returns zero. +In +.Fx +if the callout has an associated lock, +then that lock must be held when this function is called. +In +.Dx +if the callout has an associated lock, +then that lock should be held when this function is called +to avoid races, but does not have to be. +.Pp +In +.Dx +the stop operation is guaranteed to be synchronous if the callout +was initialized with +.Fn callout_init_lk . +.Pp +The function .Fn callout_stop_async -function is identical to +is identical to .Fn callout_stop -without a return value. +but does not block and allows the STOP operation to be asynchronous, +meaning that the callout structure may still be relevant after the +function returns. This situation can occur if the callback was +in-progress at the time the stop was issued. .Pp -The -.Fn callout_stop_sync -function is a synchronous version of +The function +.Fn callout_cancel +synchronously cancels a callout and returns a value similar to that +of +.Fn callout_stop . +.Fn callout_cancel +overrides all other operations while it is in-progress. +.Pp +The function +.Fn callout_terminate +synchronously cancels a callout and informs the system that the +callout structure will no longer be referenced. This function will +clear the initialization flag and any further use of the callout structure +will panic the system until it is next initialized. The callout structure +can be safely freed after this function returns, assuming other program +references to it have been removed. +.Pp +The function +.Fn callout_drain +is identical to .Fn callout_stop -which ensures that the callout function has completed operation (if it -was running) before returning. +except that it will wait for the callout +.Fa c +to complete if it is already in progress. +This function MUST NOT be called while holding any +locks on which the callout might block, or deadlock will result. +Note that if the callout subsystem has already begun processing this +callout, then the callout function may be invoked before +.Fn callout_drain +returns. +However, the callout subsystem does guarantee that the callout will be +fully stopped before +.Fn callout_drain +returns. .Pp The -.Fn callout_pending -macro tests if the callout handle +.Fn callout_reset +function schedules a future function invocation for callout +.Fa c . +If .Fa c -is pending. -A pending callout is one that has been started and whose function has not -yet been called. +already has a pending callout, +it is cancelled before the new invocation is scheduled. 
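+.Pp
+Because callouts are single-shot, a periodic timer is normally
+implemented by rescheduling the callout from within the callout
+function itself.
+A minimal sketch, using hypothetical
+.Fn xx_tick
+and softc names:
+.Bd -literal -offset indent
+static void
+xx_tick(void *arg)
+{
+	struct xx_softc *sc = arg;
+
+	/* ... periodic work ... */
+
+	/* re-arm for another hz ticks */
+	callout_reset(&sc->sc_timer, hz, xx_tick, sc);
+}
+.Ed
+.Pp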
+In +.Fx +these functions return a value of one if a pending callout was cancelled +and zero if there was no pending callout. +If the callout has an associated lock, +then that lock must be held when any of these functions are called. +In +.Dx +these functions return void. +If the callout has an associated lock, +then that lock should generally be held when any of these functions are +called, but the API will work either way. +If a callout is already in-progress, this function's parameters will be +applied when the in-progress callback returns, if not overridden from +within the callback. +.Pp +The time at which the callout function will be invoked is determined by +the +.Fa ticks +argument. +The callout is scheduled to execute after +.Fa ticks Ns No /hz +seconds. +Non-positive values of +.Fa ticks +are silently converted to the value +.Sq 1 . .Pp The +.Fn callout_reset_bycpu +function schedules the callout to occur on the target cpu. The +normal +.Fn callout_reset +function schedules the callout to occur on the current cpu. +The +.Fn callout_reset +functions accept a +.Fa func +argument which identifies the function to be called when the time expires. +It must be a pointer to a function that takes a single +.Fa void * +argument. +Upon invocation, +.Fa func +will receive +.Fa arg +as its only argument. +.Pp +The callout subsystem provides a softclock thread for each CPU in the system. +Callouts are assigned to a single CPU and are executed by the softclock thread +for that CPU. +The callouts are assigned to the current cpu or to a specific cpu +depending on the call. +.Pp +The macros +.Fn callout_pending , .Fn callout_active -macro returns true if a timer has been started but not explicitly stopped, -even if it has already fired. +and +.Fn callout_deactivate +provide access to the current state of the callout. +The +.Fn callout_pending +macro checks whether a callout is +.Em pending ; +a callout is considered +.Em pending +when a timeout has been set but the time has not yet arrived. +Note that once the timeout time arrives and the callout subsystem +starts to process this callout, +.Fn callout_pending +will return +.Dv FALSE +even though the callout function may not have finished +.Pq or even begun +executing. +The +.Fn callout_active +macro checks whether a callout is marked as +.Em active , +and the +.Fn callout_deactivate +macro clears the callout's +.Em active +flag. +The callout subsystem marks a callout as +.Em active +when a timeout is set and it clears the +.Em active +flag in +.Fn callout_stop +and +.Fn callout_drain , +but it +.Em does not +clear it when a callout expires normally via the execution of the +callout function. +.Pp +There are three main techniques for addressing these +synchronization concerns. +The first approach is preferred as it is the simplest: +.Bl -enum -offset indent +.It +Callouts can be associated with a specific lock when they are initialized +by +.Fn callout_init_lk +When a callout is associated with a lock, +the callout subsystem acquires the lock before the callout function is +invoked. +This allows the callout subsystem to transparently handle races between +callout cancellation, +scheduling, +and execution. +Note that the associated lock must be acquired before calling +.Fn callout_stop +or +.Fn callout_reset +functions to provide this safety. .Pp +.It The +.Fn callout_pending , +.Fn callout_active +and .Fn callout_deactivate -macro deactivates the specified callout -.Fa c . 
+macros can be used together to work around the race conditions, +but the interpretation of these calls can be confusing and it +is recommended that a different, caller-specific method be used to +determine whether a race condition is present. .Pp +When a callout's timeout is set, the callout subsystem marks the +callout as both +.Em active +and +.Em pending . +When the timeout time arrives, the callout subsystem begins processing +the callout by first clearing the +.Em pending +flag. +It then invokes the callout function without changing the +.Em active +flag, and does not clear the +.Em active +flag even after the callout function returns. +The mechanism described here requires the callout function itself to +clear the +.Em active +flag using the +.Fn callout_deactivate +macro. The -.Fn callout_active , +.Fn callout_stop +and +.Fn callout_drain +functions always clear both the +.Em active +and +.Em pending +flags before returning. +.Pp +The callout function should first check the +.Em pending +flag and return without action if .Fn callout_pending +returns +.Dv TRUE . +This indicates that the callout was rescheduled using +.Fn callout_reset +just before the callout function was invoked. +If +.Fn callout_active +returns +.Dv FALSE +then the callout function should also return without action. +This indicates that the callout has been stopped. +Finally, the callout function should call +.Fn callout_deactivate +to clear the +.Em active +flag. +For example: +.Bd -literal -offset indent +lockmgr(&sc->sc_lock, LK_EXCLUSIVE); +if (callout_pending(&sc->sc_callout)) { + /* callout was reset */ + lockmgr(&sc->sc_lock, LK_RELEASE); + return; +} +if (!callout_active(&sc->sc_callout)) { + /* callout was stopped */ + lockmgr(&sc->sc_lock, LK_RELEASE); + return; +} +callout_deactivate(&sc->sc_callout); +/* rest of callout function */ +.Ed +.Pp +Together with appropriate synchronization, such as the lock used above, +this approach permits the +.Fn callout_stop and +.Fn callout_reset +functions to be used at any time without races. +For example: +.Bd -literal -offset indent +lockmgr(&sc->sc_mtx, LK_EXCLUSIVE); +callout_stop(&sc->sc_callout); +/* The callout is effectively stopped now. */ +.Ed +.Pp +If the callout is still pending then these functions operate normally, +but if processing of the callout has already begun then the tests in +the callout function cause it to return without further action. +Synchronization between the callout function and other code ensures that +stopping or resetting the callout will never be attempted while the +callout function is past the .Fn callout_deactivate -macros may only be used when the state of the callout structure is stable, -meaning from within the callback function or after the callback function -has been called but the timer has not yet been reset. +call. +.Pp +The above technique additionally ensures that the +.Em active +flag always reflects whether the callout is effectively enabled or +disabled. +If +.Fn callout_active +returns false, then the callout is effectively disabled, since even if +the callout subsystem is actually just about to invoke the callout +function, the callout function will return without action. +.El +.Pp +There is one final race condition that must be considered when a +callout is being stopped for the last time. +In this case it may not be safe to let the callout function itself +detect that the callout was stopped, since it may need to access +data objects that have already been destroyed or recycled. 
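+.Pp
+One way to close this race is to cancel the callout synchronously
+before the objects it references are freed, as in the following
+hypothetical detach fragment:
+.Bd -literal -offset indent
+/* after this call the callout function can no longer be running */
+callout_terminate(&sc->sc_timer);
+kfree(sc, M_DEVBUF);
+.Ed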
+To ensure that the callout is completely inactive, a call to +.Fn callout_cancel +or +.Fn callout_terminate +should be used. .Sh RETURN VALUES The -.Fn callout_stop -function and the +.Fn callout_active +macro returns the state of a callout's +.Em active +flag. +.Pp +The .Fn callout_pending -macro return non-zero if the callout is still pending or zero otherwise. +macro returns the state of a callout's +.Em pending +flag. +.Pp The -.Fn callout_active -macro returns non-zero if the callout is active or zero otherwise. +.Fn callout_stop +and +.Fn callout_drain +functions return a value of one if the callout was removed by the +function, or zero if the callout could not be stopped or was not running +in the first place. +.Sh HISTORY +The original work on the data structures used in this implementation +was published by +.An G. Varghese +and +.An A. Lauck +in the paper +.%T "Hashed and Hierarchical Timing Wheels: Data Structures for the Efficient Implementation of a Timer Facility" +in the +.%B "Proceedings of the 11th ACM Annual Symposium on Operating Systems Principles" . +The current implementation replaces the long standing +.Bx +linked list +callout mechanism which offered O(n) insertion and removal running time +but did not generate or require handles for untimeout operations. +.Pp +In +.Dx +the entire API was reformulated by Matthew Dillon for optimal SMP +operation, uses much larger rings, and is capable of queueing one +operation concurrent with an in-progress callback without blocking. diff --git a/sys/bus/u4b/usbdi.h b/sys/bus/u4b/usbdi.h index a040e60892..6ede7da7b2 100644 --- a/sys/bus/u4b/usbdi.h +++ b/sys/bus/u4b/usbdi.h @@ -408,7 +408,7 @@ struct usb_callout { #define usb_callout_init_mtx(c,m,f) callout_init_lk(&(c)->co, m) #define usb_callout_reset(c,t,f,d) callout_reset(&(c)->co, t, f, d) #define usb_callout_stop(c) callout_stop(&(c)->co) -#define usb_callout_drain(c) callout_stop_sync(&(c)->co) +#define usb_callout_drain(c) callout_drain(&(c)->co) #define usb_callout_pending(c) callout_pending(&(c)->co) /* USB transfer states */ diff --git a/sys/dev/crypto/rdrand/rdrand.c b/sys/dev/crypto/rdrand/rdrand.c index 65408ffc2e..387eb9739f 100644 --- a/sys/dev/crypto/rdrand/rdrand.c +++ b/sys/dev/crypto/rdrand/rdrand.c @@ -107,7 +107,7 @@ rdrand_detach(device_t dev) sc = device_get_softc(dev); - callout_stop_sync(&sc->sc_rng_co); + callout_terminate(&sc->sc_rng_co); return (0); } diff --git a/sys/dev/disk/ahci/ahci.c b/sys/dev/disk/ahci/ahci.c index 32aa8877e8..47a24811a8 100644 --- a/sys/dev/disk/ahci/ahci.c +++ b/sys/dev/disk/ahci/ahci.c @@ -1820,7 +1820,7 @@ restart: ccb = &ap->ap_ccbs[slot]; if (ccb->ccb_xa.flags & ATA_F_TIMEOUT_RUNNING) { serial = ccb->ccb_xa.serial; - callout_stop_sync(&ccb->ccb_timeout); + callout_cancel(&ccb->ccb_timeout); if (serial != ccb->ccb_xa.serial) { kprintf("%s: Warning: timeout race ccb %p\n", PORTNAME(ap), ccb); @@ -1841,7 +1841,7 @@ restart: ccb = &ap->ap_ccbs[slot]; if (ccb->ccb_xa.flags & ATA_F_TIMEOUT_RUNNING) { serial = ccb->ccb_xa.serial; - callout_stop_sync(&ccb->ccb_timeout); + callout_cancel(&ccb->ccb_timeout); if (serial != ccb->ccb_xa.serial) { kprintf("%s: Warning: timeout race ccb %p\n", PORTNAME(ap), ccb); @@ -3702,7 +3702,7 @@ ahci_ata_cmd_done(struct ahci_ccb *ccb) */ if (xa->flags & ATA_F_TIMEOUT_RUNNING) { serial = ccb->ccb_xa.serial; - callout_stop_sync(&ccb->ccb_timeout); + callout_cancel(&ccb->ccb_timeout); if (serial != ccb->ccb_xa.serial) { kprintf("%s: Warning: timeout race ccb %p\n", PORTNAME(ccb->ccb_port), ccb); diff 
--git a/sys/dev/disk/dm/delay/dm_target_delay.c b/sys/dev/disk/dm/delay/dm_target_delay.c index 9fbad75802..5fbb78a048 100644 --- a/sys/dev/disk/dm/delay/dm_target_delay.c +++ b/sys/dev/disk/dm/delay/dm_target_delay.c @@ -333,7 +333,7 @@ _destroy(struct dm_delay_info *di) mtx_lock(&di->cal_mtx); if (callout_pending(&di->cal)) - callout_stop_sync(&di->cal); + callout_cancel(&di->cal); mtx_unlock(&di->cal_mtx); _submit_queue(di, 1); diff --git a/sys/dev/disk/mpt/mpt.h b/sys/dev/disk/mpt/mpt.h index bc284d4c4d..fb9c31b3ed 100644 --- a/sys/dev/disk/mpt/mpt.h +++ b/sys/dev/disk/mpt/mpt.h @@ -773,7 +773,7 @@ mpt_assign_serno(struct mpt_softc *mpt, request_t *req) #define mpt_callout_init(mpt, c) \ callout_init_mp(c) #define mpt_callout_drain(mpt, c) \ - callout_stop_sync(c) + callout_drain(c) /******************************* Register Access ******************************/ static __inline void mpt_write(struct mpt_softc *, size_t, uint32_t); diff --git a/sys/dev/disk/nata/ata-all.c b/sys/dev/disk/nata/ata-all.c index 5340c4fe30..7b48380c56 100644 --- a/sys/dev/disk/nata/ata-all.c +++ b/sys/dev/disk/nata/ata-all.c @@ -205,7 +205,7 @@ ata_reinit(device_t dev) /* catch eventual request in ch->running */ lockmgr(&ch->state_mtx, LK_EXCLUSIVE); if ((request = ch->running)) - callout_stop_sync(&request->callout); + callout_cancel(&request->callout); ch->running = NULL; /* unconditionally grap the channel lock */ diff --git a/sys/dev/disk/nata/ata-lowlevel.c b/sys/dev/disk/nata/ata-lowlevel.c index 0221044a2d..f7dbee1c25 100644 --- a/sys/dev/disk/nata/ata-lowlevel.c +++ b/sys/dev/disk/nata/ata-lowlevel.c @@ -439,7 +439,7 @@ ata_end_transaction(struct ata_request *request) kprintf("ata_end_transaction OOPS!!\n"); end_finished: - callout_stop_sync(&request->callout); + callout_cancel(&request->callout); return ATA_OP_FINISHED; end_continue: diff --git a/sys/dev/disk/nata/ata-queue.c b/sys/dev/disk/nata/ata-queue.c index eb94a01022..fcf1ebb122 100644 --- a/sys/dev/disk/nata/ata-queue.c +++ b/sys/dev/disk/nata/ata-queue.c @@ -575,7 +575,7 @@ ata_fail_requests(device_t dev) /* do we have any running request to care about ? 
*/ if ((request = ch->running) && (!dev || request->dev == dev)) { - callout_stop_sync(&request->callout); + callout_cancel(&request->callout); ch->running = NULL; request->result = ENXIO; TAILQ_INSERT_TAIL(&fail_requests, request, chain); diff --git a/sys/dev/disk/nata/chipsets/ata-ahci.c b/sys/dev/disk/nata/chipsets/ata-ahci.c index f4438587ed..619f27c3fa 100644 --- a/sys/dev/disk/nata/chipsets/ata-ahci.c +++ b/sys/dev/disk/nata/chipsets/ata-ahci.c @@ -343,7 +343,7 @@ ata_ahci_end_transaction(struct ata_request *request) int tag = 0; /* kill the timeout */ - callout_stop_sync(&request->callout); + callout_cancel(&request->callout); /* get status */ tf_data = ATA_INL(ctlr->r_res2, ATA_AHCI_P_TFD + offset); diff --git a/sys/dev/disk/nata/chipsets/ata-marvell.c b/sys/dev/disk/nata/chipsets/ata-marvell.c index 842c352ab6..cd6a54468e 100644 --- a/sys/dev/disk/nata/chipsets/ata-marvell.c +++ b/sys/dev/disk/nata/chipsets/ata-marvell.c @@ -453,7 +453,7 @@ ata_marvell_edma_end_transaction(struct ata_request *request) int slot; /* stop timeout */ - callout_stop_sync(&request->callout); + callout_cancel(&request->callout); /* get response ptr's */ rsp_in = ATA_INL(ctlr->r_res1, 0x02020 + ATA_MV_EDMA_BASE(ch)); diff --git a/sys/dev/disk/nata/chipsets/ata-siliconimage.c b/sys/dev/disk/nata/chipsets/ata-siliconimage.c index 26be4a38b9..39b0fa698f 100644 --- a/sys/dev/disk/nata/chipsets/ata-siliconimage.c +++ b/sys/dev/disk/nata/chipsets/ata-siliconimage.c @@ -569,7 +569,7 @@ ata_siiprb_end_transaction(struct ata_request *request) int error, timeout; /* kill the timeout */ - callout_stop_sync(&request->callout); + callout_cancel(&request->callout); prb = (struct ata_siiprb_command *) ((u_int8_t *)rman_get_virtual(ctlr->r_res2) + offset); diff --git a/sys/dev/disk/sili/sili.c b/sys/dev/disk/sili/sili.c index e47663438d..58933b84cd 100644 --- a/sys/dev/disk/sili/sili.c +++ b/sys/dev/disk/sili/sili.c @@ -1089,7 +1089,7 @@ restart: ccb = &ap->ap_ccbs[slot]; if (ccb->ccb_xa.flags & ATA_F_TIMEOUT_RUNNING) { serial = ccb->ccb_xa.serial; - callout_stop_sync(&ccb->ccb_timeout); + callout_cancel(&ccb->ccb_timeout); if (serial != ccb->ccb_xa.serial) { kprintf("%s: Warning: timeout race ccb %p\n", PORTNAME(ap), ccb); @@ -2401,7 +2401,7 @@ sili_ata_cmd_done(struct sili_ccb *ccb) */ if (xa->flags & ATA_F_TIMEOUT_RUNNING) { serial = ccb->ccb_xa.serial; - callout_stop_sync(&ccb->ccb_timeout); + callout_cancel(&ccb->ccb_timeout); if (serial != ccb->ccb_xa.serial) { kprintf("%s: Warning: timeout race ccb %p\n", PORTNAME(ccb->ccb_port), ccb); diff --git a/sys/dev/drm/include/linux/workqueue.h b/sys/dev/drm/include/linux/workqueue.h index fe29bd5c87..7c53d85d36 100644 --- a/sys/dev/drm/include/linux/workqueue.h +++ b/sys/dev/drm/include/linux/workqueue.h @@ -197,8 +197,7 @@ cancel_delayed_work(struct delayed_work *work) static inline int cancel_delayed_work_sync(struct delayed_work *work) { - - callout_drain(&work->timer); + callout_cancel(&work->timer); if (work->work.taskqueue && taskqueue_cancel(work->work.taskqueue, &work->work.work_task, NULL)) taskqueue_drain(work->work.taskqueue, &work->work.work_task); diff --git a/sys/dev/drm/linux_hrtimer.c b/sys/dev/drm/linux_hrtimer.c index eb75689205..e1ba3620f9 100644 --- a/sys/dev/drm/linux_hrtimer.c +++ b/sys/dev/drm/linux_hrtimer.c @@ -84,12 +84,13 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, int hrtimer_cancel(struct hrtimer *timer) { - return callout_drain(&timer->timer_callout) == 0; + return callout_cancel(&timer->timer_callout) == 0; } /* Returns 
non-zero if the timer is already on the queue */ bool -hrtimer_active(const struct hrtimer *timer) +hrtimer_active(const struct hrtimer *const_timer) { + struct hrtimer *timer = __DECONST(struct hrtimer *, const_timer); return callout_pending(&timer->timer_callout); } diff --git a/sys/dev/misc/ecc/ecc_amd8000.c b/sys/dev/misc/ecc/ecc_amd8000.c index 57ae28e955..67e24b2311 100644 --- a/sys/dev/misc/ecc/ecc_amd8000.c +++ b/sys/dev/misc/ecc/ecc_amd8000.c @@ -206,7 +206,7 @@ ecc_amd8000_stop(device_t dev) { struct ecc_amd8000_softc *sc = device_get_softc(dev); - callout_stop_sync(&sc->ecc_callout); + callout_cancel(&sc->ecc_callout); } static int diff --git a/sys/dev/misc/ecc/ecc_x3400.c b/sys/dev/misc/ecc/ecc_x3400.c index 48b38883dd..25740cc72d 100644 --- a/sys/dev/misc/ecc/ecc_x3400.c +++ b/sys/dev/misc/ecc/ecc_x3400.c @@ -278,7 +278,7 @@ ecc_x3400_stop(device_t dev) { struct ecc_x3400_softc *sc = device_get_softc(dev); - callout_stop_sync(&sc->ecc_callout); + callout_cancel(&sc->ecc_callout); } static int diff --git a/sys/dev/misc/ipmi/ipmi.c b/sys/dev/misc/ipmi/ipmi.c index 3f34568d90..bebdd1feb8 100644 --- a/sys/dev/misc/ipmi/ipmi.c +++ b/sys/dev/misc/ipmi/ipmi.c @@ -883,7 +883,7 @@ ipmi_detach(device_t dev) destroy_dev(sc->ipmi_cdev); /* Detach from watchdog handling and turn off watchdog. */ - callout_stop_sync(&sc->ipmi_watchdog); + callout_cancel(&sc->ipmi_watchdog); ipmi_set_watchdog(sc, 0); /* XXX: should use shutdown callout I think. */ diff --git a/sys/dev/misc/led/led.c b/sys/dev/misc/led/led.c index c49c46261d..b381165bfc 100644 --- a/sys/dev/misc/led/led.c +++ b/sys/dev/misc/led/led.c @@ -362,7 +362,7 @@ led_drvexit(void) error = EINVAL; lockmgr(&led_lock, LK_RELEASE); if (error == 0) { - callout_stop_sync(&led_ch); + callout_cancel(&led_ch); delete_unrhdr(led_unit); lockuninit(&led_lock); lockuninit(&led_lock2); diff --git a/sys/dev/misc/psm/psm.c b/sys/dev/misc/psm/psm.c index 02c38f23bc..a724d2606d 100644 --- a/sys/dev/misc/psm/psm.c +++ b/sys/dev/misc/psm/psm.c @@ -1688,8 +1688,8 @@ psmdetach(device_t dev) destroy_dev(sc->dev); /* XXX: callout_drain in original freebsd11 code */ - callout_stop_sync(&sc->callout); - callout_stop_sync(&sc->softcallout); + callout_terminate(&sc->callout); + callout_terminate(&sc->softcallout); return (0); } diff --git a/sys/dev/netif/ath/ath/if_ath.c b/sys/dev/netif/ath/ath/if_ath.c index 33c82d3d13..a46a10afe9 100644 --- a/sys/dev/netif/ath/ath/if_ath.c +++ b/sys/dev/netif/ath/ath/if_ath.c @@ -1911,7 +1911,7 @@ ath_suspend(struct ath_softc *sc) ATH_LOCK(sc); #if defined(__DragonFly__) - callout_stop_sync(&sc->sc_cal_ch); + callout_cancel(&sc->sc_cal_ch); #else callout_stop(&sc->sc_cal_ch); #endif @@ -2691,7 +2691,7 @@ ath_stop(struct ath_softc *sc) sc->sc_tx99->stop(sc->sc_tx99); #endif #if defined(__DragonFly__) - callout_stop_sync(&sc->sc_wd_ch); + callout_cancel(&sc->sc_wd_ch); #else callout_stop(&sc->sc_wd_ch); #endif @@ -2701,7 +2701,7 @@ ath_stop(struct ath_softc *sc) if (!sc->sc_invalid) { if (sc->sc_softled) { #if defined(__DragonFly__) - callout_stop_sync(&sc->sc_ledtimer); + callout_cancel(&sc->sc_ledtimer); #else callout_stop(&sc->sc_ledtimer); #endif @@ -5665,7 +5665,7 @@ ath_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) * ATH_LOCK held. 
*/ #if defined(__DragonFly__) - callout_stop_sync(&sc->sc_cal_ch); + callout_cancel(&sc->sc_cal_ch); #else callout_stop(&sc->sc_cal_ch); #endif diff --git a/sys/dev/netif/iwn/if_iwn.c b/sys/dev/netif/iwn/if_iwn.c index eadac375fb..437044553f 100644 --- a/sys/dev/netif/iwn/if_iwn.c +++ b/sys/dev/netif/iwn/if_iwn.c @@ -2909,7 +2909,7 @@ iwn_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) IEEE80211_UNLOCK(ic); IWN_LOCK(sc); #if defined(__DragonFly__) - callout_stop_sync(&sc->calib_to); + callout_cancel(&sc->calib_to); #else callout_stop(&sc->calib_to); #endif @@ -8987,8 +8987,8 @@ iwn_stop_locked(struct iwn_softc *sc) sc->sc_is_scanning = 0; sc->sc_tx_timer = 0; #if defined(__DragonFly__) - callout_stop_sync(&sc->watchdog_to); - callout_stop_sync(&sc->calib_to); + callout_cancel(&sc->watchdog_to); + callout_cancel(&sc->calib_to); #else callout_stop(&sc->watchdog_to); callout_stop(&sc->calib_to); diff --git a/sys/dev/netif/oce/oce_if.c b/sys/dev/netif/oce/oce_if.c index fce30e54d7..d14fec6e26 100644 --- a/sys/dev/netif/oce/oce_if.c +++ b/sys/dev/netif/oce/oce_if.c @@ -328,7 +328,7 @@ oce_detach(device_t dev) oce_if_deactivate(sc); UNLOCK(&sc->dev_lock); - callout_stop_sync(&sc->timer); + callout_terminate(&sc->timer); if (sc->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); diff --git a/sys/dev/raid/aac/aac.c b/sys/dev/raid/aac/aac.c index 80ae06675c..51aba99ec7 100644 --- a/sys/dev/raid/aac/aac.c +++ b/sys/dev/raid/aac/aac.c @@ -666,7 +666,7 @@ aac_detach(device_t dev) sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); - callout_stop_sync(&sc->aac_daemontime); + callout_terminate(&sc->aac_daemontime); lockmgr(&sc->aac_io_lock, LK_EXCLUSIVE); while (sc->aifflags & AAC_AIFFLAGS_RUNNING) { diff --git a/sys/dev/raid/ciss/ciss.c b/sys/dev/raid/ciss/ciss.c index 036ae185fa..018066898b 100644 --- a/sys/dev/raid/ciss/ciss.c +++ b/sys/dev/raid/ciss/ciss.c @@ -1908,7 +1908,7 @@ ciss_free(struct ciss_softc *sc) destroy_dev(sc->ciss_dev_t); /* Final cleanup of the callout. 
*/ - callout_stop_sync(&sc->ciss_periodic); + callout_terminate(&sc->ciss_periodic); lockuninit(&sc->ciss_lock); /* free the controller data */ diff --git a/sys/dev/raid/hptrr/hptrr_osm_bsd.c b/sys/dev/raid/hptrr/hptrr_osm_bsd.c index d744817b31..3111259e93 100644 --- a/sys/dev/raid/hptrr/hptrr_osm_bsd.c +++ b/sys/dev/raid/hptrr/hptrr_osm_bsd.c @@ -374,7 +374,7 @@ static void hpt_shutdown_vbus(PVBUS_EXT vbus_ext, int howto) kfree(hba->ldm_adapter.him_handle, M_DEVBUF); } - callout_stop_sync(&vbus_ext->timer); + callout_terminate(&vbus_ext->timer); lockuninit(&vbus_ext->lock); kfree(vbus_ext, M_DEVBUF); KdPrint(("hpt_shutdown_vbus done")); diff --git a/sys/dev/raid/mfi/mfi.c b/sys/dev/raid/mfi/mfi.c index afcc2cf40b..3a586755dd 100644 --- a/sys/dev/raid/mfi/mfi.c +++ b/sys/dev/raid/mfi/mfi.c @@ -1079,7 +1079,7 @@ mfi_free(struct mfi_softc *sc) struct mfi_command *cm; int i; - callout_stop_sync(&sc->mfi_watchdog_callout); + callout_terminate(&sc->mfi_watchdog_callout); if (sc->mfi_cdev != NULL) destroy_dev(sc->mfi_cdev); diff --git a/sys/dev/raid/mps/mps.c b/sys/dev/raid/mps/mps.c index 1d65841b67..4c1a15c24d 100644 --- a/sys/dev/raid/mps/mps.c +++ b/sys/dev/raid/mps/mps.c @@ -1325,7 +1325,7 @@ mps_free(struct mps_softc *sc) mps_lock(sc); sc->mps_flags |= MPS_FLAGS_SHUTDOWN; mps_unlock(sc); - callout_stop_sync(&sc->periodic); + callout_terminate(&sc->periodic); if (((error = mps_detach_log(sc)) != 0) || ((error = mps_detach_sas(sc)) != 0)) diff --git a/sys/dev/raid/twa/tw_osl_freebsd.c b/sys/dev/raid/twa/tw_osl_freebsd.c index 50eacd0b49..723a6c2fc4 100644 --- a/sys/dev/raid/twa/tw_osl_freebsd.c +++ b/sys/dev/raid/twa/tw_osl_freebsd.c @@ -850,8 +850,8 @@ twa_shutdown(device_t dev) error = twa_teardown_intr(sc); /* Stop watchdog task. */ - callout_stop_sync(&(sc->watchdog_callout[0])); - callout_stop_sync(&(sc->watchdog_callout[1])); + callout_cancel(&(sc->watchdog_callout[0])); + callout_cancel(&(sc->watchdog_callout[1])); /* Disconnect from the controller. */ if ((error = tw_cl_shutdown_ctlr(&(sc->ctlr_handle), 0))) { diff --git a/sys/dev/virtual/virtio/random/virtio_random.c b/sys/dev/virtual/virtio/random/virtio_random.c index 32b2a665dc..f477bd884d 100644 --- a/sys/dev/virtual/virtio/random/virtio_random.c +++ b/sys/dev/virtual/virtio/random/virtio_random.c @@ -155,7 +155,7 @@ vtrnd_detach(device_t dev) sc = device_get_softc(dev); - callout_stop_sync(&sc->vtrnd_callout); + callout_terminate(&sc->vtrnd_callout); return (0); } diff --git a/sys/dev/virtual/vmware/vmxnet3/if_vmx.c b/sys/dev/virtual/vmware/vmxnet3/if_vmx.c index 649739808e..cefdb143cf 100644 --- a/sys/dev/virtual/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/virtual/vmware/vmxnet3/if_vmx.c @@ -378,7 +378,7 @@ vmxnet3_detach(device_t dev) vmxnet3_stop(sc); VMXNET3_CORE_UNLOCK(sc); - callout_stop_sync(&sc->vmx_tick); + callout_terminate(&sc->vmx_tick); #ifndef VMXNET3_LEGACY_TX vmxnet3_drain_taskqueue(sc); #endif diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 069eeda971..82f7f89a8c 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -498,12 +498,6 @@ filt_timerattach(struct knote *kn) /* * This function is called with the knote flagged locked but it is * still possible to race a callout event due to the callback blocking. - * - * NOTE: Even though the note is locked via KN_PROCSESING, filt_timerexpire() - * can still race us requeue the callout due to potential token cycling - * from various blocking conditions. 
If this situation arises, - * callout_stop_sync() will always return non-zero and we can simply - * retry the operation. */ static void filt_timerdetach(struct knote *kn) @@ -511,11 +505,7 @@ filt_timerdetach(struct knote *kn) struct callout *calloutp; calloutp = (struct callout *)kn->kn_hook; - while (callout_stop_sync(calloutp)) { - kprintf("debug: kqueue timer race fixed, pid %d %s\n", - (curthread->td_proc ? curthread->td_proc->p_pid : 0), - curthread->td_comm); - } + callout_terminate(calloutp); kn->kn_hook = NULL; kfree(calloutp, M_KQUEUE); atomic_subtract_int(&kq_ncallouts, 1); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 89b77fc546..a382fe5e6d 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -367,7 +367,7 @@ exit1(int rv) SIGEMPTYSET(p->p_siglist); SIGEMPTYSET(lp->lwp_siglist); if (timevalisset(&p->p_realtimer.it_value)) - callout_stop_sync(&p->p_ithandle); + callout_terminate(&p->p_ithandle); /* * Reset any sigio structures pointing to us as a result of diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 4d0ea1bbe4..f8c9be899a 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -746,7 +746,7 @@ tsleep(const volatile void *ident, int flags, const char *wmesg, int timo) error = EWOULDBLOCK; } else { /* does not block when on same cpu */ - callout_stop(&thandle); + callout_cancel(&thandle); } } td->td_flags &= ~TDF_TSLEEP_DESCHEDULED; diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index c2e9f77666..b49af4336b 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -902,7 +902,7 @@ sys_setitimer(struct setitimer_args *uap) lwkt_gettoken(&p->p_token); if (uap->which == ITIMER_REAL) { if (timevalisset(&p->p_realtimer.it_value)) - callout_stop_sync(&p->p_ithandle); + callout_cancel(&p->p_ithandle); if (timevalisset(&aitv.it_value)) callout_reset(&p->p_ithandle, tvtohz_high(&aitv.it_value), realitexpire, p); diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 299cea2090..34b5ef298d 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004,2014 The DragonFly Project. All rights reserved. + * Copyright (c) 2004,2014,2019 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -74,60 +74,464 @@ * the Efficient Implementation of a Timer Facility" in the Proceedings of * the 11th ACM Annual Symposium on Operating Systems Principles, * Austin, Texas Nov 1987. - * - * The per-cpu augmentation was done by Matthew Dillon. This file has - * essentially been rewritten pretty much from scratch by Matt. */ #include #include +#include #include #include #include #include +#include +#ifdef CALLOUT_TYPESTABLE +#include +#endif +#include +#include +#include #include #include -#include +TAILQ_HEAD(colist, _callout); +struct softclock_pcpu; +struct _callout_mag; + +/* + * DID_INIT - Sanity check + * SYNC - Synchronous waiter, request SYNCDONE and wakeup() + * SYNCDONE - Synchronous waiter ackknowlegement + * CANCEL_RES - Flags that a cancel/stop prevented a callback + * STOP_RES + * RESET - Callout_reset request queued + * STOP - Callout_stop request queued + * INPROG - Softclock_handler thread processing in-progress on callout + * SET - Callout is linked to queue (if INPROG not set) + * AUTOLOCK - Lockmgr cancelable interlock + * MPSAFE - Callout is MPSAFE + * CANCEL - callout_cancel requested queued + * ACTIVE - active/inactive tracking (see documentation). 
This is + * *NOT* the same as whether a callout is queued or not. + */ +#define CALLOUT_DID_INIT 0x00000001 /* frontend */ +#define CALLOUT_SYNC 0x00000002 /* backend */ +#define CALLOUT_SYNCDONE 0x00000004 /* frontend */ +#define CALLOUT_CANCEL_RES 0x00000008 /* frontend */ +#define CALLOUT_STOP_RES 0x00000010 /* frontend */ +#define CALLOUT_RESET 0x00000020 /* backend */ +#define CALLOUT_STOP 0x00000040 /* backend */ +#define CALLOUT_INPROG 0x00000080 /* backend */ +#define CALLOUT_SET 0x00000100 /* backend */ +#define CALLOUT_AUTOLOCK 0x00000200 /* both */ +#define CALLOUT_MPSAFE 0x00000400 /* both */ +#define CALLOUT_CANCEL 0x00000800 /* backend */ +#define CALLOUT_ACTIVE 0x00001000 /* frontend */ + +struct wheel { + struct spinlock spin; + struct colist list; +}; struct softclock_pcpu { - struct callout_tailq *callwheel; - struct callout * volatile next; - intptr_t running; /* NOTE! Bit 0 used to flag wakeup */ - int softticks; /* softticks index */ - int curticks; /* per-cpu ticks counter */ - int isrunning; - struct thread thread; + struct wheel *callwheel; + struct _callout *running; + struct _callout * volatile next; +#ifdef CALLOUT_TYPESTABLE + struct _callout *quick_obj; +#endif + int softticks; /* softticks index */ + int curticks; /* per-cpu ticks counter */ + int isrunning; + struct thread thread; }; typedef struct softclock_pcpu *softclock_pcpu_t; +TAILQ_HEAD(maglist, _callout_mag); + +#if 0 +static int callout_debug = 0; +SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW, + &callout_debug, 0, ""); +#endif + +#ifdef CALLOUT_TYPESTABLE +static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts"); +#endif + static int cwheelsize; static int cwheelmask; static softclock_pcpu_t softclock_pcpu_ary[MAXCPU]; +#ifdef CALLOUT_TYPESTABLE +static struct typestable_glob callout_tsg; +#endif static void softclock_handler(void *arg); static void slotimer_callback(void *arg); -static void callout_reset_ipi(void *arg); -static void callout_stop_ipi(void *arg, int issync, struct intrframe *frame); -static __inline int -callout_setclear(struct callout *c, int sflags, int cflags) +#ifdef CALLOUT_TYPESTABLE +/* + * typestable callback functions. The init function pre-initializes + * the structure in order to allow for reuse without complete + * reinitialization (i.e. our spinlock). + * + * The test function allows us to reject an allocation attempt due + * to the object being reassociated out-of-band. + */ +static +void +_callout_typestable_init(void *obj) { - int flags; - int nflags; + struct _callout *c = obj; - for (;;) { - flags = c->c_flags; - cpu_ccfence(); - nflags = (flags | sflags) & ~cflags; - if (atomic_cmpset_int(&c->c_flags, flags, nflags)) - break; + spin_init(&c->spin, "_callout"); +} + +/* + * Object may have been reassociated out-of-band. + * + * Return 1 on success with the spin-lock held, allowing reallocation. + * Return 0 on failure with no side effects, rejecting reallocation. + */ +static +int +_callout_typestable_test(void *obj) +{ + struct _callout *c = obj; + + if (c->flags & (CALLOUT_SET | CALLOUT_INPROG)) + return 0; + spin_lock(&c->spin); + if (c->flags & (CALLOUT_SET | CALLOUT_INPROG)) { + spin_unlock(&c->spin); + return 0; + } else { + return 1; } - return flags; } +/* + * NOTE: sc might refer to a different cpu. 
+ */ +static __inline +void +_callout_typestable_free(softclock_pcpu_t sc, void *obj, int tentitive) +{ + if (tentitive == 0) { + obj = atomic_swap_ptr((void *)&sc->quick_obj, obj); + if (obj == NULL) + return; + } + typestable_free(&callout_tsg, obj, tentitive); +} +#endif + +/* + * Post-processing helper for a callout executes any pending request. + * This routine handles post-processing from the softclock thread and + * also handles request processing from the API. + * + * This routine does not block in any way. + * Caller must hold c->spin. + * + * INPROG - Callback is in-processing / in-progress. + * + * SET - Assigned to queue or is in-processing. If INPROG is set, + * however, the _callout is no longer in the queue. + * + * RESET - New timeout was installed. + * + * STOP - Stop requested. + * + * ACTIVE - Set on callout_reset(), cleared by callout_stop() + * or callout_cancel(). Starts out cleared. + * + * NOTE: Flags can be adjusted without holding c->spin, so atomic ops + * must be used at all times. + * + * NOTE: The passed-in (sc) might refer to another cpu. + */ +static __inline +int +_callout_process_spinlocked(struct _callout *c, int fromsoftclock) +{ + struct wheel *wheel; + int res = -1; + + /* + * If a callback manipulates the callout-in-progress we do + * a partial 'completion' of the operation so the operation + * can be processed synchronously and tell the softclock_handler + * to stop messing with it. + */ + if (fromsoftclock == 0 && curthread == &c->qsc->thread && + c->qsc->running == c) { + c->qsc->running = NULL; + atomic_clear_int(&c->flags, CALLOUT_SET | + CALLOUT_INPROG); + } + + /* + * Based on source and state + */ + if (fromsoftclock) { + /* + * From control thread, INPROG is set, handle pending + * request and normal termination. + */ +#ifdef CALLOUT_TYPESTABLE + KASSERT(c->verifier->toc == c, + ("callout corrupt: c=%p %s/%d\n", + c, c->ident, c->lineno)); +#else + KASSERT(&c->verifier->toc == c, + ("callout corrupt: c=%p %s/%d\n", + c, c->ident, c->lineno)); +#endif + if (c->flags & CALLOUT_CANCEL) { + /* + * CANCEL overrides everything. + * + * If a RESET is pending it counts as canceling a + * running timer. + */ + if (c->flags & CALLOUT_RESET) + atomic_set_int(&c->verifier->flags, + CALLOUT_CANCEL_RES | + CALLOUT_STOP_RES); + if (c->flags & CALLOUT_SYNC) { + atomic_set_int(&c->verifier->flags, + CALLOUT_SYNCDONE); + wakeup(c->verifier); + } + atomic_clear_int(&c->flags, CALLOUT_SET | + CALLOUT_INPROG | + CALLOUT_STOP | + CALLOUT_CANCEL | + CALLOUT_RESET | + CALLOUT_SYNC); + res = 0; + } else if (c->flags & CALLOUT_RESET) { + /* + * RESET request pending, requeue appropriately. + */ + atomic_clear_int(&c->flags, CALLOUT_RESET | + CALLOUT_INPROG); + atomic_set_int(&c->flags, CALLOUT_SET); + c->qsc = c->rsc; + c->qarg = c->rarg; + c->qfunc = c->rfunc; + c->qtick = c->rtick; + + /* + * Do not queue to current or past wheel or the + * callout will be lost for ages. + */ + wheel = &c->qsc->callwheel[c->qtick & cwheelmask]; + spin_lock(&wheel->spin); + while (c->qtick - c->qsc->softticks <= 0) { + c->qtick = c->qsc->softticks + 1; + spin_unlock(&wheel->spin); + wheel = &c->qsc->callwheel[c->qtick & + cwheelmask]; + spin_lock(&wheel->spin); + } + TAILQ_INSERT_TAIL(&wheel->list, c, entry); + spin_unlock(&wheel->spin); + } else { + /* + * STOP request pending or normal termination. Since + * this is from our control thread the callout has + * already been removed from the queue. 
+ */ + if (c->flags & CALLOUT_SYNC) { + atomic_set_int(&c->verifier->flags, + CALLOUT_SYNCDONE); + wakeup(c->verifier); + } + atomic_clear_int(&c->flags, CALLOUT_SET | + CALLOUT_INPROG | + CALLOUT_STOP | + CALLOUT_SYNC); + res = 1; + } + } else if (c->flags & CALLOUT_SET) { + /* + * Process request from an API function. qtick and ACTIVE + * are stable while we hold c->spin. Checking INPROG requires + * holding wheel->spin. + * + * If INPROG is set the control thread must handle the request + * for us. + */ + softclock_pcpu_t sc; + + sc = c->qsc; + + wheel = &sc->callwheel[c->qtick & cwheelmask]; + spin_lock(&wheel->spin); + if (c->flags & CALLOUT_INPROG) { + /* + * API requests are deferred if a callback is in + * progress and will be handled after the callback + * returns. + */ + } else if (c->flags & CALLOUT_CANCEL) { + /* + * CANCEL request overrides everything except INPROG + * (for INPROG the CANCEL is handled upon completion). + */ + if (sc->next == c) + sc->next = TAILQ_NEXT(c, entry); + TAILQ_REMOVE(&wheel->list, c, entry); + atomic_set_int(&c->verifier->flags, CALLOUT_CANCEL_RES | + CALLOUT_STOP_RES); + if (c->flags & CALLOUT_SYNC) { + atomic_set_int(&c->verifier->flags, + CALLOUT_SYNCDONE); + /* direct from API no need to wakeup() */ + /* wakeup(c->verifier); */ + } + atomic_clear_int(&c->flags, CALLOUT_STOP | + CALLOUT_SYNC | + CALLOUT_SET); + res = 0; + } else if (c->flags & CALLOUT_RESET) { + /* + * RESET request pending, requeue appropriately. + * + * (ACTIVE is governed by c->spin so we do not have + * to clear it prior to releasing wheel->spin). + */ + if (sc->next == c) + sc->next = TAILQ_NEXT(c, entry); + TAILQ_REMOVE(&wheel->list, c, entry); + spin_unlock(&wheel->spin); + + atomic_clear_int(&c->flags, CALLOUT_RESET); + /* remain ACTIVE */ + sc = c->rsc; + c->qsc = sc; + c->qarg = c->rarg; + c->qfunc = c->rfunc; + c->qtick = c->rtick; + + /* + * Do not queue to current or past wheel or the + * callout will be lost for ages. + */ + wheel = &sc->callwheel[c->qtick & cwheelmask]; + spin_lock(&wheel->spin); + while (c->qtick - sc->softticks <= 0) { + c->qtick = sc->softticks + 1; + spin_unlock(&wheel->spin); + wheel = &sc->callwheel[c->qtick & cwheelmask]; + spin_lock(&wheel->spin); + } + TAILQ_INSERT_TAIL(&wheel->list, c, entry); + } else if (c->flags & CALLOUT_STOP) { + /* + * STOP request + */ + if (sc->next == c) + sc->next = TAILQ_NEXT(c, entry); + TAILQ_REMOVE(&wheel->list, c, entry); + atomic_set_int(&c->verifier->flags, CALLOUT_STOP_RES); + if (c->flags & CALLOUT_SYNC) { + atomic_set_int(&c->verifier->flags, + CALLOUT_SYNCDONE); + /* direct from API no need to wakeup() */ + /* wakeup(c->verifier); */ + } + atomic_clear_int(&c->flags, CALLOUT_STOP | + CALLOUT_SYNC | + CALLOUT_SET); + res = 1; + } else { + /* + * No request pending (someone else processed the + * request before we could) + */ + /* nop */ + } + spin_unlock(&wheel->spin); + } else { + /* + * Process request from API function. callout is not + * active so there's nothing for us to remove. + */ + KKASSERT((c->flags & CALLOUT_INPROG) == 0); + if (c->flags & CALLOUT_CANCEL) { + /* + * CANCEL request (nothing to cancel) + */ + if (c->flags & CALLOUT_SYNC) { + atomic_set_int(&c->verifier->flags, + CALLOUT_SYNCDONE); + /* direct from API no need to wakeup() */ + /* wakeup(c->verifier); */ + } + atomic_clear_int(&c->flags, CALLOUT_STOP | + CALLOUT_CANCEL | + CALLOUT_SYNC); + res = 0; + } else if (c->flags & CALLOUT_RESET) { + /* + * RESET request pending, queue appropriately. 
+ * Do not queue to currently-processing tick. + */ + softclock_pcpu_t sc; + + sc = c->rsc; + atomic_clear_int(&c->flags, CALLOUT_RESET); + atomic_set_int(&c->flags, CALLOUT_SET); + c->qsc = sc; + c->qarg = c->rarg; + c->qfunc = c->rfunc; + c->qtick = c->rtick; + + /* + * Do not queue to current or past wheel or the + * callout will be lost for ages. + */ + wheel = &sc->callwheel[c->qtick & cwheelmask]; + spin_lock(&wheel->spin); + while (c->qtick - sc->softticks <= 0) { + c->qtick = sc->softticks + 1; + spin_unlock(&wheel->spin); + wheel = &sc->callwheel[c->qtick & cwheelmask]; + spin_lock(&wheel->spin); + } + TAILQ_INSERT_TAIL(&wheel->list, c, entry); + spin_unlock(&wheel->spin); + } else if (c->flags & CALLOUT_STOP) { + /* + * STOP request (nothing to stop) + */ + if (c->flags & CALLOUT_SYNC) { + atomic_set_int(&c->verifier->flags, + CALLOUT_SYNCDONE); + /* direct from API no need to wakeup() */ + /* wakeup(c->verifier); */ + } + atomic_clear_int(&c->flags, CALLOUT_STOP | + CALLOUT_SYNC); + res = 1; + } else { + /* + * No request pending (someone else processed the + * request before we could) + */ + /* nop */ + } + } + return res; +} + +/* + * System init + */ static void swi_softclock_setup(void *arg) { @@ -148,6 +552,13 @@ swi_softclock_setup(void *arg) cwheelsize <<= 1; cwheelmask = cwheelsize - 1; +#ifdef CALLOUT_TYPESTABLE + typestable_init_glob(&callout_tsg, M_CALLOUT, + sizeof(struct _callout), + _callout_typestable_test, + _callout_typestable_init); +#endif + /* * Initialize per-cpu data structures. */ @@ -164,8 +575,10 @@ swi_softclock_setup(void *arg) sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz, VM_SUBSYS_GD, KM_CPU(cpu)); memset(sc->callwheel, 0, wheel_sz); - for (i = 0; i < cwheelsize; ++i) - TAILQ_INIT(&sc->callwheel[i]); + for (i = 0; i < cwheelsize; ++i) { + spin_init(&sc->callwheel[i].spin, "wheel"); + TAILQ_INIT(&sc->callwheel[i].list); + } /* * Mark the softclock handler as being an interrupt thread @@ -193,19 +606,20 @@ SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND, * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'! sc->softticks is where * the callwheel is currently indexed. * - * WARNING! The MP lock is not necessarily held on call, nor can it be - * safely obtained. - * * sc->softticks is adjusted by either this routine or our helper thread * depending on whether the helper thread is running or not. + * + * sc->curticks and sc->softticks are adjusted using atomic ops in order + * to ensure that remote cpu callout installation does not race the thread. */ void hardclock_softtick(globaldata_t gd) { softclock_pcpu_t sc; + struct wheel *wheel; sc = softclock_pcpu_ary[gd->gd_cpuid]; - ++sc->curticks; + atomic_add_int(&sc->curticks, 1); if (sc->isrunning) return; if (sc->softticks == sc->curticks) { @@ -213,11 +627,15 @@ hardclock_softtick(globaldata_t gd) * In sync, only wakeup the thread if there is something to * do. */ - if (TAILQ_FIRST(&sc->callwheel[sc->softticks & cwheelmask])) { + wheel = &sc->callwheel[sc->softticks & cwheelmask]; + spin_lock(&wheel->spin); + if (TAILQ_FIRST(&wheel->list)) { sc->isrunning = 1; + spin_unlock(&wheel->spin); lwkt_schedule(&sc->thread); } else { - ++sc->softticks; + atomic_add_int(&sc->softticks, 1); + spin_unlock(&wheel->spin); } } else { /* @@ -231,9 +649,7 @@ hardclock_softtick(globaldata_t gd) /* * This procedure is the main loop of our per-cpu helper thread. 
The - * sc->isrunning flag prevents us from racing hardclock_softtick() and - * a critical section is sufficient to interlock sc->curticks and protect - * us from remote IPI's / list removal. + * sc->isrunning flag prevents us from racing hardclock_softtick(). * * The thread starts with the MP lock released and not in a critical * section. The loop itself is MP safe while individual callbacks @@ -243,11 +659,10 @@ static void softclock_handler(void *arg) { softclock_pcpu_t sc; - struct callout *c; - struct callout_tailq *bucket; + struct _callout *c; + struct wheel *wheel; struct callout slotimer; int mpsafe = 1; - int flags; /* * Setup pcpu slow clocks which we want to run from the callout @@ -262,136 +677,110 @@ softclock_handler(void *arg) */ /*lwkt_setpri_self(TDPRI_SOFT_NORM);*/ - /* - * Loop critical section against ipi operations to this cpu. - */ sc = arg; - crit_enter(); loop: while (sc->softticks != (int)(sc->curticks + 1)) { - bucket = &sc->callwheel[sc->softticks & cwheelmask]; + wheel = &sc->callwheel[sc->softticks & cwheelmask]; - for (c = TAILQ_FIRST(bucket); c; c = sc->next) { - void (*c_func)(void *); - void *c_arg; - struct lock *c_lk; + spin_lock(&wheel->spin); + sc->next = TAILQ_FIRST(&wheel->list); + while ((c = sc->next) != NULL) { int error; + int res; - if (c->c_time != sc->softticks) { - sc->next = TAILQ_NEXT(c, c_links.tqe); + /* + * Match callouts for this tick. The wheel spinlock + * is sufficient to set INPROG. Once set, other + * threads can make only limited changes to (c) + */ + sc->next = TAILQ_NEXT(c, entry); + if (c->qtick != sc->softticks) continue; - } + TAILQ_REMOVE(&wheel->list, c, entry); + atomic_set_int(&c->flags, CALLOUT_INPROG); + sc->running = c; + spin_unlock(&wheel->spin); /* - * Synchronize with mpsafe requirements + * legacy mplock support */ - flags = c->c_flags; - if (flags & CALLOUT_MPSAFE) { + if (c->flags & CALLOUT_MPSAFE) { if (mpsafe == 0) { mpsafe = 1; rel_mplock(); } } else { - /* - * The request might be removed while we - * are waiting to get the MP lock. If it - * was removed sc->next will point to the - * next valid request or NULL, loop up. - */ if (mpsafe) { mpsafe = 0; - sc->next = c; get_mplock(); - if (c != sc->next) - continue; } } /* - * Queue protection only exists while we hold the - * critical section uninterrupted. - * - * Adjust sc->next when removing (c) from the queue, - * note that an IPI on this cpu may make further - * adjustments to sc->next. - */ - sc->next = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE(bucket, c, c_links.tqe); - - KASSERT((c->c_flags & CALLOUT_DID_INIT) && - (c->c_flags & CALLOUT_PENDING) && - CALLOUT_FLAGS_TO_CPU(c->c_flags) == - mycpu->gd_cpuid, - ("callout %p: bad flags %08x", c, c->c_flags)); - - /* - * Once CALLOUT_PENDING is cleared only the IPI_MASK - * prevents the callout from being moved to another - * cpu. However, callout_stop() will also check - * sc->running on the assigned cpu if CALLOUT_EXECUTED - * is set. CALLOUT_EXECUTE implies a callback - * interlock is needed when cross-cpu. + * Execute function (protected by INPROG) */ - sc->running = (intptr_t)c; - c_func = c->c_func; - c_arg = c->c_arg; - c_lk = c->c_lk; - c->c_func = NULL; - - if ((flags & (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) == - (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) { - error = lockmgr(c_lk, LK_EXCLUSIVE | - LK_CANCELABLE); + if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) { + /* + * Raced a stop or cancel request, do + * not execute. 
The processing code + * thinks its a normal completion so + * flag the fact that cancel/stop actually + * prevented a callout here. + */ + if (c->flags & CALLOUT_CANCEL) { + atomic_set_int(&c->verifier->flags, + CALLOUT_CANCEL_RES | + CALLOUT_STOP_RES); + } else if (c->flags & CALLOUT_STOP) { + atomic_set_int(&c->verifier->flags, + CALLOUT_STOP_RES); + } + } else if (c->flags & CALLOUT_AUTOLOCK) { + /* + * Interlocked cancelable call. If the + * lock gets canceled we have to flag the + * fact that the cancel/stop actually + * prevented the callout here. + */ + error = lockmgr(c->lk, LK_EXCLUSIVE | + LK_CANCELABLE); if (error == 0) { - flags = callout_setclear(c, - CALLOUT_EXECUTED, - CALLOUT_PENDING | - CALLOUT_WAITING); - crit_exit(); - c_func(c_arg); - crit_enter(); - lockmgr(c_lk, LK_RELEASE); - } else { - flags = callout_setclear(c, - 0, - CALLOUT_PENDING); + c->qfunc(c->qarg); + lockmgr(c->lk, LK_RELEASE); + } else if (c->flags & CALLOUT_CANCEL) { + atomic_set_int(&c->verifier->flags, + CALLOUT_CANCEL_RES | + CALLOUT_STOP_RES); + } else if (c->flags & CALLOUT_STOP) { + atomic_set_int(&c->verifier->flags, + CALLOUT_STOP_RES); } - } else if (flags & CALLOUT_ACTIVE) { - flags = callout_setclear(c, - CALLOUT_EXECUTED, - CALLOUT_PENDING | - CALLOUT_WAITING); - crit_exit(); - c_func(c_arg); - crit_enter(); } else { - flags = callout_setclear(c, - 0, - CALLOUT_PENDING | - CALLOUT_WAITING); + /* + * Normal call + */ + c->qfunc(c->qarg); } - /* - * Read and clear sc->running. If bit 0 was set, - * a callout_stop() is likely blocked waiting for - * the callback to complete. - * - * The sigclear above also cleared CALLOUT_WAITING - * and returns the contents of flags prior to clearing - * any bits. - * - * Interlock wakeup any _stop's waiting on us. Note - * that once c_func() was called, the callout - * structure (c) pointer may no longer be valid. It - * can only be used for the wakeup. - */ - if ((atomic_readandclear_ptr(&sc->running) & 1) || - (flags & CALLOUT_WAITING)) { - wakeup(c); + if (sc->running == c) { + /* + * We are still INPROG so (c) remains valid, but + * the callout is now governed by its internal + * spin-lock. + */ + spin_lock(&c->spin); + res = _callout_process_spinlocked(c, 1); + spin_unlock(&c->spin); +#ifdef CALLOUT_TYPESTABLE + if (res >= 0) + _callout_typestable_free(sc, c, res); +#endif } - /* NOTE: list may have changed */ + spin_lock(&wheel->spin); } - ++sc->softticks; + sc->running = NULL; + spin_unlock(&wheel->spin); + atomic_add_int(&sc->softticks, 1); } /* @@ -401,9 +790,17 @@ loop: mpsafe = 1; rel_mplock(); } - sc->isrunning = 0; - lwkt_deschedule_self(&sc->thread); /* == curthread */ - lwkt_switch(); + + /* + * Recheck in critical section to interlock against hardlock + */ + crit_enter(); + if (sc->softticks == (int)(sc->curticks + 1)) { + sc->isrunning = 0; + lwkt_deschedule_self(&sc->thread); /* == curthread */ + lwkt_switch(); + } + crit_exit(); goto loop; /* NOT REACHED */ } @@ -422,547 +819,444 @@ slotimer_callback(void *arg) } /* - * Start or restart a timeout. Installs the callout structure on the - * callwheel of the current cpu. Callers may legally pass any value, even - * if 0 or negative, but since the sc->curticks index may have already - * been processed a minimum timeout of 1 tick will be enforced. - * - * This function will block if the callout is currently queued to a different - * cpu or the callback is currently running in another thread. 
+ * API FUNCTIONS */ -void -callout_reset(struct callout *c, int to_ticks, void (*ftn)(void *), void *arg) -{ - softclock_pcpu_t sc; - globaldata_t gd; - -#ifdef INVARIANTS - if ((c->c_flags & CALLOUT_DID_INIT) == 0) { - callout_init(c); - kprintf( - "callout_reset(%p) from %p: callout was not initialized\n", - c, ((int **)&c)[-1]); - print_backtrace(-1); - } -#endif - gd = mycpu; - sc = softclock_pcpu_ary[gd->gd_cpuid]; - crit_enter_gd(gd); - - /* - * Our cpu must gain ownership of the callout and cancel anything - * still running, which is complex. The easiest way to do it is to - * issue a callout_stop_sync(). callout_stop_sync() will also - * handle CALLOUT_EXECUTED (dispatch waiting), and clear it. - * - * WARNING: callout_stop_sync()'s return state can race other - * callout_*() calls due to blocking, so we must re-check. - */ - for (;;) { - int flags; - int nflags; - - if (c->c_flags & (CALLOUT_ARMED_MASK | CALLOUT_EXECUTED)) - callout_stop_sync(c); - flags = c->c_flags & ~(CALLOUT_ARMED_MASK | CALLOUT_EXECUTED); - nflags = (flags & ~CALLOUT_CPU_MASK) | - CALLOUT_CPU_TO_FLAGS(gd->gd_cpuid) | - CALLOUT_PENDING | - CALLOUT_ACTIVE; - if (atomic_cmpset_int(&c->c_flags, flags, nflags)) - break; - cpu_pause(); - } - - /* - * With the critical section held and PENDING set we now 'own' the - * callout. - */ - if (to_ticks <= 0) - to_ticks = 1; - - c->c_arg = arg; - c->c_func = ftn; - c->c_time = sc->curticks + to_ticks; - - TAILQ_INSERT_TAIL(&sc->callwheel[c->c_time & cwheelmask], - c, c_links.tqe); - crit_exit_gd(gd); -} /* - * Setup a callout to run on the specified cpu. Should generally be used - * to run a callout on a specific cpu which does not nominally change. This - * callout_reset() will be issued asynchronously via an IPI. + * Prepare a callout structure for use by callout_reset() and/or + * callout_stop(). + * + * The MP version of this routine requires that the callback + * function installed by callout_reset() be MP safe. + * + * The LK version of this routine is also MPsafe and will automatically + * acquire the specified lock for the duration of the function call, + * and release it after the function returns. In addition, when autolocking + * is used, callout_stop() becomes synchronous if the caller owns the lock. + * callout_reset(), callout_stop(), and callout_cancel() will block + * normally instead of spinning when a cpu race occurs. Lock cancelation + * is used to avoid deadlocks against the callout ring dispatch. + * + * The init functions can be called from any cpu and do not have to be + * called from the cpu that the timer will eventually run on. 
*/ -void -callout_reset_bycpu(struct callout *c, int to_ticks, void (*ftn)(void *), - void *arg, int cpuid) +static __inline void +_callout_setup(struct callout *cc, int flags CALLOUT_DEBUG_ARGS) { - globaldata_t gd; - globaldata_t tgd; - -#ifdef INVARIANTS - if ((c->c_flags & CALLOUT_DID_INIT) == 0) { - callout_init(c); - kprintf( - "callout_reset(%p) from %p: callout was not initialized\n", - c, ((int **)&c)[-1]); - print_backtrace(-1); - } + bzero(cc, sizeof(*cc)); + cc->flags = flags; /* frontend flags */ +#ifdef CALLOUT_DEBUG +#ifdef CALLOUT_TYPESTABLE + cc->ident = ident; + cc->lineno = lineno; +#else + cc->toc.verifier = cc; /* corruption detector */ + cc->toc.ident = ident; + cc->toc.lineno = lineno; + cc->toc.flags = flags; /* backend flags */ +#endif #endif - gd = mycpu; - crit_enter_gd(gd); - - tgd = globaldata_find(cpuid); - - /* - * This code is similar to the code in callout_reset() but we assign - * the callout to the target cpu. We cannot set PENDING here since - * we cannot atomically add the callout to the target cpu's queue. - * However, incrementing the IPI count has the effect of locking - * the cpu assignment. - * - * WARNING: callout_stop_sync()'s return state can race other - * callout_*() calls due to blocking, so we must re-check. - */ - for (;;) { - int flags; - int nflags; - - if (c->c_flags & (CALLOUT_ARMED_MASK | CALLOUT_EXECUTED)) - callout_stop_sync(c); - flags = c->c_flags & ~(CALLOUT_ARMED_MASK | CALLOUT_EXECUTED); - nflags = (flags & ~(CALLOUT_CPU_MASK | - CALLOUT_EXECUTED)) | - CALLOUT_CPU_TO_FLAGS(tgd->gd_cpuid) | - CALLOUT_ACTIVE; - nflags = nflags + 1; /* bump IPI count */ - if (atomic_cmpset_int(&c->c_flags, flags, nflags)) - break; - cpu_pause(); - } - - /* - * Since we control our +1 in the IPI count, the target cpu cannot - * now change until our IPI is processed. - */ - if (to_ticks <= 0) - to_ticks = 1; - - c->c_arg = arg; - c->c_func = ftn; - c->c_load = to_ticks; /* IPI will add curticks */ - - lwkt_send_ipiq(tgd, callout_reset_ipi, c); - crit_exit_gd(gd); } /* - * Remote IPI for callout_reset_bycpu(). The cpu assignment cannot be - * ripped out from under us due to the count in IPI_MASK, but it is possible - * that other IPIs executed so we must deal with other flags that might - * have been set or cleared. + * Associate an internal _callout with the external callout and + * verify that the type-stable structure is still applicable (inactive + * type-stable _callouts might have been reused for a different callout). + * If not, a new internal structure will be allocated. + * + * Returns the _callout already spin-locked. 
*/ -static void -callout_reset_ipi(void *arg) +static __inline +struct _callout * +_callout_gettoc(struct callout *cc) { - struct callout *c = arg; - globaldata_t gd = mycpu; + struct _callout *c; +#ifdef CALLOUT_TYPESTABLE softclock_pcpu_t sc; - int flags; - int nflags; - - sc = softclock_pcpu_ary[gd->gd_cpuid]; + KKASSERT(cc->flags & CALLOUT_DID_INIT); for (;;) { - flags = c->c_flags; + c = cc->toc; cpu_ccfence(); - KKASSERT((flags & CALLOUT_IPI_MASK) > 0 && - CALLOUT_FLAGS_TO_CPU(flags) == gd->gd_cpuid); - - nflags = (flags - 1) & ~(CALLOUT_EXECUTED | CALLOUT_WAITING); - nflags |= CALLOUT_PENDING; - - /* - * Put us on the queue - */ - if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { - if (flags & CALLOUT_PENDING) { - if (sc->next == c) - sc->next = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE( - &sc->callwheel[c->c_time & cwheelmask], - c, - c_links.tqe); + if (c == NULL) { + sc = softclock_pcpu_ary[mycpu->gd_cpuid]; + c = atomic_swap_ptr((void *)&sc->quick_obj, NULL); + if (c == NULL || _callout_typestable_test(c) == 0) + c = typestable_alloc(&callout_tsg); + /* returns spin-locked */ + c->verifier = cc; + c->flags = cc->flags; + c->lk = cc->lk; + c->ident = cc->ident; + c->lineno = cc->lineno; + if (atomic_cmpset_ptr(&cc->toc, NULL, c)) { + break; } - c->c_time = sc->curticks + c->c_load; - TAILQ_INSERT_TAIL( - &sc->callwheel[c->c_time & cwheelmask], - c, c_links.tqe); - break; + c->verifier = NULL; + spin_unlock(&c->spin); + _callout_typestable_free(sc, c, 0); + } else { + spin_lock(&c->spin); + if (c->verifier == cc) + break; + spin_unlock(&c->spin); + /* ok if atomic op fails */ + (void)atomic_cmpset_ptr(&cc->toc, c, NULL); } - /* retry */ - cpu_pause(); } +#else + c = &cc->toc; + spin_lock(&c->spin); +#endif + /* returns with spin-lock held */ + return c; +} - /* - * Issue wakeup if requested. - */ - if (flags & CALLOUT_WAITING) - wakeup(c); +/* + * Macrod in sys/callout.h for debugging + * + * WARNING! tsleep() assumes this will not block + */ +void +_callout_init(struct callout *cc CALLOUT_DEBUG_ARGS) +{ + _callout_setup(cc, CALLOUT_DID_INIT + CALLOUT_DEBUG_PASSTHRU); +} + +void +_callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS) +{ + _callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE + CALLOUT_DEBUG_PASSTHRU); +} + +void +_callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS) +{ + _callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE | + CALLOUT_AUTOLOCK + CALLOUT_DEBUG_PASSTHRU); +#ifdef CALLOUT_TYPESTABLE + cc->lk = lk; +#else + cc->toc.lk = lk; +#endif } /* - * Stop a running timer and ensure that any running callout completes before - * returning. If the timer is running on another cpu this function may block - * to interlock against the callout. If the callout is currently executing - * or blocked in another thread this function may also block to interlock - * against the callout. + * Start or restart a timeout. New timeouts can be installed while the + * current one is running. + * + * Start or restart a timeout. Installs the callout structure on the + * callwheel of the current cpu. Callers may legally pass any value, even + * if 0 or negative, but since the sc->curticks index may have already + * been processed a minimum timeout of 1 tick will be enforced. 
* - * The caller must be careful to avoid deadlocks, either by using - * callout_init_lk() (which uses the lockmgr lock cancelation feature), - * by using tokens and dealing with breaks in the serialization, or using - * the lockmgr lock cancelation feature yourself in the callout callback - * function. + * This function will not deadlock against a running call. * - * callout_stop() returns non-zero if the callout was pending. + * WARNING! tsleep() assumes this will not block */ -static int -_callout_stop(struct callout *c, int issync) +void +callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg) { - globaldata_t gd = mycpu; - globaldata_t tgd; softclock_pcpu_t sc; - int flags; - int nflags; - int rc; - int cpuid; - -#ifdef INVARIANTS - if ((c->c_flags & CALLOUT_DID_INIT) == 0) { - callout_init(c); - kprintf( - "callout_stop(%p) from %p: callout was not initialized\n", - c, ((int **)&c)[-1]); - print_backtrace(-1); - } + struct _callout *c; + int res; + + atomic_set_int(&cc->flags, CALLOUT_ACTIVE); + c = _callout_gettoc(cc); + atomic_set_int(&c->flags, CALLOUT_RESET); + + sc = softclock_pcpu_ary[mycpu->gd_cpuid]; + c->rsc = sc; + c->rtick = sc->curticks + to_ticks; + c->rfunc = ftn; + c->rarg = arg; +#ifdef CALLOUT_TYPESTABLE + cc->arg = arg; /* only used by callout_arg() */ +#endif + res = _callout_process_spinlocked(c, 0); + spin_unlock(&c->spin); +#ifdef CALLOUT_TYPESTABLE + if (res >= 0) + _callout_typestable_free(sc, c, res); #endif - crit_enter_gd(gd); +} -retry: - /* - * Adjust flags for the required operation. If the callout is - * armed on another cpu we break out into the remote-cpu code which - * will issue an IPI. If it is not armed we are trivially done, - * but may still need to test EXECUTED. - */ - for (;;) { - flags = c->c_flags; - cpu_ccfence(); +/* + * Same as callout_reset() but the timeout will run on a particular cpu. + */ +void +callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *), + void *arg, int cpuid) +{ + softclock_pcpu_t sc; + struct _callout *c; + globaldata_t gd; + int res; - cpuid = CALLOUT_FLAGS_TO_CPU(flags); + gd = globaldata_find(cpuid); + atomic_set_int(&cc->flags, CALLOUT_ACTIVE); + c = _callout_gettoc(cc); + atomic_set_int(&c->flags, CALLOUT_RESET); + atomic_clear_int(&c->flags, CALLOUT_STOP); - /* - * Armed on remote cpu (break to remote-cpu code) - */ - if ((flags & CALLOUT_ARMED_MASK) && gd->gd_cpuid != cpuid) { - nflags = flags + 1; - if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { - /* - * BREAK TO REMOTE-CPU CODE HERE - */ - break; - } - cpu_pause(); - continue; - } - - /* - * Armed or armable on current cpu - */ - if (flags & CALLOUT_IPI_MASK) { - lwkt_process_ipiq(); - cpu_pause(); - continue; /* retry */ - } + sc = softclock_pcpu_ary[gd->gd_cpuid]; + c->rsc = sc; + c->rtick = sc->curticks + to_ticks; + c->rfunc = ftn; + c->rarg = arg; +#ifdef CALLOUT_TYPESTABLE + cc->arg = arg; /* only used by callout_arg() */ +#endif + res = _callout_process_spinlocked(c, 0); + spin_unlock(&c->spin); +#ifdef CALLOUT_TYPESTABLE + if (res >= 0) + _callout_typestable_free(sc, c, res); +#endif +} - /* - * If PENDING is set we can remove the callout from our - * queue and also use the side effect that the bit causes - * the callout to be locked to our cpu. 
- */ - if (flags & CALLOUT_PENDING) { - sc = softclock_pcpu_ary[gd->gd_cpuid]; - if (sc->next == c) - sc->next = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE( - &sc->callwheel[c->c_time & cwheelmask], - c, - c_links.tqe); - c->c_func = NULL; - - for (;;) { - flags = c->c_flags; - cpu_ccfence(); - nflags = flags & ~(CALLOUT_ACTIVE | - CALLOUT_EXECUTED | - CALLOUT_WAITING | - CALLOUT_PENDING); - if (atomic_cmpset_int(&c->c_flags, - flags, nflags)) { - goto skip_slow; - } - cpu_pause(); - } - /* NOT REACHED */ - } +static __inline +void +_callout_cancel_or_stop(struct callout *cc, uint32_t flags) +{ + struct _callout *c; + softclock_pcpu_t sc; + uint32_t oflags; + int res; - /* - * If PENDING was not set the callout might not be locked - * to this cpu. - */ - nflags = flags & ~(CALLOUT_ACTIVE | - CALLOUT_EXECUTED | - CALLOUT_WAITING | - CALLOUT_PENDING); - if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { - goto skip_slow; - } - cpu_pause(); - /* retry */ - } +#ifdef CALLOUT_TYPESTABLE + if (cc->toc == NULL || cc->toc->verifier != cc) + return; +#else + KKASSERT(cc->toc.verifier == cc); +#endif + /* + * Setup for synchronous + */ + atomic_clear_int(&cc->flags, CALLOUT_SYNCDONE | CALLOUT_ACTIVE); + c = _callout_gettoc(cc); + oflags = c->flags; + atomic_set_int(&c->flags, flags | CALLOUT_SYNC); + sc = softclock_pcpu_ary[mycpu->gd_cpuid]; + res = _callout_process_spinlocked(c, 0); + spin_unlock(&c->spin); +#ifdef CALLOUT_TYPESTABLE + if (res >= 0) + _callout_typestable_free(sc, c, res); +#endif /* - * Remote cpu path. We incremented the IPI_MASK count so the callout - * is now locked to the remote cpu and we can safely send an IPI - * to it. + * Wait for stop or completion. NOTE: The backend only + * runs atomic ops on the frontend cc->flags for the sync + * operation. * - * Once sent, wait for all IPIs to be processed. If PENDING remains - * set after all IPIs have processed we raced a callout or - * callout_reset and must retry. Callers expect the callout to - * be completely stopped upon return, so make sure it is. + * WARNING! (c) can go stale now, so do not use (c) after this + * point. */ - tgd = globaldata_find(cpuid); - lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync); - - for (;;) { - flags = c->c_flags; - cpu_ccfence(); - - if ((flags & CALLOUT_IPI_MASK) == 0) - break; - - nflags = flags | CALLOUT_WAITING; - tsleep_interlock(c, 0); - if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { - tsleep(c, PINTERLOCKED, "cstp1", 0); + flags = cc->flags; + if ((flags & CALLOUT_SYNCDONE) == 0) { +#ifdef CALLOUT_TYPESTABLE + if (cc->flags & CALLOUT_AUTOLOCK) + lockmgr(cc->lk, LK_CANCEL_BEG); +#else + if (cc->flags & CALLOUT_AUTOLOCK) + lockmgr(c->lk, LK_CANCEL_BEG); +#endif + while ((flags & CALLOUT_SYNCDONE) == 0) { + tsleep_interlock(cc, 0); + if (atomic_cmpset_int(&cc->flags, + flags | CALLOUT_SYNCDONE, + flags | CALLOUT_SYNCDONE)) { + break; + } + tsleep(cc, PINTERLOCKED, "costp", 0); + flags = cc->flags; /* recheck after sleep */ + cpu_ccfence(); } +#ifdef CALLOUT_TYPESTABLE + if (cc->flags & CALLOUT_AUTOLOCK) + lockmgr(cc->lk, LK_CANCEL_END); +#else + if (cc->flags & CALLOUT_AUTOLOCK) + lockmgr(c->lk, LK_CANCEL_END); +#endif } - if (flags & CALLOUT_PENDING) - goto retry; - - /* - * Caller expects callout_stop_sync() to clear EXECUTED and return - * its previous status. 
- */ - atomic_clear_int(&c->c_flags, CALLOUT_EXECUTED); - -skip_slow: - if (flags & CALLOUT_WAITING) - wakeup(c); /* - * If (issync) we must also wait for any in-progress callbacks to - * complete, unless the stop is being executed from the callback - * itself. The EXECUTED flag is set prior to the callback - * being made so our existing flags status already has it. - * - * If auto-lock mode is being used, this is where we cancel any - * blocked lock that is potentially preventing the target cpu - * from completing the callback. + * If CALLOUT_SYNC was already set before we began, multiple + * threads may have been doing a synchronous wait. This can + * cause the processing code to optimize-out the wakeup(). + * Make sure the wakeup() is issued. */ - while (issync) { - intptr_t *runp; - intptr_t runco; - - sc = softclock_pcpu_ary[cpuid]; - if (gd->gd_curthread == &sc->thread) /* stop from cb */ - break; - runp = &sc->running; - runco = *runp; - cpu_ccfence(); - if ((runco & ~(intptr_t)1) != (intptr_t)c) - break; - if (c->c_flags & CALLOUT_AUTOLOCK) - lockmgr(c->c_lk, LK_CANCEL_BEG); - tsleep_interlock(c, 0); - if (atomic_cmpset_long(runp, runco, runco | 1)) - tsleep(c, PINTERLOCKED, "cstp3", 0); - if (c->c_flags & CALLOUT_AUTOLOCK) - lockmgr(c->c_lk, LK_CANCEL_END); - } - - crit_exit_gd(gd); - rc = (flags & CALLOUT_EXECUTED) != 0; - - return rc; + if (oflags & CALLOUT_SYNC) + wakeup(c->verifier); } /* - * IPI for stop function. The callout is locked to the receiving cpu - * by the IPI_MASK count. + * This is a synchronous STOP which cancels the callout. If AUTOLOCK + * then a CANCEL will be issued to the lock holder. Unlike STOP, the + * cancel function prevents any new callout_reset()s from being issued + * in addition to canceling the lock. The lock will also be deactivated. + * + * Returns 0 if the callout was not active (or was active and completed, + * but didn't try to start a new timeout). + * Returns 1 if the cancel is responsible for stopping the callout. */ -static void -callout_stop_ipi(void *arg, int issync, struct intrframe *frame) +int +callout_cancel(struct callout *cc) { - globaldata_t gd = mycpu; - struct callout *c = arg; - softclock_pcpu_t sc; - int flags; - int nflags; + atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES); + _callout_cancel_or_stop(cc, CALLOUT_CANCEL); - flags = c->c_flags; - cpu_ccfence(); - - KKASSERT(CALLOUT_FLAGS_TO_CPU(flags) == gd->gd_cpuid); - - /* - * We can handle the PENDING flag immediately. - */ - if (flags & CALLOUT_PENDING) { - sc = softclock_pcpu_ary[gd->gd_cpuid]; - if (sc->next == c) - sc->next = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE( - &sc->callwheel[c->c_time & cwheelmask], - c, - c_links.tqe); - c->c_func = NULL; - } - - /* - * Transition to the stopped state and decrement the IPI count. - * Leave the EXECUTED bit alone (the next callout_reset() will - * have to deal with it). - */ - for (;;) { - flags = c->c_flags; - cpu_ccfence(); - nflags = (flags - 1) & ~(CALLOUT_ACTIVE | - CALLOUT_PENDING | - CALLOUT_WAITING); - - if (atomic_cmpset_int(&c->c_flags, flags, nflags)) - break; - cpu_pause(); - } - if (flags & CALLOUT_WAITING) - wakeup(c); + return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0); } +/* + * Currently the same as callout_cancel. Ultimately we may wish the + * drain function to allow a pending callout to proceed, but for now + * we will attempt to to cancel it. + * + * Returns 0 if the callout was not active (or was active and completed, + * but didn't try to start a new timeout). 
+ * Returns 1 if the drain is responsible for stopping the callout. + */ int -callout_stop(struct callout *c) +callout_drain(struct callout *cc) { - return _callout_stop(c, 0); + atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES); + _callout_cancel_or_stop(cc, CALLOUT_CANCEL); + + return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0); } +/* + * Stops a callout if it is pending or queued, does not block. + * This function does not interlock against a callout that is in-progress. + * + * Returns whether the STOP operation was responsible for removing a + * queued or pending callout. + */ int -callout_stop_sync(struct callout *c) +callout_stop_async(struct callout *cc) { - return _callout_stop(c, 1); -} + softclock_pcpu_t sc; + struct _callout *c; + uint32_t flags; + int res; + + atomic_clear_int(&cc->flags, CALLOUT_STOP_RES | CALLOUT_ACTIVE); +#ifdef CALLOUT_TYPESTABLE + if (cc->toc == NULL || cc->toc->verifier != cc) + return 0; +#else + KKASSERT(cc->toc.verifier == cc); +#endif + c = _callout_gettoc(cc); + atomic_set_int(&c->flags, CALLOUT_STOP); + atomic_clear_int(&c->flags, CALLOUT_RESET); + sc = softclock_pcpu_ary[mycpu->gd_cpuid]; + res = _callout_process_spinlocked(c, 0); + flags = cc->flags; + spin_unlock(&c->spin); +#ifdef CALLOUT_TYPESTABLE + if (res >= 0) + _callout_typestable_free(sc, c, res); +#endif -void -callout_stop_async(struct callout *c) -{ - _callout_stop(c, 0); + return ((flags & CALLOUT_STOP_RES) ? 1 : 0); } +/* + * Callout deactivate merely clears the CALLOUT_ACTIVE bit + * Stops a callout if it is pending or queued, does not block. + * This function does not interlock against a callout that is in-progress. + */ void -callout_terminate(struct callout *c) +callout_deactivate(struct callout *cc) { - _callout_stop(c, 1); - atomic_clear_int(&c->c_flags, CALLOUT_DID_INIT); + atomic_clear_int(&cc->flags, CALLOUT_ACTIVE); } /* - * Prepare a callout structure for use by callout_reset() and/or - * callout_stop(). + * lock-aided callouts are STOPped synchronously using STOP semantics + * (meaning that another thread can start the callout again before we + * return). * - * The MP version of this routine requires that the callback - * function installed by callout_reset() be MP safe. + * non-lock-aided callouts * - * The LK version of this routine is also MPsafe and will automatically - * acquire the specified lock for the duration of the function call, - * and release it after the function returns. In addition, when autolocking - * is used, callout_stop() becomes synchronous if the caller owns the lock. - * callout_reset(), callout_stop(), and callout_stop_sync() will block - * normally instead of spinning when a cpu race occurs. Lock cancelation - * is used to avoid deadlocks against the callout ring dispatch. - * - * The init functions can be called from any cpu and do not have to be - * called from the cpu that the timer will eventually run on. + * Stops a callout if it is pending or queued, does not block. + * This function does not interlock against a callout that is in-progress. */ -static __inline void -_callout_init(struct callout *c, int flags) +int +callout_stop(struct callout *cc) { - bzero(c, sizeof *c); - c->c_flags = flags; + if (cc->flags & CALLOUT_AUTOLOCK) { + atomic_clear_int(&cc->flags, CALLOUT_STOP_RES); + _callout_cancel_or_stop(cc, CALLOUT_STOP); + return ((cc->flags & CALLOUT_STOP_RES) ? 
1 : 0); + } else { + return callout_stop_async(cc); + } } /* - * Setup callout, with debugging support + * Terminates a callout by canceling operations and then clears the + * INIT bit. Upon return, the callout structure must not be used. */ -#ifdef callout_init -#undef callout_init -#undef callout_init_mp -#undef callout_init_lk -#endif - void -callout_init(struct callout *c) +callout_terminate(struct callout *cc) { - _callout_init(c, CALLOUT_DID_INIT); -} - -void -callout_init_mp(struct callout *c) -{ - _callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE); -} - -void -callout_init_lk(struct callout *c, struct lock *lk) -{ - _callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK); - c->c_lk = lk; + _callout_cancel_or_stop(cc, CALLOUT_CANCEL); + atomic_clear_int(&cc->flags, CALLOUT_DID_INIT); +#ifdef CALLOUT_TYPESTABLE + atomic_swap_ptr((void *)&cc->toc, NULL); +#else + cc->toc.verifier = NULL; +#endif } -void -callout_initd(struct callout *c, - const char *ident, int lineno) +/* + * Returns whether a callout is queued and the time has not yet + * arrived (the callout is not yet in-progress). + */ +int +callout_pending(struct callout *cc) { - _callout_init(c, CALLOUT_DID_INIT); - c->c_ident = ident; - c->c_lineno = lineno; -} + struct _callout *c; + int res = 0; -void -callout_initd_mp(struct callout *c, - const char *ident, int lineno) -{ - _callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE); - c->c_ident = ident; - c->c_lineno = lineno; + /* + * Don't instantiate toc to test pending + */ +#ifdef CALLOUT_TYPESTABLE + if ((c = cc->toc) != NULL) { +#else + c = &cc->toc; + KKASSERT(c->verifier == cc); + { +#endif + spin_lock(&c->spin); + if (c->verifier == cc) { + res = ((c->flags & (CALLOUT_SET|CALLOUT_INPROG)) == + CALLOUT_SET); + } + spin_unlock(&c->spin); + } + return res; } -void -callout_initd_lk(struct callout *c, struct lock *lk, - const char *ident, int lineno) +/* + * Returns whether a callout is active or not. A callout is active when + * a timeout is set and remains active upon normal termination, even if + * it does not issue a new timeout. A callout is inactive if a timeout has + * never been set or if the callout has been stopped or canceled. The next + * timeout that is set will re-set the active state. + */ +int +callout_active(struct callout *cc) { - _callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK); - c->c_ident = ident; - c->c_lineno = lineno; - c->c_lk = lk; + return ((cc->flags & CALLOUT_ACTIVE) ? 
1 : 0); } diff --git a/sys/kern/kern_wdog.c b/sys/kern/kern_wdog.c index ef3f4c605c..5190c2ce13 100644 --- a/sys/kern/kern_wdog.c +++ b/sys/kern/kern_wdog.c @@ -101,9 +101,10 @@ wdog_reset_all(void *unused) if (period < min_period) min_period = period; } - if (wdog_auto_enable) - callout_reset(&wdog_callout, min_period * hz / 2, wdog_reset_all, NULL); - + if (wdog_auto_enable) { + callout_reset(&wdog_callout, min_period * hz / 2, + wdog_reset_all, NULL); + } wdog_auto_period = min_period; done: @@ -219,8 +220,8 @@ wdog_init(void) static void wdog_uninit(void) { - callout_stop(&wdog_callout); - callout_deactivate(&wdog_callout); + callout_cancel(&wdog_callout); + callout_terminate(&wdog_callout); dev_ops_remove_all(&wdog_ops); spin_uninit(&wdogmtx); } diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c index 6e174facb6..b6aa9e802f 100644 --- a/sys/kern/subr_log.c +++ b/sys/kern/subr_log.c @@ -109,7 +109,7 @@ static int logclose(struct dev_close_args *ap) { log_open = 0; - callout_stop_sync(&logsoftc.sc_callout); + callout_cancel(&logsoftc.sc_callout); logsoftc.sc_state = 0; funsetown(&logsoftc.sc_sigio); return (0); diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c index 993cbe6b9f..32812cc1b6 100644 --- a/sys/kern/subr_taskqueue.c +++ b/sys/kern/subr_taskqueue.c @@ -402,8 +402,7 @@ void taskqueue_drain_timeout(struct taskqueue *queue, struct timeout_task *timeout_task) { - - callout_stop_sync(&timeout_task->c); + callout_cancel(&timeout_task->c); taskqueue_drain(queue, &timeout_task->t); } diff --git a/sys/net/altq/altq_rmclass.c b/sys/net/altq/altq_rmclass.c index 8b1134c470..c9656fd532 100644 --- a/sys/net/altq/altq_rmclass.c +++ b/sys/net/altq/altq_rmclass.c @@ -541,11 +541,12 @@ rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) ALTQ_SQ_ASSERT_LOCKED(ifsq); ALTQ_SQ_UNLOCK(ifsq); - callout_stop_sync(&cl->callout_); + callout_cancel(&cl->callout_); /* Make sure that cl->callout_nmsg_ stops. 
*/ netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler); lwkt_domsg(netisr_cpuport(0), &smsg.lmsg, 0); + callout_terminate(&cl->callout_); ALTQ_SQ_LOCK(ifsq); crit_enter(); diff --git a/sys/net/ipfw/ip_fw2.c b/sys/net/ipfw/ip_fw2.c index a645c15dcc..d6634ee67f 100644 --- a/sys/net/ipfw/ip_fw2.c +++ b/sys/net/ipfw/ip_fw2.c @@ -7506,10 +7506,10 @@ ipfw_ctx_fini_dispatch(netmsg_t nmsg) ASSERT_NETISR_NCPUS(mycpuid); - callout_stop_sync(&ctx->ipfw_stateto_ch); - callout_stop_sync(&ctx->ipfw_trackto_ch); - callout_stop_sync(&ctx->ipfw_keepalive_ch); - callout_stop_sync(&ctx->ipfw_xlatreap_ch); + callout_cancel(&ctx->ipfw_stateto_ch); + callout_cancel(&ctx->ipfw_trackto_ch); + callout_cancel(&ctx->ipfw_keepalive_ch); + callout_cancel(&ctx->ipfw_xlatreap_ch); crit_enter(); netisr_dropmsg(&ctx->ipfw_stateexp_more); @@ -7551,7 +7551,7 @@ ipfw_fini_dispatch(netmsg_t nmsg) ipfw_ctx_fini_dispatch); netisr_domsg_global(&nm); - callout_stop_sync(&ipfw_gd.ipfw_crossref_ch); + callout_cancel(&ipfw_gd.ipfw_crossref_ch); crit_enter(); netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm); crit_exit(); diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 1d026061ae..4cfd8af999 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -686,9 +686,9 @@ carp_clone_destroy_dispatch(netmsg_t msg) sc->sc_dead = TRUE; carp_detach(sc, TRUE, FALSE); - callout_stop_sync(&sc->sc_ad_tmo); - callout_stop_sync(&sc->sc_md_tmo); - callout_stop_sync(&sc->sc_md6_tmo); + callout_cancel(&sc->sc_ad_tmo); + callout_cancel(&sc->sc_md_tmo); + callout_cancel(&sc->sc_md6_tmo); crit_enter(); lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index b199955af4..b4f482dfa0 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -912,11 +912,11 @@ tcp_close(struct tcpcb *tp) * (tp->tt_msg->tt_tcb == NULL), timers are never used too. */ if (tp->tt_msg != NULL && tp->tt_msg->tt_tcb != NULL) { - tcp_callout_stop(tp, tp->tt_rexmt); - tcp_callout_stop(tp, tp->tt_persist); - tcp_callout_stop(tp, tp->tt_keep); - tcp_callout_stop(tp, tp->tt_2msl); - tcp_callout_stop(tp, tp->tt_delack); + tcp_callout_terminate(tp, tp->tt_rexmt); + tcp_callout_terminate(tp, tp->tt_persist); + tcp_callout_terminate(tp, tp->tt_keep); + tcp_callout_terminate(tp, tp->tt_2msl); + tcp_callout_terminate(tp, tp->tt_delack); } if (tp->t_flags & TF_ONOUTPUTQ) { diff --git a/sys/netinet/tcp_timer2.h b/sys/netinet/tcp_timer2.h index a0e6517d93..b8f2e3a349 100644 --- a/sys/netinet/tcp_timer2.h +++ b/sys/netinet/tcp_timer2.h @@ -72,6 +72,18 @@ tcp_callout_stop(struct tcpcb *_tp, struct tcp_callout *_tc) crit_exit(); } +static __inline void +tcp_callout_terminate(struct tcpcb *_tp, struct tcp_callout *_tc) +{ + KKASSERT(_tp->tt_msg->tt_cpuid == mycpuid); + + crit_enter(); + callout_terminate(&_tc->tc_callout); + _tp->tt_msg->tt_tasks &= ~_tc->tc_task; + _tp->tt_msg->tt_running_tasks &= ~_tc->tc_task; + crit_exit(); +} + static __inline void tcp_callout_reset(struct tcpcb *_tp, struct tcp_callout *_tc, int _to_ticks, void (*_func)(void *)) diff --git a/sys/netproto/802_11/README.DRAGONFLY b/sys/netproto/802_11/README.DRAGONFLY index 28792f306d..9865b135bd 100644 --- a/sys/netproto/802_11/README.DRAGONFLY +++ b/sys/netproto/802_11/README.DRAGONFLY @@ -116,7 +116,7 @@ * callout API. callout_init_mtx() is already macrod to callout_init_lk(). - callout_stop() -> callout_stop_sync() + callout_stop() -> callout_cancel() callout_sched() -> must be converted to the proper callout_reset(...) 
call (function must be re-provided). diff --git a/sys/netproto/802_11/wlan/ieee80211_dfs.c b/sys/netproto/802_11/wlan/ieee80211_dfs.c index eeb0c9bb0f..20202ba4d6 100644 --- a/sys/netproto/802_11/wlan/ieee80211_dfs.c +++ b/sys/netproto/802_11/wlan/ieee80211_dfs.c @@ -371,7 +371,9 @@ ieee80211_dfs_notify_radar(struct ieee80211com *ic, struct ieee80211_channel *ch if (callout_pending(&dfs->cac_timer)) #if defined(__DragonFly__) - callout_schedule_dfly(&dfs->cac_timer, 0, cac_timeout, dfs->cac_timer.c_arg); + callout_schedule_dfly(&dfs->cac_timer, 0, + cac_timeout, + callout_arg(&dfs->cac_timer)); #else callout_schedule(&dfs->cac_timer, 0); #endif diff --git a/sys/sys/callout.h b/sys/sys/callout.h index ebd904615d..5006217988 100644 --- a/sys/sys/callout.h +++ b/sys/sys/callout.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014,2018 The DragonFly Project. All rights reserved. + * Copyright (c) 2014,2018,2019 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,40 +31,6 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -/* - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ #ifndef _SYS_CALLOUT_H_ #define _SYS_CALLOUT_H_ @@ -75,153 +41,111 @@ #ifndef _SYS_LOCK_H_ #include #endif -#ifndef _CPU_ATOMIC_H_ -#include -#endif - -SLIST_HEAD(callout_list, callout); -TAILQ_HEAD(callout_tailq, callout); /* - * Callwheel linkages are only adjusted on the target cpu. The target - * cpu can only be [re]assigned when the IPI_MASK and PENDING bits are - * clear. 
+ * WITH TYPESTABLE (currently disabled) * - * callout_reset() and callout_stop() are always synchronous and will - * interlock against a running callout as well as reassign the callout - * to the current cpu. The caller might block, and a deadlock is possible - * if the caller does not use callout_init_lk() or is not careful with - * locks acquired in the callout function. + * This is a mechanism to use a separate internal _callout structure + * for kern_timeout's internals instead of embedding. At the moment + * it doesn't work properly because tsleep() is using a temporary callout + * and can't afford to block on setup. * - * Programers should note that our lockmgr locks have a cancelation feature - * which can be used to avoid deadlocks. callout_init_lk() also uses this - * feature. + * WITHOUT TYPESTABLE <--- * - * callout_deactivate() is asynchronous and will not interlock against - * anything. Deactivation does not dequeue a callout, it simply prevents - * its function from being executed. + * Without typestable we embed the internal _callout structure within + * the external callout structure. We will still use the verifier to + * try to catch corruption. */ +struct callout; +#undef CALLOUT_TYPESTABLE + +struct _callout { + struct spinlock spin; + TAILQ_ENTRY(_callout) entry; + struct callout *verifier; + uint32_t flags; + uint32_t lineno; + struct lock *lk; + const char *ident; + + struct softclock_pcpu *rsc; /* request info */ + void *rarg; + void (*rfunc)(void *); + int rtick; + + struct softclock_pcpu *qsc; /* active info */ + void *qarg; + void (*qfunc)(void *); + int qtick; +}; + struct callout { - union { - SLIST_ENTRY(callout) sle; - TAILQ_ENTRY(callout) tqe; - } c_links; - int c_time; /* match tick on event */ - int c_load; /* load value for reset ipi */ - void *c_arg; /* function argument */ - void (*c_func) (void *); /* function to call */ - int c_flags; /* state of this entry */ - int c_lineno; /* debugging */ - struct lock *c_lk; /* auto-lock */ - const char *c_ident; /* debugging */ +#ifdef CALLOUT_TYPESTABLE + struct _callout *toc; /* opaque internal structure */ + uint32_t flags; /* initial flags */ + uint32_t lineno; /* debugging */ + struct lock *lk; /* interlock */ + const char *ident; /* debugging */ + void *arg; /* ONLY used for callout_arg() XXX */ +#else + struct _callout toc; + uint32_t flags; +#endif }; -/* - * ACTIVE - If cleared this the callout is prevented from issuing its - * callback. The callout remains on its timer queue. - * - * PENDING - Indicates the callout is on a particular cpu's timer queue. - * Also locks the cpu owning the callout. - * - * MPSAFE - Indicates the callout does not need the MP lock (most - * callouts are flagged this way). - * - * DID_INIT - Safety - * - * EXECUTED - Set prior to function dispatch, cleared by callout_reset(), - * cleared and (prior value) returned by callout_stop_sync(). - * - * WAITING - Used for tsleep/wakeup blocking, primarily for - * callout_stop(). - * - * IPI_MASK - Counts pending IPIs. Also locks the cpu owning the callout. - * - * CPU_MASK - Currently assigned cpu. Only valid when at least one bit - * in ARMED_MASK is set. 
- * - */ -#define CALLOUT_ACTIVE 0x80000000 /* quick [de]activation flag */ -#define CALLOUT_PENDING 0x40000000 /* callout is on callwheel */ -#define CALLOUT_MPSAFE 0x20000000 /* callout does not need the BGL */ -#define CALLOUT_DID_INIT 0x10000000 /* safety check */ -#define CALLOUT_AUTOLOCK 0x08000000 /* auto locking / cancel feature */ -#define CALLOUT_WAITING 0x04000000 /* interlocked waiter */ -#define CALLOUT_EXECUTED 0x02000000 /* (generates stop status) */ -#define CALLOUT_UNUSED01 0x01000000 -#define CALLOUT_IPI_MASK 0x00000FFF /* count operations in prog */ -#define CALLOUT_CPU_MASK 0x00FFF000 /* cpu assignment */ - -#define CALLOUT_ARMED_MASK (CALLOUT_PENDING | CALLOUT_IPI_MASK) - -#define CALLOUT_FLAGS_TO_CPU(flags) (((flags) & CALLOUT_CPU_MASK) >> 12) -#define CALLOUT_CPU_TO_FLAGS(cpuid) ((cpuid) << 12) +#ifdef CALLOUT_TYPESTABLE +#define callout_arg(cc) ((cc)->arg) +#else +#define callout_arg(cc) ((cc)->toc.rarg) +#endif -/* - * WARNING! The caller is responsible for stabilizing the callout state, - * our suggestion is to either manage the callout on the same cpu - * or to use the callout_init_lk() feature and hold the lock while - * making callout_*() calls. The lock will be held automatically - * by the callout wheel for any call-back and the callout wheel - * will handle any callout_stop() deadlocks properly. - * - * active - Returns activation status. This bit is set by callout_reset*() - * and will only be cleared by an explicit callout_deactivate() - * or callout_stop(). A function dispatch does not clear this - * bit. In addition, a callout_reset() to another cpu is - * asynchronous and may not immediately re-set this bit. - * - * deactivate - Disarm the callout, preventing it from being executed if it - * is queued or the queueing operation is in-flight. Has no - * effect if the callout has already been dispatched. Does not - * dequeue the callout. Does not affect the status returned - * by callout_stop(). - * - * Not serialized, caller must be careful when racing a new - * callout_reset() that might be issued by the callback, which - * will re-arm the callout. - * - * callout_reset() must be called to reactivate the callout. - * - * pending - Only useful for same-cpu callouts, indicates that the callout - * is pending on the callwheel or that a callout_reset() ipi - * is (probably) in-flight. Can also false-positive on - * callout_stop() IPIs. 
- */ -#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) +#ifdef _KERNEL -#define callout_deactivate(c) atomic_clear_int(&(c)->c_flags, CALLOUT_ACTIVE) +#define CALLOUT_DEBUG +#ifdef CALLOUT_DEBUG +#define CALLOUT_DEBUG_ARGS , const char *ident, int lineno +#define CALLOUT_DEBUG_PASSTHRU , ident, lineno +#else +#define CALLOUT_DEBUG_ARGS +#define CALLOUT_DEBUG_PASSTHRU +#endif -#define callout_pending(c) ((c)->c_flags & CALLOUT_ARMED_MASK) +struct globaldata; -#ifdef _KERNEL -extern int ncallout; +extern int ncallout; -struct globaldata; +int callout_active(struct callout *ext); +int callout_pending(struct callout *ext); +void callout_deactivate(struct callout *ext); void hardclock_softtick(struct globaldata *); -void callout_init (struct callout *); -void callout_init_mp (struct callout *); -void callout_init_lk (struct callout *, struct lock *); -void callout_initd (struct callout *, const char *, int); -void callout_initd_mp (struct callout *, const char *, int); -void callout_initd_lk (struct callout *, struct lock *, const char *, int); -void callout_reset (struct callout *, int, void (*)(void *), void *); -int callout_stop (struct callout *); -void callout_stop_async (struct callout *); -int callout_stop_sync (struct callout *); -void callout_terminate (struct callout *); -void callout_reset_bycpu (struct callout *, int, void (*)(void *), void *, - int); - -#define callout_drain(x) callout_stop_sync(x) +void _callout_init (struct callout *cc + CALLOUT_DEBUG_ARGS); +void _callout_init_mp (struct callout *cc + CALLOUT_DEBUG_ARGS); +void _callout_init_lk (struct callout *cc, struct lock *lk + CALLOUT_DEBUG_ARGS); +void callout_reset (struct callout *, int, + void (*)(void *), void *); +void callout_reset_bycpu (struct callout *, int, + void (*)(void *), void *, + int); +int callout_stop (struct callout *cc); +int callout_stop_async (struct callout *cc); +void callout_terminate (struct callout *cc); +int callout_cancel (struct callout *cc); +int callout_drain (struct callout *cc); -#define CALLOUT_DEBUG #ifdef CALLOUT_DEBUG -#define callout_init(co) callout_initd(co, __FILE__, __LINE__) -#define callout_init_mp(co) callout_initd_mp(co, __FILE__, __LINE__) -#define callout_init_lk(co, lk) callout_initd_lk(co, lk, __FILE__, __LINE__) +#define callout_init(co) _callout_init(co, __FILE__, __LINE__) +#define callout_init_mp(co) _callout_init_mp(co, __FILE__, __LINE__) +#define callout_init_lk(co, lk) _callout_init_lk(co, lk, __FILE__, __LINE__) +#else +#define callout_init(co) _callout_init(co) +#define callout_init_mp(co) _callout_init_mp(co) +#define callout_init_lk(co, lk) _callout_init_lk(co, lk) #endif -#endif +#endif /* _KERNEL */ -#endif /* _SYS_CALLOUT_H_ */ +#endif /* _SYS_CALLOUT_H_ */ -- 2.41.0
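
For reference, a minimal usage sketch of the rewritten API, roughly as a driver
might use it after this change.  The "mydev" softc, lock, callback and hz-based
interval below are illustrative assumptions only; the callout_*() calls
themselves are the ones introduced or retained by the patch, and this sketch is
not part of the commit.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/callout.h>

struct mydev_softc {
        struct lock     sc_lock;        /* interlock for callout_init_lk() */
        struct callout  sc_timer;
        int             sc_ticks_seen;
};

static void
mydev_tick(void *arg)
{
        struct mydev_softc *sc = arg;

        /*
         * sc_lock is held here automatically because the callout was
         * initialized with callout_init_lk().  A new timeout may be
         * installed while the current one is still running.
         */
        ++sc->sc_ticks_seen;
        callout_reset(&sc->sc_timer, hz, mydev_tick, sc);
}

static void
mydev_attach(struct mydev_softc *sc)
{
        lockinit(&sc->sc_lock, "mydev", 0, 0);
        callout_init_lk(&sc->sc_timer, &sc->sc_lock);
        callout_reset(&sc->sc_timer, hz, mydev_tick, sc);
}

static void
mydev_detach(struct mydev_softc *sc)
{
        /*
         * Synchronously cancel, waiting for any in-progress callback
         * (lock cancelation avoids deadlocking against mydev_tick()),
         * then de-flag the structure so it can no longer be armed.
         */
        callout_cancel(&sc->sc_timer);
        callout_terminate(&sc->sc_timer);
}

The teardown path mirrors the kern_wdog.c hunk above: callout_cancel() is used
rather than callout_stop_async() because it interlocks against an in-progress
callback, and callout_terminate() then clears CALLOUT_DID_INIT so any stale
attempt to re-arm the structure is caught.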