gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2004 The DragonFly Project. All rights reserved.
	3	*
	4	* This code is derived from software contributed to The DragonFly Project
	5	* by Matthew Dillon <dillon@backplane.com>
	6	*
	7	* Redistribution and use in source and binary forms, with or without
	8	* modification, are permitted provided that the following conditions
	9	* are met:
	10	*
	11	* 1. Redistributions of source code must retain the above copyright
	12	* notice, this list of conditions and the following disclaimer.
	13	* 2. Redistributions in binary form must reproduce the above copyright
	14	* notice, this list of conditions and the following disclaimer in
	15	* the documentation and/or other materials provided with the
	16	* distribution.
	17	* 3. Neither the name of The DragonFly Project nor the names of its
	18	* contributors may be used to endorse or promote products derived
	19	* from this software without specific, prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	22	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	23	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	24	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	25	* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	26	* INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
	27	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	28	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
	29	* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	30	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	31	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	32	* SUCH DAMAGE.
	33	*/
	34	/*
	35	* Copyright (c) 1982, 1986, 1991, 1993
	36	* The Regents of the University of California. All rights reserved.
	37	* (c) UNIX System Laboratories, Inc.
	38	* All or some portions of this file are derived from material licensed
	39	* to the University of California by American Telephone and Telegraph
	40	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	41	* the permission of UNIX System Laboratories, Inc.
	42	*
	43	* Redistribution and use in source and binary forms, with or without
	44	* modification, are permitted provided that the following conditions
	45	* are met:
	46	* 1. Redistributions of source code must retain the above copyright
	47	* notice, this list of conditions and the following disclaimer.
	48	* 2. Redistributions in binary form must reproduce the above copyright
	49	* notice, this list of conditions and the following disclaimer in the
	50	* documentation and/or other materials provided with the distribution.
	51	* 3. All advertising materials mentioning features or use of this software
	52	* must display the following acknowledgement:
	53	* This product includes software developed by the University of
	54	* California, Berkeley and its contributors.
	55	* 4. Neither the name of the University nor the names of its contributors
	56	* may be used to endorse or promote products derived from this software
	57	* without specific prior written permission.
	58	*
	59	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	60	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	61	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	62	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	63	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	64	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	65	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	66	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	67	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	68	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	69	* SUCH DAMAGE.
	70	*
	71	* From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
	72	* $FreeBSD: src/sys/kern/kern_timeout.c,v 1.59.2.1 2001/11/13 18:24:52 archie Exp $
	73	* $DragonFly: src/sys/kern/kern_timeout.c,v 1.27 2007/11/14 18:27:52 swildner Exp $
	74	*/
	75	/*
	76	* DRAGONFLY BGL STATUS
	77	*
	78	* All the API functions should be MP safe.
	79	*
	80	* The callback functions will be flagged as being MP safe if the
	81	* timeout structure is initialized with callout_init_mp() instead of
	82	* callout_init().
	83	*
	84	* The helper threads cannot be made preempt-capable until after we
	85	* clean up all the uses of splsoftclock() and related interlocks (which
	86	* require the related functions to be MP safe as well).
	87	*/
	88	/*
	89	* The callout mechanism is based on the work of Adam M. Costello and
	90	* George Varghese, published in a technical report entitled "Redesigning
	91	* the BSD Callout and Timer Facilities" and modified slightly for inclusion
	92	* in FreeBSD by Justin T. Gibbs. The original work on the data structures
	93	* used in this implementation was published by G. Varghese and T. Lauck in
	94	* the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
	95	* the Efficient Implementation of a Timer Facility" in the Proceedings of
	96	* the 11th ACM Annual Symposium on Operating Systems Principles,
	97	* Austin, Texas Nov 1987.
	98	*
	99	* The per-cpu augmentation was done by Matthew Dillon.
	100	*/
	101
	102	#include <sys/param.h>
	103	#include <sys/systm.h>
	104	#include <sys/callout.h>
	105	#include <sys/kernel.h>
	106	#include <sys/interrupt.h>
	107	#include <sys/thread.h>
	108	#include <sys/thread2.h>
	109
	/*
	 * Default upper bound on softclock "steps"; may be overridden at build
	 * time.  NOTE(review): nothing in the visible portion of this file
	 * references this macro.
	 */
	#ifndef MAX_SOFTCLOCK_STEPS
	#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
	#endif
	113
	114
	115	struct softclock_pcpu {
	116	struct callout_tailq *callwheel;
	117	struct callout * volatile next;
	118	int softticks; /* softticks index */
	119	int curticks; /* per-cpu ticks counter */
	120	int isrunning;
	121	struct thread thread;
	122
	123	};
	124
	125	typedef struct softclock_pcpu *softclock_pcpu_t;
	126
	127	/*
	128	* TODO:
	129	* allocate more timeout table slots when table overflows.
	130	*/
	131	static MALLOC_DEFINE(M_CALLOUT, "callout", "callout structures");
	132	static int callwheelsize;
	133	static int callwheelbits;
	134	static int callwheelmask;
	135	static struct softclock_pcpu softclock_pcpu_ary[MAXCPU];
	136
	137	static void softclock_handler(void *arg);
	138
	139	static void
	140	swi_softclock_setup(void *arg)
	141	{
	142	int cpu;
	143	int i;
	144
	145	/*
	146	* Figure out how large a callwheel we need. It must be a power of 2.
	147	*/
	148	callwheelsize = 1;
	149	callwheelbits = 0;
	150	while (callwheelsize < ncallout) {
	151	callwheelsize <<= 1;
	152	++callwheelbits;
	153	}
	154	callwheelmask = callwheelsize - 1;
	155
	156	/*
	157	* Initialize per-cpu data structures.
	158	*/
	159	for (cpu = 0; cpu < ncpus; ++cpu) {
	160	softclock_pcpu_t sc;
	161
	162	sc = &softclock_pcpu_ary[cpu];
	163
	164	sc->callwheel = kmalloc(sizeof(sc->callwheel) callwheelsize,
	165	M_CALLOUT, M_WAITOK\|M_ZERO);
	166	for (i = 0; i < callwheelsize; ++i)
	167	TAILQ_INIT(&sc->callwheel[i]);
	168
	169	/*
	170	* Create a preemption-capable thread for each cpu to handle
	171	* softclock timeouts on that cpu. The preemption can only
	172	* be blocked by a critical section. The thread can itself
	173	* be preempted by normal interrupts.
	174	*/
	175	lwkt_create(softclock_handler, sc, NULL,
	176	&sc->thread, TDF_STOPREQ\|TDF_INTTHREAD, cpu,
	177	"softclock %d", cpu);
	178	#if 0
	179	/*
	180	* Do not make the thread preemptable until we clean up all
	181	* the splsoftclock() calls in the system. Since the threads
	182	* are no longer operated as a software interrupt, the
	183	* splsoftclock() calls will not have any effect on them.
	184	*/
	185	sc->thread.td_preemptable = lwkt_preempt;
	186	#endif
	187	}
	188	}
	189
	/*
	 * Register swi_softclock_setup() to run during boot.
	 * Must occur after ncpus has been initialized.
	 */
	SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
		swi_softclock_setup, NULL);
	195
	196	/*
	197	* This routine is called from the hardclock() (basically a FASTint/IPI) on
	198	* each cpu in the system. sc->curticks is this cpu's notion of the timebase.
	199	* It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'! sc->softticks is where
	200	* the callwheel is currently indexed.
	201	*
	202	* WARNING! The MP lock is not necessarily held on call, nor can it be
	203	* safely obtained.
	204	*
	205	* sc->softticks is adjusted by either this routine or our helper thread
	206	* depending on whether the helper thread is running or not.
	207	*/
	208	void
	209	hardclock_softtick(globaldata_t gd)
	210	{
	211	softclock_pcpu_t sc;
	212
	213	sc = &softclock_pcpu_ary[gd->gd_cpuid];
	214	++sc->curticks;
	215	if (sc->isrunning)
	216	return;
	217	if (sc->softticks == sc->curticks) {
	218	/*
	219	* in sync, only wakeup the thread if there is something to
	220	* do.
	221	*/
	222	if (TAILQ_FIRST(&sc->callwheel[sc->softticks & callwheelmask]))
	223	{
	224	sc->isrunning = 1;
	225	lwkt_schedule(&sc->thread);
	226	} else {
	227	++sc->softticks;
	228	}
	229	} else {
	230	/*
	231	* out of sync, wakeup the thread unconditionally so it can
	232	* catch up.
	233	*/
	234	sc->isrunning = 1;
	235	lwkt_schedule(&sc->thread);
	236	}
	237	}
	238
	239	/*
	240	* This procedure is the main loop of our per-cpu helper thread. The
	241	* sc->isrunning flag prevents us from racing hardclock_softtick() and
	242	* a critical section is sufficient to interlock sc->curticks and protect
	243	* us from remote IPI's / list removal.
	244	*
	245	* The thread starts with the MP lock held and not in a critical section.
	246	* The loop itself is MP safe while individual callbacks may or may not
	247	* be, so we obtain or release the MP lock as appropriate.
	248	*/
	249	static void
	250	softclock_handler(void *arg)
	251	{
	252	softclock_pcpu_t sc;
	253	struct callout *c;
	254	struct callout_tailq *bucket;
	255	void (c_func)(void );
	256	void *c_arg;
	257	#ifdef SMP
	258	int mpsafe = 0;
	259	#endif
	260
	261	lwkt_setpri_self(TDPRI_SOFT_NORM);
	262
	263	sc = arg;
	264	crit_enter();
	265	loop:
	266	while (sc->softticks != (int)(sc->curticks + 1)) {
	267	bucket = &sc->callwheel[sc->softticks & callwheelmask];
	268
	269	for (c = TAILQ_FIRST(bucket); c; c = sc->next) {
	270	if (c->c_time != sc->softticks) {
	271	sc->next = TAILQ_NEXT(c, c_links.tqe);
	272	continue;
	273	}
	274	#ifdef SMP
	275	if (c->c_flags & CALLOUT_MPSAFE) {
	276	if (mpsafe == 0) {
	277	mpsafe = 1;
	278	rel_mplock();
	279	}
	280	} else {
	281	/*
	282	* The request might be removed while we
	283	* are waiting to get the MP lock. If it
	284	* was removed sc->next will point to the
	285	* next valid request or NULL, loop up.
	286	*/
	287	if (mpsafe) {
	288	mpsafe = 0;
	289	sc->next = c;
	290	get_mplock();
	291	if (c != sc->next)
	292	continue;
	293	}
	294	}
	295	#endif
	296	sc->next = TAILQ_NEXT(c, c_links.tqe);
	297	TAILQ_REMOVE(bucket, c, c_links.tqe);
	298
	299	c_func = c->c_func;
	300	c_arg = c->c_arg;
	301	c->c_func = NULL;
	302	KKASSERT(c->c_flags & CALLOUT_DID_INIT);
	303	c->c_flags &= ~CALLOUT_PENDING;
	304	crit_exit();
	305	c_func(c_arg);
	306	crit_enter();
	307	/* NOTE: list may have changed */
	308	}
	309	++sc->softticks;
	310	}
	311	sc->isrunning = 0;
	312	lwkt_deschedule_self(&sc->thread); /* == curthread */
	313	lwkt_switch();
	314	goto loop;
	315	/* NOT REACHED */
	316	}
	317
	318	/*
	319	* New interface; clients allocate their own callout structures.
	320	*
	321	* callout_reset() - establish or change a timeout
	322	* callout_stop() - disestablish a timeout
	323	* callout_init() - initialize a callout structure so that it can
	324	* safely be passed to callout_reset() and callout_stop()
	325	* callout_init_mp() - same but any installed functions must be MP safe.
	326	*
	327	* <sys/callout.h> defines three convenience macros:
	328	*
	329	* callout_active() - returns truth if callout has not been serviced
	330	* callout_pending() - returns truth if callout is still waiting for timeout
	331	* callout_deactivate() - marks the callout as having been serviced
	332	*/
	333
	334	/*
	335	* Start or restart a timeout. Install the callout structure in the
	336	* callwheel. Callers may legally pass any value, even if 0 or negative,
	337	* but since the sc->curticks index may have already been processed a
	338	* minimum timeout of 1 tick will be enforced.
	339	*
	340	* The callout is installed on and will be processed on the current cpu's
	341	* callout wheel.
	342	*
	343	* WARNING! This function may be called from any cpu but the caller must
	344	* serialize callout_stop() and callout_reset() calls on the passed
	345	* structure regardless of cpu.
	346	*/
	347	void
	348	callout_reset(struct callout c, int to_ticks, void (ftn)(void *),
	349	void *arg)
	350	{
	351	softclock_pcpu_t sc;
	352	globaldata_t gd;
	353
	354	#ifdef INVARIANTS
	355	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
	356	callout_init(c);
	357	kprintf(
	358	"callout_reset(%p) from %p: callout was not initialized\n",
	359	c, ((int **)&c)[-1]);
	360	print_backtrace();
	361	}
	362	#endif
	363	gd = mycpu;
	364	sc = &softclock_pcpu_ary[gd->gd_cpuid];
	365	crit_enter_gd(gd);
	366
	367	if (c->c_flags & CALLOUT_PENDING)
	368	callout_stop(c);
	369
	370	if (to_ticks <= 0)
	371	to_ticks = 1;
	372
	373	c->c_arg = arg;
	374	c->c_flags \|= (CALLOUT_ACTIVE \| CALLOUT_PENDING);
	375	c->c_func = ftn;
	376	c->c_time = sc->curticks + to_ticks;
	377	#ifdef SMP
	378	c->c_gd = gd;
	379	#endif
	380
	381	TAILQ_INSERT_TAIL(&sc->callwheel[c->c_time & callwheelmask],
	382	c, c_links.tqe);
	383	crit_exit_gd(gd);
	384	}
	385
	386	/*
	387	* Stop a running timer. WARNING! If called on a cpu other then the one
	388	* the callout was started on this function will liveloop on its IPI to
	389	* the target cpu to process the request. It is possible for the callout
	390	* to execute in that case.
	391	*
	392	* WARNING! This function may be called from any cpu but the caller must
	393	* serialize callout_stop() and callout_reset() calls on the passed
	394	* structure regardless of cpu.
	395	*
	396	* WARNING! This routine may be called from an IPI
	397	*/
	398	int
	399	callout_stop(struct callout *c)
	400	{
	401	globaldata_t gd = mycpu;
	402	#ifdef SMP
	403	globaldata_t tgd;
	404	#endif
	405	softclock_pcpu_t sc;
	406
	407	#ifdef INVARIANTS
	408	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
	409	callout_init(c);
	410	kprintf(
	411	"callout_stop(%p) from %p: callout was not initialized\n",
	412	c, ((int **)&c)[-1]);
	413	print_backtrace();
	414	}
	415	#endif
	416	crit_enter_gd(gd);
	417
	418	/*
	419	* Don't attempt to delete a callout that's not on the queue. The
	420	* callout may not have a cpu assigned to it. Callers do not have
	421	* to be on the issuing cpu but must still serialize access to the
	422	* callout structure.
	423	*
	424	* We are not cpu-localized here and cannot safely modify the
	425	* flags field in the callout structure. Note that most of the
	426	* time CALLOUT_ACTIVE will be 0 if CALLOUT_PENDING is also 0.
	427	*
	428	* If we race another cpu's dispatch of this callout it is possible
	429	* for CALLOUT_ACTIVE to be set with CALLOUT_PENDING unset. This
	430	* will cause us to fall through and synchronize with the other
	431	* cpu.
	432	*/
	433	if ((c->c_flags & CALLOUT_PENDING) == 0) {
	434	#ifdef SMP
	435	if ((c->c_flags & CALLOUT_ACTIVE) == 0) {
	436	crit_exit_gd(gd);
	437	return (0);
	438	}
	439	if (c->c_gd == NULL \|\| c->c_gd == gd) {
	440	c->c_flags &= ~CALLOUT_ACTIVE;
	441	crit_exit_gd(gd);
	442	return (0);
	443	}
	444	/* fall-through to the cpu-localization code. */
	445	#else
	446	c->c_flags &= ~CALLOUT_ACTIVE;
	447	crit_exit_gd(gd);
	448	return (0);
	449	#endif
	450	}
	451	#ifdef SMP
	452	if ((tgd = c->c_gd) != gd) {
	453	/*
	454	* If the callout is owned by a different CPU we have to
	455	* execute the function synchronously on the target cpu.
	456	*/
	457	int seq;
	458
	459	cpu_ccfence(); /* don't let tgd alias c_gd */
	460	seq = lwkt_send_ipiq(tgd, (void *)callout_stop, c);
	461	lwkt_wait_ipiq(tgd, seq);
	462	} else
	463	#endif
	464	{
	465	/*
	466	* If the callout is owned by the same CPU we can
	467	* process it directly, but if we are racing our helper
	468	* thread (sc->next), we have to adjust sc->next. The
	469	* race is interlocked by a critical section.
	470	*/
	471	sc = &softclock_pcpu_ary[gd->gd_cpuid];
	472
	473	c->c_flags &= ~(CALLOUT_ACTIVE \| CALLOUT_PENDING);
	474	if (sc->next == c)
	475	sc->next = TAILQ_NEXT(c, c_links.tqe);
	476
	477	TAILQ_REMOVE(&sc->callwheel[c->c_time & callwheelmask],
	478	c, c_links.tqe);
	479	c->c_func = NULL;
	480	}
	481	crit_exit_gd(gd);
	482	return (1);
	483	}
	484
	485	/*
	486	* Prepare a callout structure for use by callout_reset() and/or
	487	* callout_stop(). The MP version of this routine requires that the callback
	488	* function installed by callout_reset() be MP safe.
	489	*/
	490	void
	491	callout_init(struct callout *c)
	492	{
	493	bzero(c, sizeof *c);
	494	c->c_flags = CALLOUT_DID_INIT;
	495	}
	496
	497	void
	498	callout_init_mp(struct callout *c)
	499	{
	500	callout_init(c);
	501	c->c_flags \|= CALLOUT_MPSAFE;
	502	}
	503
	/* What, are you joking? This is nuts! -Matt */
	#if 0
	#ifdef APM_FIXUP_CALLTODO
	/*
	 * Adjust the kernel calltodo timeout list. This routine is used after
	 * an APM resume to recalculate the calltodo timer list values with the
	 * number of hz's we have been sleeping. The next hardclock() will detect
	 * that there are fired timers and run softclock() to execute them.
	 *
	 * Please note, I have not done an exhaustive analysis of what code this
	 * might break. I am motivated to have my select()'s and alarm()'s that
	 * have expired during suspend firing upon resume so that the applications
	 * which set the timer can do the maintenance the timer was for as close
	 * as possible to the originally intended time. Testing this code for a
	 * week showed that resuming from a suspend resulted in 22 to 25 timers
	 * firing, which seemed independent on whether the suspend was 2 hours or
	 * 2 days. Your mileage may vary. - Ken Key <key@cs.utk.edu>
	 *
	 * NOTE(review): this block is compiled out (#if 0) and refers to a
	 * 'calltodo' list that is not defined in the visible code.
	 */
	void
	adjust_timeout_calltodo(struct timeval *time_change)
	{
		struct callout *p;
		unsigned long delta_ticks;

		/*
		 * How many ticks were we asleep?
		 * (stolen from tvtohz()).
		 */

		/* Don't do anything */
		if (time_change->tv_sec < 0)
			return;
		else if (time_change->tv_sec <= LONG_MAX / 1000000)
			delta_ticks = (time_change->tv_sec * 1000000 +
				       time_change->tv_usec + (tick - 1)) / tick + 1;
		else if (time_change->tv_sec <= LONG_MAX / hz)
			delta_ticks = time_change->tv_sec * hz +
				      (time_change->tv_usec + (tick - 1)) / tick + 1;
		else
			delta_ticks = LONG_MAX;

		if (delta_ticks > INT_MAX)
			delta_ticks = INT_MAX;

		/*
		 * Now rip through the timer calltodo list looking for timers
		 * to expire.
		 */

		/* don't collide with softclock() */
		crit_enter();
		for (p = calltodo.c_next; p != NULL; p = p->c_next) {
			p->c_time -= delta_ticks;

			/* Break if the timer had more time on it than delta_ticks */
			if (p->c_time > 0)
				break;

			/* take back the ticks the timer didn't use (p->c_time <= 0) */
			delta_ticks = -p->c_time;
		}
		crit_exit();

		return;
	}
	#endif /* APM_FIXUP_CALLTODO */
	#endif
	571