gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
	3	*
	4	* This code is derived from software contributed to The DragonFly Project
	5	* by Alex Hornung <ahornung@gmail.com>
	6	*
	7	* Redistribution and use in source and binary forms, with or without
	8	* modification, are permitted provided that the following conditions
	9	* are met:
	10	*
	11	* 1. Redistributions of source code must retain the above copyright
	12	* notice, this list of conditions and the following disclaimer.
	13	* 2. Redistributions in binary form must reproduce the above copyright
	14	* notice, this list of conditions and the following disclaimer in
	15	* the documentation and/or other materials provided with the
	16	* distribution.
	17	* 3. Neither the name of The DragonFly Project nor the names of its
	18	* contributors may be used to endorse or promote products derived
	19	* from this software without specific, prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	22	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	23	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	24	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	25	* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	26	* INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
	27	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	28	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
	29	* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	30	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	31	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	32	* SUCH DAMAGE.
	33	*/
	34	#include <sys/param.h>
	35	#include <sys/systm.h>
	36	#include <sys/kernel.h>
	37	#include <sys/proc.h>
	38	#include <sys/sysctl.h>
	39	#include <sys/buf.h>
	40	#include <sys/conf.h>
	41	#include <sys/diskslice.h>
	42	#include <sys/disk.h>
	43	#include <machine/atomic.h>
	44	#include <sys/malloc.h>
	45	#include <sys/thread.h>
	46	#include <sys/thread2.h>
	47	#include <sys/sysctl.h>
	48	#include <sys/spinlock2.h>
	49	#include <machine/md_var.h>
	50	#include <sys/ctype.h>
	51	#include <sys/syslog.h>
	52	#include <sys/device.h>
	53	#include <sys/msgport.h>
	54	#include <sys/msgport2.h>
	55	#include <sys/buf2.h>
	56	#include <sys/dsched.h>
	57	#include <machine/varargs.h>
	58	#include <machine/param.h>
	59
	60	#include <dsched/fq/dsched_fq.h>
	61
	62	MALLOC_DECLARE(M_DSCHEDFQ);
	63
	64	static int dsched_fq_version_maj = 0;
	65	static int dsched_fq_version_min = 8;
	66
	67	struct dsched_fq_stats fq_stats;
	68
	69	struct objcache_malloc_args dsched_fq_dpriv_malloc_args = {
	70	sizeof(struct dsched_fq_dpriv), M_DSCHEDFQ };
	71	struct objcache_malloc_args dsched_fq_priv_malloc_args = {
	72	sizeof(struct dsched_fq_priv), M_DSCHEDFQ };
	73	struct objcache_malloc_args dsched_fq_mpriv_malloc_args = {
	74	sizeof(struct dsched_fq_mpriv), M_DSCHEDFQ };
	75
	76	static struct objcache *fq_dpriv_cache;
	77	static struct objcache *fq_mpriv_cache;
	78	static struct objcache *fq_priv_cache;
	79
	80	TAILQ_HEAD(, dsched_fq_mpriv) dsched_fqmp_list =
	81	TAILQ_HEAD_INITIALIZER(dsched_fqmp_list);
	82
	83	struct spinlock fq_fqmp_lock;
	84	struct callout fq_callout;
	85
	86	extern struct dsched_ops dsched_fq_ops;
	87
	88	void
	89	fq_reference_dpriv(struct dsched_fq_dpriv *dpriv)
	90	{
	91	int refcount;
	92
	93	refcount = atomic_fetchadd_int(&dpriv->refcount, 1);
	94
	95	KKASSERT(refcount >= 0);
	96	}
	97
	98	void
	99	fq_reference_priv(struct dsched_fq_priv *fqp)
	100	{
	101	int refcount;
	102
	103	refcount = atomic_fetchadd_int(&fqp->refcount, 1);
	104
	105	KKASSERT(refcount >= 0);
	106	}
	107
	108	void
	109	fq_reference_mpriv(struct dsched_fq_mpriv *fqmp)
	110	{
	111	int refcount;
	112
	113	refcount = atomic_fetchadd_int(&fqmp->refcount, 1);
	114
	115	KKASSERT(refcount >= 0);
	116	}
	117
	118	void
	119	fq_dereference_dpriv(struct dsched_fq_dpriv *dpriv)
	120	{
	121	struct dsched_fq_priv fqp, fqp2;
	122	int refcount;
	123
	124	refcount = atomic_fetchadd_int(&dpriv->refcount, -1);
	125
	126
	127	KKASSERT(refcount >= 0 \|\| refcount <= -0x400);
	128
	129	if (refcount == 1) {
	130	atomic_subtract_int(&dpriv->refcount, 0x400); /* mark as: in destruction */
	131	#if 1
	132	kprintf("dpriv (%p) destruction started, trace:\n", dpriv);
	133	print_backtrace(4);
	134	#endif
	135	spin_lock_wr(&dpriv->lock);
	136	TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
	137	TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
	138	fqp->flags &= ~FQP_LINKED_DPRIV;
	139	fq_dereference_priv(fqp);
	140	}
	141	spin_unlock_wr(&dpriv->lock);
	142
	143	objcache_put(fq_dpriv_cache, dpriv);
	144	atomic_subtract_int(&fq_stats.dpriv_allocations, 1);
	145	}
	146	}
	147
	148	void
	149	fq_dereference_priv(struct dsched_fq_priv *fqp)
	150	{
	151	struct dsched_fq_mpriv *fqmp;
	152	struct dsched_fq_dpriv *dpriv;
	153	int refcount;
	154
	155	refcount = atomic_fetchadd_int(&fqp->refcount, -1);
	156
	157	KKASSERT(refcount >= 0 \|\| refcount <= -0x400);
	158
	159	if (refcount == 1) {
	160	atomic_subtract_int(&fqp->refcount, 0x400); /* mark as: in destruction */
	161	#if 0
	162	kprintf("fqp (%p) destruction started, trace:\n", fqp);
	163	print_backtrace(8);
	164	#endif
	165	dpriv = fqp->dpriv;
	166	KKASSERT(dpriv != NULL);
	167
	168	spin_lock_wr(&fqp->lock);
	169
	170	KKASSERT(fqp->qlength == 0);
	171
	172	if (fqp->flags & FQP_LINKED_DPRIV) {
	173	spin_lock_wr(&dpriv->lock);
	174
	175	TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
	176	fqp->flags &= ~FQP_LINKED_DPRIV;
	177
	178	spin_unlock_wr(&dpriv->lock);
	179	}
	180
	181	if (fqp->flags & FQP_LINKED_FQMP) {
	182	fqmp = fqp->fqmp;
	183	KKASSERT(fqmp != NULL);
	184
	185	spin_lock_wr(&fqmp->lock);
	186
	187	TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
	188	fqp->flags &= ~FQP_LINKED_FQMP;
	189
	190	spin_unlock_wr(&fqmp->lock);
	191	}
	192
	193	spin_unlock_wr(&fqp->lock);
	194
	195	objcache_put(fq_priv_cache, fqp);
	196	atomic_subtract_int(&fq_stats.fqp_allocations, 1);
	197	#if 0
	198	fq_dereference_dpriv(dpriv);
	199	#endif
	200	}
	201	}
	202
	203	void
	204	fq_dereference_mpriv(struct dsched_fq_mpriv *fqmp)
	205	{
	206	struct dsched_fq_priv fqp, fqp2;
	207	int refcount;
	208
	209	refcount = atomic_fetchadd_int(&fqmp->refcount, -1);
	210
	211	KKASSERT(refcount >= 0 \|\| refcount <= -0x400);
	212
	213	if (refcount == 1) {
	214	atomic_subtract_int(&fqmp->refcount, 0x400); /* mark as: in destruction */
	215	#if 0
	216	kprintf("fqmp (%p) destruction started, trace:\n", fqmp);
	217	print_backtrace(8);
	218	#endif
	219	FQ_GLOBAL_FQMP_LOCK();
	220	spin_lock_wr(&fqmp->lock);
	221
	222	TAILQ_FOREACH_MUTABLE(fqp, &fqmp->fq_priv_list, link, fqp2) {
	223	TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
	224	fqp->flags &= ~FQP_LINKED_FQMP;
	225	fq_dereference_priv(fqp);
	226	}
	227	TAILQ_REMOVE(&dsched_fqmp_list, fqmp, link);
	228
	229	spin_unlock_wr(&fqmp->lock);
	230	FQ_GLOBAL_FQMP_UNLOCK();
	231
	232	objcache_put(fq_mpriv_cache, fqmp);
	233	atomic_subtract_int(&fq_stats.fqmp_allocations, 1);
	234	}
	235	}
	236
	237
	238	struct dsched_fq_priv *
	239	fq_alloc_priv(struct disk dp, struct dsched_fq_mpriv fqmp)
	240	{
	241	struct dsched_fq_priv *fqp;
	242	#if 0
	243	fq_reference_dpriv(dsched_get_disk_priv(dp));
	244	#endif
	245	fqp = objcache_get(fq_priv_cache, M_WAITOK);
	246	bzero(fqp, sizeof(struct dsched_fq_priv));
	247
	248	/* XXX: maybe we do need another ref for the disk list for fqp */
	249	fq_reference_priv(fqp);
	250
	251	FQ_FQP_LOCKINIT(fqp);
	252	FQ_FQP_LOCK(fqp);
	253	fqp->dp = dp;
	254
	255	fqp->dpriv = dsched_get_disk_priv(dp);
	256
	257	if (fqmp) {
	258	fqp->fqmp = fqmp;
	259	fqp->p = fqmp->p;
	260
	261	/* Put the fqp in the fqmp list */
	262	FQ_FQMP_LOCK(fqmp);
	263	TAILQ_INSERT_TAIL(&fqmp->fq_priv_list, fqp, link);
	264	FQ_FQMP_UNLOCK(fqmp);
	265	fqp->flags \|= FQP_LINKED_FQMP;
	266	}
	267
	268	TAILQ_INIT(&fqp->queue);
	269	TAILQ_INSERT_TAIL(&fqp->dpriv->fq_priv_list, fqp, dlink);
	270	fqp->flags \|= FQP_LINKED_DPRIV;
	271
	272	atomic_add_int(&fq_stats.fqp_allocations, 1);
	273	FQ_FQP_UNLOCK(fqp);
	274	return fqp;
	275	}
	276
	277
	278	struct dsched_fq_dpriv *
	279	fq_alloc_dpriv(struct disk *dp)
	280	{
	281	struct dsched_fq_dpriv *dpriv;
	282
	283	dpriv = objcache_get(fq_dpriv_cache, M_WAITOK);
	284	bzero(dpriv, sizeof(struct dsched_fq_dpriv));
	285	fq_reference_dpriv(dpriv);
	286	dpriv->dp = dp;
	287	dpriv->avg_rq_time = 0;
	288	dpriv->incomplete_tp = 0;
	289	FQ_DPRIV_LOCKINIT(dpriv);
	290	TAILQ_INIT(&dpriv->fq_priv_list);
	291
	292	atomic_add_int(&fq_stats.dpriv_allocations, 1);
	293	return dpriv;
	294	}
	295
	296
	297	struct dsched_fq_mpriv *
	298	fq_alloc_mpriv(struct proc *p)
	299	{
	300	struct dsched_fq_mpriv *fqmp;
	301	struct dsched_fq_priv *fqp;
	302	struct disk *dp = NULL;
	303
	304	fqmp = objcache_get(fq_mpriv_cache, M_WAITOK);
	305	bzero(fqmp, sizeof(struct dsched_fq_mpriv));
	306	fq_reference_mpriv(fqmp);
	307	#if 0
	308	kprintf("fq_alloc_mpriv, new fqmp = %p\n", fqmp);
	309	#endif
	310	FQ_FQMP_LOCKINIT(fqmp);
	311	TAILQ_INIT(&fqmp->fq_priv_list);
	312	fqmp->p = p;
	313
	314	while ((dp = dsched_disk_enumerate(dp, &dsched_fq_ops))) {
	315	fqp = fq_alloc_priv(dp, fqmp);
	316	#if 0
	317	fq_reference_priv(fqp);
	318	#endif
	319	}
	320
	321	FQ_GLOBAL_FQMP_LOCK();
	322	TAILQ_INSERT_TAIL(&dsched_fqmp_list, fqmp, link);
	323	FQ_GLOBAL_FQMP_UNLOCK();
	324
	325	atomic_add_int(&fq_stats.fqmp_allocations, 1);
	326	return fqmp;
	327	}
	328
	329
	330	void
	331	fq_dispatcher(struct dsched_fq_dpriv *dpriv)
	332	{
	333	struct dsched_fq_mpriv *fqmp;
	334	struct dsched_fq_priv fqp, fqp2;
	335	struct bio bio, bio2;
	336	int idle;
	337
	338	/*
	339	* We need to manually assign an fqp to the fqmp of this thread
	340	* since it isn't assigned one during fq_prepare, as the disk
	341	* is not set up yet.
	342	*/
	343	fqmp = dsched_get_thread_priv(curthread);
	344	KKASSERT(fqmp != NULL);
	345
	346	fqp = fq_alloc_priv(dpriv->dp, fqmp);
	347	#if 0
	348	fq_reference_priv(fqp);
	349	#endif
	350
	351	FQ_DPRIV_LOCK(dpriv);
	352	for(;;) {
	353	idle = 0;
	354	/* sleep ~60 ms */
	355	if ((ssleep(dpriv, &dpriv->lock, 0, "fq_dispatcher", hz/15) == 0)) {
	356	/*
	357	* We've been woken up; this either means that we are
	358	* supposed to die away nicely or that the disk is idle.
	359	*/
	360
	361	if (__predict_false(dpriv->die == 1)) {
	362	/* If we are supposed to die, drain all queues */
	363	fq_drain(dpriv, FQ_DRAIN_FLUSH);
	364
	365	/* Now we can safely unlock and exit */
	366	FQ_DPRIV_UNLOCK(dpriv);
	367	kprintf("fq_dispatcher is peacefully dying\n");
	368	lwkt_exit();
	369	/* NOTREACHED */
	370	}
	371
	372	/*
	373	* We have been awakened because the disk is idle.
	374	* So let's get ready to dispatch some extra bios.
	375	*/
	376	idle = 1;
	377	}
	378
	379	/* Maybe the disk is idle and we just didn't get the wakeup */
	380	if (idle == 0)
	381	idle = dpriv->idle;
	382
	383	/*
	384	* XXX: further room for improvements here. It would be better
	385	* to dispatch a few requests from each fqp as to ensure
	386	* real fairness.
	387	*/
	388	TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
	389	if (fqp->qlength == 0)
	390	continue;
	391
	392	FQ_FQP_LOCK(fqp);
	393	if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
	394	fq_balance_self(fqp);
	395	/*
	396	* XXX: why 5 extra? should probably be dynamic,
	397	* relying on information on latency.
	398	*/
	399	if ((fqp->max_tp > 0) && idle &&
	400	(fqp->issued >= fqp->max_tp)) {
	401	fqp->max_tp += 5;
	402	}
	403
	404	TAILQ_FOREACH_MUTABLE(bio, &fqp->queue, link, bio2) {
	405	if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
	406	fq_balance_self(fqp);
	407	if ((fqp->max_tp > 0) &&
	408	((fqp->issued >= fqp->max_tp)))
	409	break;
	410
	411	TAILQ_REMOVE(&fqp->queue, bio, link);
	412	--fqp->qlength;
	413
	414	/*
	415	* beware that we do have an fqp reference
	416	* from the queueing
	417	*/
	418	fq_dispatch(dpriv, bio, fqp);
	419	}
	420	FQ_FQP_UNLOCK(fqp);
	421
	422	}
	423	}
	424	}
	425
	426	void
	427	fq_balance_thread(struct dsched_fq_dpriv *dpriv)
	428	{
	429	struct dsched_fq_priv fqp, fqp2;
	430	static struct timeval old_tv;
	431	struct timeval tv;
	432	int64_t total_budget, product;
	433	int64_t budget[FQ_PRIO_MAX+1];
	434	int n, i, sum, total_disk_time;
	435	int lost_bits;
	436
	437	getmicrotime(&old_tv);
	438
	439	FQ_DPRIV_LOCK(dpriv);
	440	for (;;) {
	441	/* sleep ~1s */
	442	if ((ssleep(curthread, &dpriv->lock, 0, "fq_balancer", hz/2) == 0)) {
	443	if (__predict_false(dpriv->die)) {
	444	FQ_DPRIV_UNLOCK(dpriv);
	445	lwkt_exit();
	446	}
	447	}
	448
	449	bzero(budget, sizeof(budget));
	450	total_budget = 0;
	451	n = 0;
	452
	453	getmicrotime(&tv);
	454
	455	total_disk_time = (int)(1000000*((tv.tv_sec - old_tv.tv_sec)) +
	456	(tv.tv_usec - old_tv.tv_usec));
	457
	458	if (total_disk_time == 0)
	459	total_disk_time = 1;
	460
	461	dsched_debug(LOG_INFO, "total_disk_time = %d\n", total_disk_time);
	462
	463	old_tv = tv;
	464
	465	dpriv->disk_busy = (100*(total_disk_time - dpriv->idle_time)) / total_disk_time;
	466	if (dpriv->disk_busy < 0)
	467	dpriv->disk_busy = 0;
	468
	469	dpriv->idle_time = 0;
	470	lost_bits = 0;
	471
	472	TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
	473	fqp->s_avg_latency = fqp->avg_latency;
	474	fqp->s_transactions = fqp->transactions;
	475	if (fqp->s_transactions > 0 /* 30 */) {
	476	product = fqp->s_avg_latency * fqp->s_transactions;
	477	product >>= lost_bits;
	478	while(total_budget >= INT64_MAX - product) {
	479	++lost_bits;
	480	product >>= 1;
	481	total_budget >>= 1;
	482	}
	483	total_budget += product;
	484	++budget[(fqp->p) ? fqp->p->p_ionice : 0];
	485	KKASSERT(total_budget >= 0);
	486	dsched_debug(LOG_INFO,
	487	"%d) avg_latency = %d, transactions = %d, ioprio = %d\n",
	488	n, fqp->s_avg_latency, fqp->s_transactions,
	489	(fqp->p) ? fqp->p->p_ionice : 0);
	490	++n;
	491	} else {
	492	fqp->max_tp = 0;
	493	}
	494	fqp->rebalance = 0;
	495	fqp->transactions = 0;
	496	fqp->avg_latency = 0;
	497	fqp->issued = 0;
	498	}
	499
	500	dsched_debug(LOG_INFO, "%d procs competing for disk\n"
	501	"total_budget = %jd (lost bits = %d)\n"
	502	"incomplete tp = %d\n", n, (intmax_t)total_budget,
	503	lost_bits, dpriv->incomplete_tp);
	504
	505	if (n == 0)
	506	continue;
	507
	508	sum = 0;
	509
	510	for (i = 0; i < FQ_PRIO_MAX+1; i++) {
	511	if (budget[i] == 0)
	512	continue;
	513	sum += (FQ_PRIO_BIAS+i)*budget[i];
	514	}
	515
	516	if (sum == 0)
	517	sum = 1;
	518
	519	dsched_debug(LOG_INFO, "sum = %d\n", sum);
	520
	521	for (i = 0; i < FQ_PRIO_MAX+1; i++) {
	522	if (budget[i] == 0)
	523	continue;
	524
	525	/*
	526	* XXX: if we still overflow here, we really need to switch to
	527	* some more advanced mechanism such as compound int128 or
	528	* storing the lost bits so they can be used in the
	529	* fq_balance_self.
	530	*/
	531	dpriv->budgetpb[i] = ((FQ_PRIO_BIAS+i)*total_budget/sum) << lost_bits;
	532	KKASSERT(dpriv->budgetpb[i] >= 0);
	533	}
	534
	535	if (total_budget > dpriv->max_budget)
	536	dpriv->max_budget = total_budget;
	537
	538	dsched_debug(4, "disk is %d%% busy\n", dpriv->disk_busy);
	539	TAILQ_FOREACH(fqp, &dpriv->fq_priv_list, dlink) {
	540	fqp->rebalance = 1;
	541	}
	542
	543	dpriv->prev_full = dpriv->last_full;
	544	dpriv->last_full = (dpriv->disk_busy >= 90)?1:0;
	545	}
	546	}
	547
	548
	549	/*
	550	* fq_balance_self should be called from all sorts of dispatchers. It basically
	551	* offloads some of the heavier calculations on throttling onto the process that
	552	* wants to do I/O instead of doing it in the fq_balance thread.
	553	* - should be called with dpriv lock held
	554	*/
	555	void
	556	fq_balance_self(struct dsched_fq_priv *fqp) {
	557	struct dsched_fq_dpriv *dpriv;
	558
	559	int64_t budget, used_budget;
	560	int64_t avg_latency;
	561	int64_t transactions;
	562
	563	transactions = (int64_t)fqp->s_transactions;
	564	avg_latency = (int64_t)fqp->s_avg_latency;
	565	dpriv = fqp->dpriv;
	566
	567	used_budget = ((int64_t)avg_latency * transactions);
	568	budget = dpriv->budgetpb[(fqp->p) ? fqp->p->p_ionice : 0];
	569
	570	if (used_budget > 0) {
	571	dsched_debug(LOG_INFO,
	572	"info: used_budget = %jd, budget = %jd\n",
	573	(intmax_t)used_budget, budget);
	574	}
	575
	576	if ((used_budget > budget) && (dpriv->disk_busy >= 90)) {
	577	KKASSERT(avg_latency != 0);
	578
	579	fqp->max_tp = budget/(avg_latency);
	580	atomic_add_int(&fq_stats.procs_limited, 1);
	581
	582	dsched_debug(LOG_INFO,
	583	"rate limited to %d transactions\n", fqp->max_tp);
	584
	585	} else if (((used_budget*2 < budget) \|\| (dpriv->disk_busy < 80)) &&
	586	(!dpriv->prev_full && !dpriv->last_full)) {
	587	fqp->max_tp = 0;
	588	}
	589	}
	590
	591
	592	static int
	593	do_fqstats(SYSCTL_HANDLER_ARGS)
	594	{
	595	return (sysctl_handle_opaque(oidp, &fq_stats, sizeof(struct dsched_fq_stats), req));
	596	}
	597
	598
	599	SYSCTL_PROC(_kern, OID_AUTO, fq_stats, CTLTYPE_OPAQUE\|CTLFLAG_RD,
	600	0, sizeof(struct dsched_fq_stats), do_fqstats, "fq_stats",
	601	"dsched_fq statistics");
	602
	603
	/* SYSINIT hook registered as fq_register; currently a no-op. */
	static void
	fq_init(void)
	{
		/* nothing to do */
	}
	609
	/* SYSUNINIT hook registered as fq_register; currently a no-op. */
	static void
	fq_uninit(void)
	{
		/* nothing to do */
	}
	615
	616	static void
	617	fq_earlyinit(void)
	618	{
	619	fq_priv_cache = objcache_create("fq-priv-cache", 0, 0,
	620	NULL, NULL, NULL,
	621	objcache_malloc_alloc,
	622	objcache_malloc_free,
	623	&dsched_fq_priv_malloc_args );
	624
	625	fq_mpriv_cache = objcache_create("fq-mpriv-cache", 0, 0,
	626	NULL, NULL, NULL,
	627	objcache_malloc_alloc,
	628	objcache_malloc_free,
	629	&dsched_fq_mpriv_malloc_args );
	630
	631	FQ_GLOBAL_FQMP_LOCKINIT();
	632
	633	fq_dpriv_cache = objcache_create("fq-dpriv-cache", 0, 0,
	634	NULL, NULL, NULL,
	635	objcache_malloc_alloc,
	636	objcache_malloc_free,
	637	&dsched_fq_dpriv_malloc_args );
	638
	639	bzero(&fq_stats, sizeof(struct dsched_fq_stats));
	640
	641	dsched_register(&dsched_fq_ops);
	642	callout_init_mp(&fq_callout);
	643
	644	kprintf("FQ scheduler policy version %d.%d loaded\n",
	645	dsched_fq_version_maj, dsched_fq_version_min);
	646	}
	647
	648	static void
	649	fq_earlyuninit(void)
	650	{
	651	callout_stop(&fq_callout);
	652	callout_deactivate(&fq_callout);
	653	return;
	654	}
	655
	656	SYSINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, fq_init, NULL);
	657	SYSUNINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, fq_uninit, NULL);
	658
	659	SYSINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, fq_earlyinit, NULL);
	660	SYSUNINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, fq_earlyuninit, NULL);