76a2f53180b49c65efbf2c55cbd300d0da7dd089
[dragonfly.git] / sys / dsched / fq / dsched_fq_core.c
1 /*
2  * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Alex Hornung <ahornung@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/proc.h>
38 #include <sys/sysctl.h>
39 #include <sys/buf.h>
40 #include <sys/conf.h>
41 #include <sys/diskslice.h>
42 #include <sys/disk.h>
43 #include <machine/atomic.h>
44 #include <sys/malloc.h>
45 #include <sys/thread.h>
46 #include <sys/thread2.h>
47 #include <sys/sysctl.h>
48 #include <sys/spinlock2.h>
49 #include <machine/md_var.h>
50 #include <sys/ctype.h>
51 #include <sys/syslog.h>
52 #include <sys/device.h>
53 #include <sys/msgport.h>
54 #include <sys/msgport2.h>
55 #include <sys/buf2.h>
56 #include <sys/dsched.h>
57 #include <machine/varargs.h>
58 #include <machine/param.h>
59
60 #include <dsched/fq/dsched_fq.h>
61
MALLOC_DECLARE(M_DSCHEDFQ);

/* Policy version reported by fq_earlyinit() at load time */
static int	dsched_fq_version_maj = 0;
static int	dsched_fq_version_min = 8;

/* Global counters, exported read-only through the kern.fq_stats sysctl */
struct dsched_fq_stats	fq_stats;

/* malloc-backed objcache argument blocks for the three fq structures */
struct objcache_malloc_args dsched_fq_dpriv_malloc_args = {
	sizeof(struct dsched_fq_dpriv), M_DSCHEDFQ };
struct objcache_malloc_args dsched_fq_priv_malloc_args = {
	sizeof(struct dsched_fq_priv), M_DSCHEDFQ };
struct objcache_malloc_args dsched_fq_mpriv_malloc_args = {
	sizeof(struct dsched_fq_mpriv), M_DSCHEDFQ };

/* Object caches created in fq_earlyinit() */
static struct objcache	*fq_dpriv_cache;
static struct objcache	*fq_mpriv_cache;
static struct objcache	*fq_priv_cache;

/* Global list of all per-process (fqmp) structures */
TAILQ_HEAD(, dsched_fq_mpriv)	dsched_fqmp_list =
		TAILQ_HEAD_INITIALIZER(dsched_fqmp_list);

/* Protects dsched_fqmp_list; taken via FQ_GLOBAL_FQMP_LOCK/UNLOCK */
struct spinlock	fq_fqmp_lock;
struct callout	fq_callout;

extern struct dsched_ops dsched_fq_ops;
87
88 void
89 fq_reference_dpriv(struct dsched_fq_dpriv *dpriv)
90 {
91         int refcount;
92
93         refcount = atomic_fetchadd_int(&dpriv->refcount, 1);
94
95         KKASSERT(refcount >= 0);
96 }
97
98 void
99 fq_reference_priv(struct dsched_fq_priv *fqp)
100 {
101         int refcount;
102
103         refcount = atomic_fetchadd_int(&fqp->refcount, 1);
104
105         KKASSERT(refcount >= 0);
106 }
107
108 void
109 fq_reference_mpriv(struct dsched_fq_mpriv *fqmp)
110 {
111         int refcount;
112
113         refcount = atomic_fetchadd_int(&fqmp->refcount, 1);
114
115         KKASSERT(refcount >= 0);
116 }
117
/*
 * Drop a reference on a per-disk private structure (dpriv).  When the
 * last reference goes away the dpriv is torn down: every fqp still on
 * its list is unlinked and dereferenced, and the dpriv is returned to
 * its objcache.
 */
void
fq_dereference_dpriv(struct dsched_fq_dpriv *dpriv)
{
	struct dsched_fq_priv	*fqp, *fqp2;
	int refcount;

	/* atomic_fetchadd_int() returns the PREVIOUS value */
	refcount = atomic_fetchadd_int(&dpriv->refcount, -1);


	/*
	 * Valid counts are either non-negative (live object) or at/below
	 * -0x400, the "in destruction" bias applied below.
	 */
	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		/*
		 * We dropped the last reference (count is now 0).  Bias the
		 * count far negative so stray dereferences trip the
		 * assertion above instead of re-entering destruction.
		 */
		atomic_subtract_int(&dpriv->refcount, 0x400); /* mark as: in destruction */
#if 1
		kprintf("dpriv (%p) destruction started, trace:\n", dpriv);
		print_backtrace(4);
#endif
		/* Unlink and release every fqp still attached to this disk */
		spin_lock_wr(&dpriv->lock);
		TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
			TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
			fqp->flags &= ~FQP_LINKED_DPRIV;
			fq_dereference_priv(fqp);
		}
		spin_unlock_wr(&dpriv->lock);

		objcache_put(fq_dpriv_cache, dpriv);
		atomic_subtract_int(&fq_stats.dpriv_allocations, 1);
	}
}
147
/*
 * Drop a reference on a per-process/per-disk queue (fqp).  When the
 * last reference is released the fqp is unlinked from the disk list
 * (dpriv) and/or process list (fqmp), as dictated by its FQP_LINKED_*
 * flags, and returned to its objcache.  The bio queue must be empty.
 */
void
fq_dereference_priv(struct dsched_fq_priv *fqp)
{
	struct dsched_fq_mpriv	*fqmp;
	struct dsched_fq_dpriv	*dpriv;
	int refcount;

	/* atomic_fetchadd_int() returns the PREVIOUS value */
	refcount = atomic_fetchadd_int(&fqp->refcount, -1);

	/* Either still live (>= 0) or carrying the destruction bias */
	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		/* Last ref dropped; bias the count to flag destruction */
		atomic_subtract_int(&fqp->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("fqp (%p) destruction started, trace:\n", fqp);
		print_backtrace(8);
#endif
		dpriv = fqp->dpriv;
		KKASSERT(dpriv != NULL);

		spin_lock_wr(&fqp->lock);

		/* A dying fqp must not have bios still queued */
		KKASSERT(fqp->qlength == 0);

		if (fqp->flags & FQP_LINKED_DPRIV) {
			/* Unlink from the disk's fqp list */
			spin_lock_wr(&dpriv->lock);

			TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
			fqp->flags &= ~FQP_LINKED_DPRIV;

			spin_unlock_wr(&dpriv->lock);
		}

		if (fqp->flags & FQP_LINKED_FQMP) {
			/* Unlink from the owning process' fqp list */
			fqmp = fqp->fqmp;
			KKASSERT(fqmp != NULL);

			spin_lock_wr(&fqmp->lock);

			TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
			fqp->flags &= ~FQP_LINKED_FQMP;

			spin_unlock_wr(&fqmp->lock);
		}

		spin_unlock_wr(&fqp->lock);

		objcache_put(fq_priv_cache, fqp);
		atomic_subtract_int(&fq_stats.fqp_allocations, 1);
#if 0
		fq_dereference_dpriv(dpriv);
#endif
	}
}
202
/*
 * Drop a reference on a per-process private structure (fqmp).  On last
 * release every attached fqp is unlinked and dereferenced, the fqmp is
 * removed from the global fqmp list and returned to its objcache.
 *
 * Lock order: global fqmp list lock, then the fqmp's own lock.
 */
void
fq_dereference_mpriv(struct dsched_fq_mpriv *fqmp)
{
	struct dsched_fq_priv	*fqp, *fqp2;
	int refcount;

	/* atomic_fetchadd_int() returns the PREVIOUS value */
	refcount = atomic_fetchadd_int(&fqmp->refcount, -1);

	/* Either still live (>= 0) or carrying the destruction bias */
	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		/* Last ref dropped; bias the count to flag destruction */
		atomic_subtract_int(&fqmp->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("fqmp (%p) destruction started, trace:\n", fqmp);
		print_backtrace(8);
#endif
		FQ_GLOBAL_FQMP_LOCK();
		spin_lock_wr(&fqmp->lock);

		/* Detach and release every fqp owned by this process */
		TAILQ_FOREACH_MUTABLE(fqp, &fqmp->fq_priv_list, link, fqp2) {
			TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
			fqp->flags &= ~FQP_LINKED_FQMP;
			fq_dereference_priv(fqp);
		}
		TAILQ_REMOVE(&dsched_fqmp_list, fqmp, link);

		spin_unlock_wr(&fqmp->lock);
		FQ_GLOBAL_FQMP_UNLOCK();

		objcache_put(fq_mpriv_cache, fqmp);
		atomic_subtract_int(&fq_stats.fqmp_allocations, 1);
	}
}
236
237
/*
 * Allocate a per-process/per-disk queue (fqp) for disk dp and, when
 * fqmp is non-NULL, link it into that process' fqp list.  The fqp is
 * always linked onto the disk's dpriv list.  The returned fqp carries
 * one reference for the caller.
 */
struct dsched_fq_priv *
fq_alloc_priv(struct disk *dp, struct dsched_fq_mpriv *fqmp)
{
	struct dsched_fq_priv	*fqp;
#if 0
	fq_reference_dpriv(dsched_get_disk_priv(dp));
#endif
	fqp = objcache_get(fq_priv_cache, M_WAITOK);
	bzero(fqp, sizeof(struct dsched_fq_priv));

	/* XXX: maybe we do need another ref for the disk list for fqp */
	fq_reference_priv(fqp);

	FQ_FQP_LOCKINIT(fqp);
	FQ_FQP_LOCK(fqp);
	fqp->dp = dp;

	fqp->dpriv = dsched_get_disk_priv(dp);

	if (fqmp) {
		fqp->fqmp = fqmp;
		fqp->p = fqmp->p;

		/* Put the fqp in the fqmp list */
		FQ_FQMP_LOCK(fqmp);
		TAILQ_INSERT_TAIL(&fqmp->fq_priv_list, fqp, link);
		FQ_FQMP_UNLOCK(fqmp);
		fqp->flags |= FQP_LINKED_FQMP;
	}

	TAILQ_INIT(&fqp->queue);
	/*
	 * NOTE(review): the dpriv fqp list is modified here without
	 * holding dpriv->lock (unlike the fqmp insert above) - confirm
	 * callers serialize this externally.
	 */
	TAILQ_INSERT_TAIL(&fqp->dpriv->fq_priv_list, fqp, dlink);
	fqp->flags |= FQP_LINKED_DPRIV;

	atomic_add_int(&fq_stats.fqp_allocations, 1);
	FQ_FQP_UNLOCK(fqp);
	return fqp;
}
276
277
278 struct dsched_fq_dpriv *
279 fq_alloc_dpriv(struct disk *dp)
280 {
281         struct dsched_fq_dpriv *dpriv;
282
283         dpriv = objcache_get(fq_dpriv_cache, M_WAITOK);
284         bzero(dpriv, sizeof(struct dsched_fq_dpriv));
285         fq_reference_dpriv(dpriv);
286         dpriv->dp = dp;
287         dpriv->avg_rq_time = 0;
288         dpriv->incomplete_tp = 0;
289         FQ_DPRIV_LOCKINIT(dpriv);
290         TAILQ_INIT(&dpriv->fq_priv_list);
291
292         atomic_add_int(&fq_stats.dpriv_allocations, 1);
293         return dpriv;
294 }
295
296
297 struct dsched_fq_mpriv *
298 fq_alloc_mpriv(struct proc *p)
299 {
300         struct dsched_fq_mpriv  *fqmp;
301         struct dsched_fq_priv   *fqp;
302         struct disk     *dp = NULL;
303
304         fqmp = objcache_get(fq_mpriv_cache, M_WAITOK);
305         bzero(fqmp, sizeof(struct dsched_fq_mpriv));
306         fq_reference_mpriv(fqmp);
307 #if 0
308         kprintf("fq_alloc_mpriv, new fqmp = %p\n", fqmp);
309 #endif
310         FQ_FQMP_LOCKINIT(fqmp);
311         TAILQ_INIT(&fqmp->fq_priv_list);
312         fqmp->p = p;
313
314         while ((dp = dsched_disk_enumerate(dp, &dsched_fq_ops))) {
315                 fqp = fq_alloc_priv(dp, fqmp);
316 #if 0
317                 fq_reference_priv(fqp);
318 #endif
319         }
320
321         FQ_GLOBAL_FQMP_LOCK();
322         TAILQ_INSERT_TAIL(&dsched_fqmp_list, fqmp, link);
323         FQ_GLOBAL_FQMP_UNLOCK();
324
325         atomic_add_int(&fq_stats.fqmp_allocations, 1);
326         return fqmp;
327 }
328
329
/*
 * Per-disk dispatcher thread body.  Loops forever: sleeps for roughly
 * 1/15 s (or until woken), then walks every fqp on the disk and
 * dispatches queued bios, honoring each fqp's transaction limit
 * (max_tp) as computed by the balancer.  A wakeup means either the
 * disk went idle or dpriv->die was set; in the latter case all queues
 * are drained and the thread exits via lwkt_exit().
 */
void
fq_dispatcher(struct dsched_fq_dpriv *dpriv)
{
	struct dsched_fq_mpriv	*fqmp;
	struct dsched_fq_priv	*fqp, *fqp2;
	struct bio *bio, *bio2;
	int idle;

	/*
	 * We need to manually assign an fqp to the fqmp of this thread
	 * since it isn't assigned one during fq_prepare, as the disk
	 * is not set up yet.
	 */
	fqmp = dsched_get_thread_priv(curthread);
	KKASSERT(fqmp != NULL);

	fqp = fq_alloc_priv(dpriv->dp, fqmp);
#if 0
	fq_reference_priv(fqp);
#endif

	FQ_DPRIV_LOCK(dpriv);
	for(;;) {
		idle = 0;
		/* sleep ~60 ms */
		if ((ssleep(dpriv, &dpriv->lock, 0, "fq_dispatcher", hz/15) == 0)) {
			/*
			 * We've been woken up; this either means that we are
			 * supposed to die away nicely or that the disk is idle.
			 */

			if (__predict_false(dpriv->die == 1)) {
				/* If we are supposed to die, drain all queues */
				fq_drain(dpriv, FQ_DRAIN_FLUSH);

				/* Now we can safely unlock and exit */
				FQ_DPRIV_UNLOCK(dpriv);
				kprintf("fq_dispatcher is peacefully dying\n");
				lwkt_exit();
				/* NOTREACHED */
			}

			/*
			 * We have been awakened because the disk is idle.
			 * So let's get ready to dispatch some extra bios.
			 */
			idle = 1;
		}

		/* Maybe the disk is idle and we just didn't get the wakeup */
		if (idle == 0)
			idle = dpriv->idle;

		/*
		 * XXX: further room for improvements here. It would be better
		 *      to dispatch a few requests from each fqp as to ensure
		 *      real fairness.
		 */
		TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
			/* Nothing queued for this process on this disk */
			if (fqp->qlength == 0)
				continue;

			FQ_FQP_LOCK(fqp);
			/* Recompute our own throttle if the balancer asked */
			if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
				fq_balance_self(fqp);
			/*
			 * XXX: why 5 extra? should probably be dynamic,
			 *      relying on information on latency.
			 */
			/* On an idle disk, let a capped fqp issue a few more */
			if ((fqp->max_tp > 0) && idle &&
			    (fqp->issued >= fqp->max_tp)) {
				fqp->max_tp += 5;
			}

			TAILQ_FOREACH_MUTABLE(bio, &fqp->queue, link, bio2) {
				if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
					fq_balance_self(fqp);
				/* Stop once the transaction budget is used up */
				if ((fqp->max_tp > 0) &&
				    ((fqp->issued >= fqp->max_tp)))
					break;

				TAILQ_REMOVE(&fqp->queue, bio, link);
				--fqp->qlength;

				/*
				 * beware that we do have an fqp reference
				 * from the queueing
				 */
				fq_dispatch(dpriv, bio, fqp);
			}
			FQ_FQP_UNLOCK(fqp);

		}
	}
}
425
426 void
427 fq_balance_thread(struct dsched_fq_dpriv *dpriv)
428 {
429         struct  dsched_fq_priv  *fqp, *fqp2;
430         static struct timeval old_tv;
431         struct timeval tv;
432         int64_t total_budget, product;
433         int64_t budget[FQ_PRIO_MAX+1];
434         int     n, i, sum, total_disk_time;
435         int     lost_bits;
436
437         getmicrotime(&old_tv);
438
439         FQ_DPRIV_LOCK(dpriv);
440         for (;;) {
441                 /* sleep ~1s */
442                 if ((ssleep(curthread, &dpriv->lock, 0, "fq_balancer", hz/2) == 0)) {
443                         if (__predict_false(dpriv->die)) {
444                                 FQ_DPRIV_UNLOCK(dpriv);
445                                 lwkt_exit();
446                         }
447                 }
448
449                 bzero(budget, sizeof(budget));
450                 total_budget = 0;
451                 n = 0;
452
453                 getmicrotime(&tv);
454
455                 total_disk_time = (int)(1000000*((tv.tv_sec - old_tv.tv_sec)) +
456                     (tv.tv_usec - old_tv.tv_usec));
457
458                 if (total_disk_time == 0)
459                         total_disk_time = 1;
460
461                 dsched_debug(LOG_INFO, "total_disk_time = %d\n", total_disk_time);
462
463                 old_tv = tv;
464
465                 dpriv->disk_busy = (100*(total_disk_time - dpriv->idle_time)) / total_disk_time;
466                 if (dpriv->disk_busy < 0)
467                         dpriv->disk_busy = 0;
468
469                 dpriv->idle_time = 0;
470                 lost_bits = 0;
471
472                 TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
473                         fqp->s_avg_latency = fqp->avg_latency;
474                         fqp->s_transactions = fqp->transactions;
475                         if (fqp->s_transactions > 0 /* 30 */) {
476                                 product = fqp->s_avg_latency * fqp->s_transactions;
477                                 product >>= lost_bits;
478                                 while(total_budget >= INT64_MAX - product) {
479                                         ++lost_bits;
480                                         product >>= 1;
481                                         total_budget >>= 1;
482                                 }
483                                 total_budget += product;
484                                 ++budget[(fqp->p) ? fqp->p->p_ionice : 0];
485                                 KKASSERT(total_budget >= 0);
486                                 dsched_debug(LOG_INFO,
487                                     "%d) avg_latency = %d, transactions = %d, ioprio = %d\n",
488                                     n, fqp->s_avg_latency, fqp->s_transactions,
489                                     (fqp->p) ? fqp->p->p_ionice : 0);
490                                 ++n;
491                         } else {
492                                 fqp->max_tp = 0;
493                         }
494                         fqp->rebalance = 0;
495                         fqp->transactions = 0;
496                         fqp->avg_latency = 0;
497                         fqp->issued = 0;
498                 }
499
500                 dsched_debug(LOG_INFO, "%d procs competing for disk\n"
501                     "total_budget = %lld (lost bits = %d)\n"
502                     "incomplete tp = %d\n", n, total_budget, lost_bits,
503                     dpriv->incomplete_tp);
504
505                 if (n == 0)
506                         continue;
507
508                 sum = 0;
509
510                 for (i = 0; i < FQ_PRIO_MAX+1; i++) {
511                         if (budget[i] == 0)
512                                 continue;
513                         sum += (FQ_PRIO_BIAS+i)*budget[i];
514                 }
515
516                 if (sum == 0)
517                         sum = 1;
518
519                 dsched_debug(LOG_INFO, "sum = %d\n", sum);
520
521                 for (i = 0; i < FQ_PRIO_MAX+1; i++) {
522                         if (budget[i] == 0)
523                                 continue;
524
525                         /*
526                          * XXX: if we still overflow here, we really need to switch to
527                          *      some more advanced mechanism such as compound int128 or
528                          *      storing the lost bits so they can be used in the
529                          *      fq_balance_self.
530                          */
531                         dpriv->budgetpb[i] = ((FQ_PRIO_BIAS+i)*total_budget/sum) << lost_bits;
532                         KKASSERT(dpriv->budgetpb[i] >= 0);
533                 }
534
535                 if (total_budget > dpriv->max_budget)
536                         dpriv->max_budget = total_budget;
537
538                 dsched_debug(4, "disk is %d\% busy\n", dpriv->disk_busy);
539                 TAILQ_FOREACH(fqp, &dpriv->fq_priv_list, dlink) {
540                         fqp->rebalance = 1;
541                 }
542
543                 dpriv->prev_full = dpriv->last_full;
544                 dpriv->last_full = (dpriv->disk_busy >= 90)?1:0;
545         }
546 }
547
548
549 /*
550  * fq_balance_self should be called from all sorts of dispatchers. It basically
551  * offloads some of the heavier calculations on throttling onto the process that
552  * wants to do I/O instead of doing it in the fq_balance thread.
553  * - should be called with dpriv lock held
554  */
555 void
556 fq_balance_self(struct dsched_fq_priv *fqp) {
557         struct dsched_fq_dpriv *dpriv;
558
559         int64_t budget, used_budget;
560         int64_t avg_latency;
561         int64_t transactions;
562
563         transactions = (int64_t)fqp->s_transactions;
564         avg_latency = (int64_t)fqp->s_avg_latency;
565         dpriv = fqp->dpriv;
566
567         used_budget = ((int64_t)avg_latency * transactions);
568         budget = dpriv->budgetpb[(fqp->p) ? fqp->p->p_ionice : 0];
569
570         if (used_budget > 0) {
571                 dsched_debug(LOG_INFO,
572                     "info: used_budget = %lld, budget = %lld\n", used_budget,
573                     budget);
574         }
575
576         if ((used_budget > budget) && (dpriv->disk_busy >= 90)) {
577                 KKASSERT(avg_latency != 0);
578
579                 fqp->max_tp = budget/(avg_latency);
580                 atomic_add_int(&fq_stats.procs_limited, 1);
581
582                 dsched_debug(LOG_INFO,
583                     "rate limited to %d transactions\n", fqp->max_tp);
584
585         } else if (((used_budget*2 < budget) || (dpriv->disk_busy < 80)) &&
586             (!dpriv->prev_full && !dpriv->last_full)) {
587                 fqp->max_tp = 0;
588         }
589 }
590
591
592 static int
593 do_fqstats(SYSCTL_HANDLER_ARGS)
594 {
595         return (sysctl_handle_opaque(oidp, &fq_stats, sizeof(struct dsched_fq_stats), req));
596 }
597
598
/* Expose fq statistics as the read-only opaque sysctl kern.fq_stats */
SYSCTL_PROC(_kern, OID_AUTO, fq_stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_fq_stats), do_fqstats, "fq_stats",
    "dsched_fq statistics");
602
603
/*
 * SI_SUB_PRE_DRIVERS init hook; intentionally empty - all real setup
 * happens earlier in fq_earlyinit().
 */
static void
fq_init(void)
{

}
609
/*
 * SI_SUB_PRE_DRIVERS uninit hook; intentionally empty - teardown is
 * handled by fq_earlyuninit().
 */
static void
fq_uninit(void)
{

}
615
616 static void
617 fq_earlyinit(void)
618 {
619         fq_priv_cache = objcache_create("fq-priv-cache", 0, 0,
620                                            NULL, NULL, NULL,
621                                            objcache_malloc_alloc,
622                                            objcache_malloc_free,
623                                            &dsched_fq_priv_malloc_args );
624
625         fq_mpriv_cache = objcache_create("fq-mpriv-cache", 0, 0,
626                                            NULL, NULL, NULL,
627                                            objcache_malloc_alloc,
628                                            objcache_malloc_free,
629                                            &dsched_fq_mpriv_malloc_args );
630
631         FQ_GLOBAL_FQMP_LOCKINIT();
632
633         fq_dpriv_cache = objcache_create("fq-dpriv-cache", 0, 0,
634                                            NULL, NULL, NULL,
635                                            objcache_malloc_alloc,
636                                            objcache_malloc_free,
637                                            &dsched_fq_dpriv_malloc_args );
638
639         bzero(&fq_stats, sizeof(struct dsched_fq_stats));
640
641         dsched_register(&dsched_fq_ops);
642         callout_init_mp(&fq_callout);
643
644         kprintf("FQ scheduler policy version %d.%d loaded\n",
645             dsched_fq_version_maj, dsched_fq_version_min);
646 }
647
648 static void
649 fq_earlyuninit(void)
650 {
651         callout_stop(&fq_callout);
652         callout_deactivate(&fq_callout);
653         return;
654 }
655
/* Empty placeholder hooks at driver-init time (see fq_init/fq_uninit) */
SYSINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, fq_init, NULL);
SYSUNINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, fq_uninit, NULL);

/* Real setup/teardown runs just before SI_SUB_CREATE_INIT */
SYSINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, fq_earlyinit, NULL);
SYSUNINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, fq_earlyuninit, NULL);