sys/dsched/fq/dsched_fq_core.c

   1 /*
   2  * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Alex Hornung <ahornung@gmail.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34 #include <sys/param.h>
  35 #include <sys/systm.h>
  36 #include <sys/kernel.h>
  37 #include <sys/proc.h>
  38 #include <sys/sysctl.h>
  39 #include <sys/buf.h>
  40 #include <sys/conf.h>
  41 #include <sys/diskslice.h>
  42 #include <sys/disk.h>
  43 #include <machine/atomic.h>
  44 #include <sys/malloc.h>
  45 #include <sys/thread.h>
  46 #include <sys/thread2.h>
  47 #include <sys/sysctl.h>
  48 #include <sys/spinlock2.h>
  49 #include <machine/md_var.h>
  50 #include <sys/ctype.h>
  51 #include <sys/syslog.h>
  52 #include <sys/device.h>
  53 #include <sys/msgport.h>
  54 #include <sys/msgport2.h>
  55 #include <sys/buf2.h>
  56 #include <sys/dsched.h>
  57 #include <machine/varargs.h>
  58 #include <machine/param.h>
  59
  60 #include <dsched/fq/dsched_fq.h>
  61
  62 MALLOC_DECLARE(M_DSCHEDFQ);
  63
  64 static int      dsched_fq_version_maj = 0;
  65 static int      dsched_fq_version_min = 7;
  66
  67 struct dsched_fq_stats  fq_stats;
  68
  69 struct objcache_malloc_args dsched_fq_dpriv_malloc_args = {
  70         sizeof(struct dsched_fq_dpriv), M_DSCHEDFQ };
  71 struct objcache_malloc_args dsched_fq_priv_malloc_args = {
  72         sizeof(struct dsched_fq_priv), M_DSCHEDFQ };
  73 struct objcache_malloc_args dsched_fq_mpriv_malloc_args = {
  74         sizeof(struct dsched_fq_mpriv), M_DSCHEDFQ };
  75
  76 static struct objcache  *fq_dpriv_cache;
  77 static struct objcache  *fq_mpriv_cache;
  78 static struct objcache  *fq_priv_cache;
  79
  80 TAILQ_HEAD(, dsched_fq_mpriv)   dsched_fqmp_list =
  81                 TAILQ_HEAD_INITIALIZER(dsched_fqmp_list);
  82
  83 struct spinlock fq_fqmp_lock;
  84 struct callout  fq_callout;
  85
  86 extern struct dsched_ops dsched_fq_ops;
  87
  88 void db_stack_trace_cmd(int addr, boolean_t have_addr, int count,char *modif);
  89 void fq_print_backtrace(void);
  90
  91 void
  92 fq_print_backtrace(void)
  93 {
  94         register_t  ebp;
  95
  96         __asm __volatile("movl %%ebp, %0" : "=r" (ebp));
  97         db_stack_trace_cmd(ebp, 1, 4, NULL);
  98 }
  99
 100 void
 101 fq_reference_dpriv(struct dsched_fq_dpriv *dpriv)
 102 {
 103         int refcount;
 104
 105         refcount = atomic_fetchadd_int(&dpriv->refcount, 1);
 106
 107         KKASSERT(refcount >= 0);
 108 }
 109
 110 void
 111 fq_reference_priv(struct dsched_fq_priv *fqp)
 112 {
 113         int refcount;
 114
 115         refcount = atomic_fetchadd_int(&fqp->refcount, 1);
 116
 117         KKASSERT(refcount >= 0);
 118 }
 119
 120 void
 121 fq_reference_mpriv(struct dsched_fq_mpriv *fqmp)
 122 {
 123         int refcount;
 124
 125         refcount = atomic_fetchadd_int(&fqmp->refcount, 1);
 126
 127         KKASSERT(refcount >= 0);
 128 }
 129
 130 void
 131 fq_dereference_dpriv(struct dsched_fq_dpriv *dpriv)
 132 {
 133         struct dsched_fq_priv   *fqp, *fqp2;
 134         int refcount;
 135
 136         refcount = atomic_fetchadd_int(&dpriv->refcount, -1);
 137
 138
 139         KKASSERT(refcount >= -3);
 140
 141         if (refcount == 1) {
 142                 atomic_subtract_int(&dpriv->refcount, 3); /* mark as: in destruction */
 143 #if 1
 144                 kprintf("dpriv (%p) destruction started, trace:\n", dpriv);
 145                 fq_print_backtrace();
 146 #endif
 147                 spin_lock_wr(&dpriv->lock);
 148                 TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
 149                         TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
 150                         fqp->flags &= ~FQP_LINKED_DPRIV;
 151                         fq_dereference_priv(fqp);
 152                 }
 153                 spin_unlock_wr(&dpriv->lock);
 154
 155                 objcache_put(fq_dpriv_cache, dpriv);
 156                 atomic_subtract_int(&fq_stats.dpriv_allocations, 1);
 157         }
 158 }
 159
 160 void
 161 fq_dereference_priv(struct dsched_fq_priv *fqp)
 162 {
 163         struct dsched_fq_mpriv  *fqmp;
 164         struct dsched_fq_dpriv  *dpriv;
 165         int refcount;
 166
 167         refcount = atomic_fetchadd_int(&fqp->refcount, -1);
 168
 169         KKASSERT(refcount >= -3);
 170
 171         if (refcount == 1) {
 172                 atomic_subtract_int(&fqp->refcount, 3); /* mark as: in destruction */
 173 #if 0
 174                 kprintf("fqp (%p) destruction started, trace:\n", fqp);
 175                 fq_print_backtrace();
 176 #endif
 177                 dpriv = fqp->dpriv;
 178                 KKASSERT(dpriv != NULL);
 179
 180                 spin_lock_wr(&fqp->lock);
 181
 182                 KKASSERT(fqp->qlength == 0);
 183
 184                 if (fqp->flags & FQP_LINKED_DPRIV) {
 185                         spin_lock_wr(&dpriv->lock);
 186
 187                         TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
 188                         fqp->flags &= ~FQP_LINKED_DPRIV;
 189
 190                         spin_unlock_wr(&dpriv->lock);
 191                 }
 192
 193                 if (fqp->flags & FQP_LINKED_FQMP) {
 194                         fqmp = fqp->fqmp;
 195                         KKASSERT(fqmp != NULL);
 196
 197                         spin_lock_wr(&fqmp->lock);
 198
 199                         TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
 200                         fqp->flags &= ~FQP_LINKED_FQMP;
 201
 202                         spin_unlock_wr(&fqmp->lock);
 203                 }
 204
 205                 spin_unlock_wr(&fqp->lock);
 206
 207                 objcache_put(fq_priv_cache, fqp);
 208                 atomic_subtract_int(&fq_stats.fqp_allocations, 1);
 209 #if 0
 210                 fq_dereference_dpriv(dpriv);
 211 #endif
 212         }
 213 }
 214
 215 void
 216 fq_dereference_mpriv(struct dsched_fq_mpriv *fqmp)
 217 {
 218         struct dsched_fq_priv   *fqp, *fqp2;
 219         int refcount;
 220
 221         refcount = atomic_fetchadd_int(&fqmp->refcount, -1);
 222
 223         KKASSERT(refcount >= -3);
 224
 225         if (refcount == 1) {
 226                 atomic_subtract_int(&fqmp->refcount, 3); /* mark as: in destruction */
 227 #if 0
 228                 kprintf("fqmp (%p) destruction started, trace:\n", fqmp);
 229                 fq_print_backtrace();
 230 #endif
 231                 FQ_GLOBAL_FQMP_LOCK();
 232                 spin_lock_wr(&fqmp->lock);
 233
 234                 TAILQ_FOREACH_MUTABLE(fqp, &fqmp->fq_priv_list, link, fqp2) {
 235                         TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
 236                         fqp->flags &= ~FQP_LINKED_FQMP;
 237                         fq_dereference_priv(fqp);
 238                 }
 239                 TAILQ_REMOVE(&dsched_fqmp_list, fqmp, link);
 240
 241                 spin_unlock_wr(&fqmp->lock);
 242                 FQ_GLOBAL_FQMP_UNLOCK();
 243
 244                 objcache_put(fq_mpriv_cache, fqmp);
 245                 atomic_subtract_int(&fq_stats.fqmp_allocations, 1);
 246         }
 247 }
 248
 249
 250 struct dsched_fq_priv *
 251 fq_alloc_priv(struct disk *dp)
 252 {
 253         struct dsched_fq_priv   *fqp;
 254 #if 0
 255         fq_reference_dpriv(dsched_get_disk_priv(dp));
 256 #endif
 257         fqp = objcache_get(fq_priv_cache, M_WAITOK);
 258         bzero(fqp, sizeof(struct dsched_fq_priv));
 259
 260         /* XXX: maybe we do need another ref for the disk list for fqp */
 261         fq_reference_priv(fqp);
 262
 263         FQ_FQP_LOCKINIT(fqp);
 264         FQ_FQP_LOCK(fqp);
 265         fqp->dp = dp;
 266
 267         fqp->dpriv = dsched_get_disk_priv(dp);
 268
 269         TAILQ_INIT(&fqp->queue);
 270         TAILQ_INSERT_TAIL(&fqp->dpriv->fq_priv_list, fqp, dlink);
 271         fqp->flags |= FQP_LINKED_DPRIV;
 272
 273         atomic_add_int(&fq_stats.fqp_allocations, 1);
 274         FQ_FQP_UNLOCK(fqp);
 275         return fqp;
 276 }
 277
 278
 279 struct dsched_fq_dpriv *
 280 fq_alloc_dpriv(struct disk *dp)
 281 {
 282         struct dsched_fq_dpriv *dpriv;
 283
 284         dpriv = objcache_get(fq_dpriv_cache, M_WAITOK);
 285         bzero(dpriv, sizeof(struct dsched_fq_dpriv));
 286         fq_reference_dpriv(dpriv);
 287         dpriv->dp = dp;
 288         dpriv->avg_rq_time = 0;
 289         dpriv->incomplete_tp = 0;
 290         FQ_DPRIV_LOCKINIT(dpriv);
 291         TAILQ_INIT(&dpriv->fq_priv_list);
 292
 293         atomic_add_int(&fq_stats.dpriv_allocations, 1);
 294         return dpriv;
 295 }
 296
 297
 298 struct dsched_fq_mpriv *
 299 fq_alloc_mpriv(struct proc *p)
 300 {
 301         struct dsched_fq_mpriv  *fqmp;
 302         struct dsched_fq_priv   *fqp;
 303         struct disk     *dp = NULL;
 304
 305         fqmp = objcache_get(fq_mpriv_cache, M_WAITOK);
 306         bzero(fqmp, sizeof(struct dsched_fq_mpriv));
 307         fq_reference_mpriv(fqmp);
 308 #if 0
 309         kprintf("fq_alloc_mpriv, new fqmp = %p\n", fqmp);
 310 #endif
 311         FQ_FQMP_LOCKINIT(fqmp);
 312         FQ_FQMP_LOCK(fqmp);
 313         TAILQ_INIT(&fqmp->fq_priv_list);
 314
 315         while ((dp = dsched_disk_enumerate(dp, &dsched_fq_ops))) {
 316                 fqp = fq_alloc_priv(dp);
 317                 fqp->p = p;
 318 #if 0
 319                 fq_reference_priv(fqp);
 320 #endif
 321                 fqp->fqmp = fqmp;
 322                 TAILQ_INSERT_TAIL(&fqmp->fq_priv_list, fqp, link);
 323                 fqp->flags |= FQP_LINKED_FQMP;
 324         }
 325
 326         FQ_GLOBAL_FQMP_LOCK();
 327         TAILQ_INSERT_TAIL(&dsched_fqmp_list, fqmp, link);
 328         FQ_GLOBAL_FQMP_UNLOCK();
 329         FQ_FQMP_UNLOCK(fqmp);
 330
 331         atomic_add_int(&fq_stats.fqmp_allocations, 1);
 332         return fqmp;
 333 }
 334
 335
 336 void
 337 fq_dispatcher(struct dsched_fq_dpriv *dpriv)
 338 {
 339         struct dsched_fq_mpriv  *fqmp;
 340         struct dsched_fq_priv   *fqp, *fqp2;
 341         struct bio *bio, *bio2;
 342         int idle;
 343
 344         /*
 345          * We need to manually assign an fqp to the fqmp of this thread
 346          * since it isn't assigned one during fq_prepare, as the disk
 347          * is not set up yet.
 348          */
 349         fqmp = dsched_get_thread_priv(curthread);
 350         KKASSERT(fqmp != NULL);
 351
 352         fqp = fq_alloc_priv(dpriv->dp);
 353         FQ_FQMP_LOCK(fqmp);
 354 #if 0
 355         fq_reference_priv(fqp);
 356 #endif
 357         TAILQ_INSERT_TAIL(&fqmp->fq_priv_list, fqp, link);
 358         FQ_FQMP_UNLOCK(fqmp);
 359
 360
 361         FQ_DPRIV_LOCK(dpriv);
 362         for(;;) {
 363                 idle = 0;
 364                 /* sleep ~60 ms */
 365                 if ((ssleep(dpriv, &dpriv->lock, 0, "fq_dispatcher", hz/15) == 0)) {
 366                         /*
 367                          * We've been woken up; this either means that we are
 368                          * supposed to die away nicely or that the disk is idle.
 369                          */
 370
 371                         if (dpriv->die == 1) {
 372                                 FQ_DPRIV_UNLOCK(dpriv);
 373                                 kprintf("fq_dispatcher is peacefully dying\n");
 374                                 lwkt_exit();
 375                                 /* NOTREACHED */
 376                         }
 377
 378                         /*
 379                          * We have been awakened because the disk is idle.
 380                          * So let's get ready to dispatch some extra bios.
 381                          */
 382                         idle = 1;
 383                 }
 384
 385                 /* Maybe the disk is idle and we just didn't get the wakeup */
 386                 if (idle == 0)
 387                         idle = dpriv->idle;
 388
 389                 /*
 390                  * XXX: further room for improvements here. It would be better
 391                  *      to dispatch a few requests from each fqp as to ensure
 392                  *      real fairness.
 393                  */
 394                 TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
 395                         if (fqp->qlength == 0)
 396                                 continue;
 397
 398                         FQ_FQP_LOCK(fqp);
 399
 400                         /*
 401                          * XXX: why 5 extra? should probably be dynamic,
 402                          *      relying on information on latency.
 403                          */
 404                         if ((fqp->max_tp > 0) && idle &&
 405                             (fqp->issued >= fqp->max_tp)) {
 406                                 fqp->max_tp += 5;
 407                                 ++fqp->idle_generation;
 408                         }
 409
 410                         TAILQ_FOREACH_MUTABLE(bio, &fqp->queue, link, bio2) {
 411                                 if ((fqp->max_tp > 0) &&
 412                                     ((fqp->issued >= fqp->max_tp)))
 413                                         break;
 414
 415                                 TAILQ_REMOVE(&fqp->queue, bio, link);
 416                                 --fqp->qlength;
 417
 418                                 /*
 419                                  * beware that we do have an fqp reference
 420                                  * from the queueing
 421                                  */
 422                                 fq_dispatch(dpriv, bio, fqp);
 423                         }
 424                         FQ_FQP_UNLOCK(fqp);
 425
 426                 }
 427         }
 428 }
 429
 430 void
 431 fq_balance_thread(struct dsched_fq_dpriv *dpriv)
 432 {
 433         struct  dsched_fq_priv  *fqp, *fqp2;
 434         int     n = 0;
 435         static int last_full = 0, prev_full = 0;
 436         static int limited_procs = 0;
 437         int     incomplete_tp;
 438         int     disk_busy;
 439         int64_t budget, total_budget, used_budget;
 440         int64_t budgetpb[FQ_PRIO_MAX+1];
 441         int sum, i;
 442
 443         bzero(budgetpb, sizeof(budgetpb));
 444         total_budget = 0;
 445
 446         FQ_DPRIV_LOCK(dpriv);
 447         disk_busy = (100*(FQ_TOTAL_DISK_TIME - dpriv->idle_time)) /
 448             FQ_TOTAL_DISK_TIME;
 449         if (disk_busy < 0)
 450                 disk_busy = 0;
 451         incomplete_tp = dpriv->incomplete_tp;
 452         dpriv->idle_time = 0;
 453
 454         TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
 455                 if (fqp->transactions > 0 /* 30 */) {
 456
 457                         total_budget += (fqp->avg_latency * fqp->transactions);
 458                         /*
 459                          * XXX: while the code below really sucked, the problem needs to
 460                          *      be addressed eventually. Some processes take up their "fair"
 461                          *      slice, but don't really need even a 10th of it.
 462                          *      This kills performance for those that do need the
 463                          *      performance.
 464                          */
 465 #if 0
 466                         /*
 467                          * This is *very* hackish. It basically tries to avoid that
 468                          * processes that do only very few tps take away more bandwidth
 469                          * than they should.
 470                          */
 471                         if ((limited_procs >= 1) && (fqp->transactions < 25) &&
 472                             (budgetpb[(fqp->p) ? fqp->p->p_ionice : 0] >= 1))
 473                                 continue;
 474 #endif
 475
 476                         ++budgetpb[(fqp->p) ? fqp->p->p_ionice : 0];
 477
 478                         dsched_debug(LOG_INFO,
 479                             "%d) avg_latency = %d, transactions = %d, ioprio = %d\n",
 480                             n, fqp->avg_latency, fqp->transactions,
 481                             (fqp->p) ? fqp->p->p_ionice : 0);
 482                         ++n;
 483                 } else {
 484                         fqp->max_tp = 0;
 485                         fqp->avg_latency = 0;
 486                 }
 487         }
 488
 489         dsched_debug(LOG_INFO, "%d procs competing for disk\n"
 490             "total_budget = %lld\n"
 491             "incomplete tp = %d\n", n, total_budget, incomplete_tp);
 492
 493         if (n == 0)
 494                 goto done;
 495
 496         sum = 0;
 497         for (i = 0; i < FQ_PRIO_MAX+1; i++) {
 498                 if (budgetpb[i] == 0)
 499                         continue;
 500                 sum += (FQ_PRIO_BIAS+i)*budgetpb[i];
 501         }
 502         if (sum == 0)
 503                 sum = 1;
 504         dsched_debug(LOG_INFO, "sum = %d\n", sum);
 505
 506         for (i = 0; i < FQ_PRIO_MAX+1; i++) {
 507                 if (budgetpb[i] == 0)
 508                         continue;
 509
 510                 budgetpb[i] = ((FQ_PRIO_BIAS+i)*10)*total_budget/sum;
 511         }
 512
 513         if (total_budget > dpriv->max_budget)
 514                 dpriv->max_budget = total_budget;
 515
 516         limited_procs = 0;
 517
 518         dsched_debug(4, "disk is %d\% busy\n", disk_busy);
 519
 520         /*
 521          * XXX: eventually remove all the silly *10...
 522          */
 523         TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
 524                 budget = budgetpb[(fqp->p) ? fqp->p->p_ionice : 0];
 525
 526                 used_budget = ((int64_t)10*(int64_t)fqp->avg_latency *
 527                     (int64_t)fqp->transactions);
 528                 if (used_budget > 0) {
 529                         dsched_debug(LOG_INFO,
 530                             "info: used_budget = %lld, budget = %lld\n", used_budget,
 531                             budget);
 532                 }
 533
 534                 /*
 535                  * process is exceeding its fair share; rate-limit it, but only
 536                  * if the disk is being used at 90+% of capacity
 537                  */
 538                 if ((used_budget > budget) && (disk_busy >= 90) &&
 539                     (incomplete_tp > n*2)) {
 540                         KKASSERT(fqp->avg_latency != 0);
 541
 542                         fqp->max_tp = budget/(10*fqp->avg_latency);
 543                         ++limited_procs;
 544                         dsched_debug(LOG_INFO,
 545                             "rate limited to %d transactions\n", fqp->max_tp);
 546                         atomic_add_int(&fq_stats.procs_limited, 1);
 547                 } else if (((used_budget*2 < budget) || (disk_busy < 90)) &&
 548                     (!prev_full && !last_full)) {
 549                         /*
 550                          * process is really using little of its timeslice, or the
 551                          * disk is not busy, so let's reset the rate-limit.
 552                          * Without this, exceeding processes will get an unlimited
 553                          * slice every other slice.
 554                          * XXX: this still doesn't quite fix the issue, but maybe
 555                          * it's good that way, so that heavy writes are interleaved.
 556                          */
 557                         fqp->max_tp = 0;
 558                 }
 559                 fqp->transactions = 0;
 560                 fqp->avg_latency = 0;
 561                 fqp->issued = 0;
 562         }
 563
 564         prev_full = last_full;
 565         last_full = (disk_busy >= 90)?1:0;
 566
 567 done:
 568         FQ_DPRIV_UNLOCK(dpriv);
 569         callout_reset(&fq_callout, hz * FQ_REBALANCE_TIMEOUT,
 570             (void (*)(void *))fq_balance_thread, dpriv);
 571 }
 572
 573
 574 static int
 575 do_fqstats(SYSCTL_HANDLER_ARGS)
 576 {
 577         return (sysctl_handle_opaque(oidp, &fq_stats, sizeof(struct dsched_fq_stats), req));
 578 }
 579
 580
 581 SYSCTL_PROC(_kern, OID_AUTO, fq_stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
 582     0, sizeof(struct dsched_fq_stats), do_fqstats, "fq_stats",
 583     "dsched_fq statistics");
 584
 585
 586
 587
 588 static void
 589 fq_init(void)
 590 {
 591
 592 }
 593
 594 static void
 595 fq_uninit(void)
 596 {
 597
 598 }
 599
 600 static void
 601 fq_earlyinit(void)
 602 {
 603         fq_priv_cache = objcache_create("fq-priv-cache", 0, 0,
 604                                            NULL, NULL, NULL,
 605                                            objcache_malloc_alloc,
 606                                            objcache_malloc_free,
 607                                            &dsched_fq_priv_malloc_args );
 608
 609         fq_mpriv_cache = objcache_create("fq-mpriv-cache", 0, 0,
 610                                            NULL, NULL, NULL,
 611                                            objcache_malloc_alloc,
 612                                            objcache_malloc_free,
 613                                            &dsched_fq_mpriv_malloc_args );
 614
 615         FQ_GLOBAL_FQMP_LOCKINIT();
 616
 617         fq_dpriv_cache = objcache_create("fq-dpriv-cache", 0, 0,
 618                                            NULL, NULL, NULL,
 619                                            objcache_malloc_alloc,
 620                                            objcache_malloc_free,
 621                                            &dsched_fq_dpriv_malloc_args );
 622
 623         bzero(&fq_stats, sizeof(struct dsched_fq_stats));
 624
 625         dsched_register(&dsched_fq_ops);
 626         callout_init_mp(&fq_callout);
 627
 628         kprintf("FQ scheduler policy version %d.%d loaded\n",
 629             dsched_fq_version_maj, dsched_fq_version_min);
 630 }
 631
 632 static void
 633 fq_earlyuninit(void)
 634 {
 635         callout_stop(&fq_callout);
 636         callout_deactivate(&fq_callout);
 637         return;
 638 }
 639
 640 SYSINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, fq_init, NULL);
 641 SYSUNINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, fq_uninit, NULL);
 642
 643 SYSINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, fq_earlyinit, NULL);
 644 SYSUNINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, fq_earlyuninit, NULL);