dsched, dsched_fq - Major cleanup
authorAlex Hornung <ahornung@gmail.com>
Mon, 19 Apr 2010 18:24:34 +0000 (18:24 +0000)
committerAlex Hornung <ahornung@gmail.com>
Mon, 19 Apr 2010 19:01:34 +0000 (19:01 +0000)
SHORT version: major cleanup and rename to useful names

LONG version:

dsched:
* Unify dsched_ops with dsched_policy, remove fallout

* Rename dsched_{create,destroy} -> dsched_disk_{create,destroy}_callback

* Kill .head in dsched_policy

* Kill dead code

dsched_fq:
* Rename fqp -> thread_io/tdio
* Rename fqmp -> thread_ctx/tdctx
* Rename dpriv -> disk_ctx/diskctx

* Several related renames of functions (alloc/ref/unref).

* Remove dead code

* rename tdctx->s_* -> tdctx->interval_*

* comment struct members

* ... and some more changes I probably forgot

Huge-Thanks-To: Aggelos Economopoulos

sys/conf/files
sys/dsched/fq/dsched_fq.h
sys/dsched/fq/dsched_fq_core.c
sys/dsched/fq/dsched_fq_diskops.c
sys/dsched/fq/dsched_fq_procops.c
sys/kern/kern_dsched.c [moved from sys/kern/subr_dsched.c with 81% similarity]
sys/kern/subr_disk.c
sys/sys/disk.h
sys/sys/dsched.h
test/dsched_fq/fqstats.c

index 9c721ff..e3994d4 100644 (file)
@@ -707,6 +707,7 @@ kern/lwkt_token.c   standard
 kern/lwkt_msgport.c    standard
 kern/lwkt_serialize.c  standard
 kern/lwkt_caps.c       standard
+kern/kern_dsched.c     standard
 kern/kern_sensors.c    standard
 kern/kern_spinlock.c   standard
 kern/kern_synch.c      standard
@@ -722,7 +723,6 @@ kern/subr_autoconf.c        standard
 kern/subr_bus.c                standard
 kern/subr_busdma.c     standard
 kern/subr_devstat.c    standard
-kern/subr_dsched.c     standard
 dsched/fq/dsched_fq_core.c     standard
 dsched/fq/dsched_fq_diskops.c  standard
 dsched/fq/dsched_fq_procops.c  standard
index fb30faa..6b31c87 100644 (file)
 #include <sys/spinlock.h>
 #endif
 
-#define        FQ_FQP_LOCKINIT(x)      lockinit(&(x)->lock, "fqpbioq", 0, LK_CANRECURSE)
-#define        FQ_FQP_LOCK(x)          fq_reference_priv((x)); \
-                               lockmgr(&(x)->lock, LK_EXCLUSIVE)
-#define        FQ_FQP_UNLOCK(x)        lockmgr(&(x)->lock, LK_RELEASE); \
-                               fq_dereference_priv((x));
+#define        FQ_THREAD_IO_LOCKINIT(x)        lockinit(&(x)->lock, "tdiobioq", 0, LK_CANRECURSE)
+#define        FQ_THREAD_IO_LOCK(x)            fq_thread_io_ref((x)); \
+                                       lockmgr(&(x)->lock, LK_EXCLUSIVE)
+#define        FQ_THREAD_IO_UNLOCK(x)          lockmgr(&(x)->lock, LK_RELEASE); \
+                                       fq_thread_io_unref((x));
 
-#define        FQ_DPRIV_LOCKINIT(x)    lockinit(&(x)->lock, "fqpdiskq", 0, LK_CANRECURSE)
-#define        FQ_DPRIV_LOCK(x)        fq_reference_dpriv((x)); \
-                               lockmgr(&(x)->lock, LK_EXCLUSIVE)
-#define        FQ_DPRIV_UNLOCK(x)      lockmgr(&(x)->lock, LK_RELEASE); \
-                               fq_dereference_dpriv((x))
+#define        FQ_DISK_CTX_LOCKINIT(x)         lockinit(&(x)->lock, "tdiodiskq", 0, LK_CANRECURSE)
+#define        FQ_DISK_CTX_LOCK(x)             fq_disk_ctx_ref((x)); \
+                                       lockmgr(&(x)->lock, LK_EXCLUSIVE)
+#define        FQ_DISK_CTX_UNLOCK(x)           lockmgr(&(x)->lock, LK_RELEASE); \
+                                       fq_disk_ctx_unref((x))
+#define FQ_DISK_CTX_LOCK_ASSERT(x)     KKASSERT(lockstatus(&(x)->lock, curthread) == LK_EXCLUSIVE)
 
-#define        FQ_GLOBAL_FQMP_LOCKINIT(x)      lockinit(&fq_fqmp_lock, "fqmpglob", 0, LK_CANRECURSE)
-#define        FQ_GLOBAL_FQMP_LOCK(x)  lockmgr(&fq_fqmp_lock, LK_EXCLUSIVE)
-#define        FQ_GLOBAL_FQMP_UNLOCK(x)        lockmgr(&fq_fqmp_lock, LK_RELEASE)
+#define        FQ_GLOBAL_THREAD_CTX_LOCKINIT(x)        lockinit(&fq_tdctx_lock, "tdctxglob", 0, LK_CANRECURSE)
+#define        FQ_GLOBAL_THREAD_CTX_LOCK(x)            lockmgr(&fq_tdctx_lock, LK_EXCLUSIVE)
+#define        FQ_GLOBAL_THREAD_CTX_UNLOCK(x)  lockmgr(&fq_tdctx_lock, LK_RELEASE)
 
 
 
-#define        FQ_FQMP_LOCKINIT(x)     spin_init(&(x)->lock)
+#define        FQ_THREAD_CTX_LOCKINIT(x)       spin_init(&(x)->lock)
 #if 0
-#define        FQ_FQP_LOCKINIT(x)      spin_init(&(x)->lock)
+#define        FQ_THREAD_IO_LOCKINIT(x)        spin_init(&(x)->lock)
 #endif
 #if 0
-#define        FQ_DPRIV_LOCKINIT(x)    spin_init(&(x)->lock)
+#define        FQ_DISK_CTX_LOCKINIT(x) spin_init(&(x)->lock)
 #endif
 #if 0
-#define        FQ_GLOBAL_FQMP_LOCKINIT(x)      spin_init(&fq_fqmp_lock)
+#define        FQ_GLOBAL_THREAD_CTX_LOCKINIT(x)        spin_init(&fq_tdctx_lock)
 #endif
 
-#define        FQ_FQMP_LOCK(x)         fq_reference_mpriv((x)); \
+#define        FQ_THREAD_CTX_LOCK(x)           fq_thread_ctx_ref((x)); \
                                spin_lock_wr(&(x)->lock)
 #if 0
-#define        FQ_FQP_LOCK(x)          fq_reference_priv((x)); \
+#define        FQ_THREAD_IO_LOCK(x)            fq_thread_io_ref((x)); \
                                spin_lock_wr(&(x)->lock)
 #endif
 #if 0
-#define        FQ_DPRIV_LOCK(x)        fq_reference_dpriv((x)); \
+#define        FQ_DISK_CTX_LOCK(x)     fq_disk_ctx_ref((x)); \
                                spin_lock_wr(&(x)->lock)
 #endif
 #if 0
-#define        FQ_GLOBAL_FQMP_LOCK(x)  spin_lock_wr(&fq_fqmp_lock)
+#define        FQ_GLOBAL_THREAD_CTX_LOCK(x)    spin_lock_wr(&fq_tdctx_lock)
 #endif
 
-#define        FQ_FQMP_UNLOCK(x)       spin_unlock_wr(&(x)->lock); \
-                               fq_dereference_mpriv((x))
+#define        FQ_THREAD_CTX_UNLOCK(x) spin_unlock_wr(&(x)->lock); \
+                               fq_thread_ctx_unref((x))
 
 #if 0
-#define        FQ_FQP_UNLOCK(x)        spin_unlock_wr(&(x)->lock); \
-                               fq_dereference_priv((x))
+#define        FQ_THREAD_IO_UNLOCK(x)  spin_unlock_wr(&(x)->lock); \
+                               fq_thread_io_unref((x))
 #endif
 #if 0
-#define        FQ_DPRIV_UNLOCK(x)      spin_unlock_wr(&(x)->lock); \
-                               fq_dereference_dpriv((x))
+#define        FQ_DISK_CTX_UNLOCK(x)   spin_unlock_wr(&(x)->lock); \
+                               fq_disk_ctx_unref((x))
 #endif
 #if 0
-#define        FQ_GLOBAL_FQMP_UNLOCK(x) spin_unlock_wr(&fq_fqmp_lock)
+#define        FQ_GLOBAL_THREAD_CTX_UNLOCK(x) spin_unlock_wr(&fq_tdctx_lock)
 #endif
 
-#define        FQ_REBALANCE_TIMEOUT    1       /* in seconds */
-#define FQ_TOTAL_DISK_TIME     1000000*FQ_REBALANCE_TIMEOUT    /* in useconds */
-
 #define FQ_PRIO_BIAS           5
 #define FQ_PRIO_MAX            10
 #define FQ_PRIO_MIN            1
 struct disk;
 struct proc;
 
-#define        FQP_LINKED_DPRIV        0x01
-#define        FQP_LINKED_FQMP         0x02
+#define        FQ_LINKED_DISK_CTX      0x01
+#define        FQ_LINKED_THREAD_CTX            0x02
 
-struct dsched_fq_priv {
-       TAILQ_ENTRY(dsched_fq_priv)     link;
-       TAILQ_ENTRY(dsched_fq_priv)     dlink;
-       TAILQ_HEAD(, bio)       queue;
+struct fq_thread_io {
+       TAILQ_ENTRY(fq_thread_io)       link;
+       TAILQ_ENTRY(fq_thread_io)       dlink;
+       TAILQ_HEAD(, bio)       queue;  /* IO queue (bio) */
 
        struct lock             lock;
        struct disk             *dp;
-       struct dsched_fq_dpriv  *dpriv;
-       struct dsched_fq_mpriv  *fqmp;
+       struct fq_disk_ctx      *diskctx;
+       struct fq_thread_ctx    *tdctx;
        struct proc             *p;
 
-       int32_t qlength;
+       int32_t qlength;        /* IO queue length */
        int32_t flags;
 
        int     refcount;
-       int32_t transactions;
-       int32_t avg_latency;
-       int32_t s_transactions;
-       int32_t s_avg_latency;
-       int32_t max_tp;
-       int32_t issued;
-
-       int     rebalance;
+       int32_t transactions;   /* IOs completed so far during current interval */
+       int32_t avg_latency;    /* avg latency for current interval IOs */
+       int32_t interval_transactions;  /* IOs completed during last interval */
+       int32_t interval_avg_latency;   /* avg latency for last interval IOs */
+       int32_t max_tp;         /* rate limit of transactions per interval */
+       int32_t issued;         /* IOs issued to disk (but not completed) */
+
+       int     rebalance;      /* thread needs to rebalance w/ fq_balance_self */
 };
 
-struct dsched_fq_dpriv {
-       struct thread   *td;
-       struct thread   *td_balance;
-       struct disk     *dp;
+struct fq_disk_ctx {
+       struct thread   *td;            /* dispatcher thread td */
+       struct thread   *td_balance;    /* balancer thread td */
+       struct disk     *dp;            /* back pointer to disk struct */
        struct lock     lock;
        int     refcount;
 
-       int     avg_rq_time;    /* XXX: unused */
-       int32_t incomplete_tp;
-       int64_t max_budget;
-       int     idle;
-       struct timeval start_idle;
-       int     idle_time;
-       int     die;
-
-       int     prev_full;
-       int     last_full;
-       int     disk_busy;
-       int64_t budgetpb[FQ_PRIO_MAX+1];
-
-       /* list contains all fq_priv for this disk */
-       TAILQ_HEAD(, dsched_fq_priv)    fq_priv_list;
-       TAILQ_ENTRY(dsched_fq_dpriv)    link;
+       int     avg_rq_time;            /* XXX: not yet used */
+       int32_t incomplete_tp;          /* IOs issued but not completed */
+       int     idle;                   /* disk idle ? */
+       struct timeval start_idle;      /* disk idleness start time */
+       int     idle_time;              /* aggregate idle time in interval */
+       int     die;                    /* flag to kill related threads */
+
+       int     prev_full;              /* disk >90% busy during prev. to last
+                                          interval? */
+       int     last_full;              /* disk >90% busy during last interval */
+       int     disk_busy;              /* disk >90% busy during cur. interval */
+       int64_t budgetpb[FQ_PRIO_MAX+1];/* next interval budget for each thread
+                                          in each prio */
+
+       /* list contains all fq_thread_io for this disk */
+       TAILQ_HEAD(, fq_thread_io)      fq_tdio_list;   /* list of thread_io of disk */
+       TAILQ_ENTRY(fq_disk_ctx)        link;
 };
 
-struct dsched_fq_mpriv {
+struct fq_thread_ctx {
        struct proc *p;
        struct thread *td;
        int dead;
        struct spinlock lock;
        int     refcount;
-       TAILQ_HEAD(, dsched_fq_priv)    fq_priv_list;
-       TAILQ_ENTRY(dsched_fq_mpriv)    link;
+       TAILQ_HEAD(, fq_thread_io)      fq_tdio_list;   /* list of thread_io */
+       TAILQ_ENTRY(fq_thread_ctx)      link;
 };
 
 
 
 
 
-struct dsched_fq_priv  *fq_alloc_priv(struct disk *dp, struct dsched_fq_mpriv *fqmp);
-struct dsched_fq_dpriv *fq_alloc_dpriv(struct disk *dp);
-struct dsched_fq_mpriv *fq_alloc_mpriv(struct proc *p);
-void   fq_balance_thread(struct dsched_fq_dpriv *dpriv);
-void   fq_dispatcher(struct dsched_fq_dpriv *dpriv);
+struct fq_thread_io    *fq_thread_io_alloc(struct disk *dp, struct fq_thread_ctx *tdctx);
+struct fq_disk_ctx     *fq_disk_ctx_alloc(struct disk *dp);
+struct fq_thread_ctx   *fq_thread_ctx_alloc(struct proc *p);
+void   fq_balance_thread(struct fq_disk_ctx *diskctx);
+void   fq_dispatcher(struct fq_disk_ctx *diskctx);
 biodone_t      fq_completed;
 
-void   fq_reference_dpriv(struct dsched_fq_dpriv *dpriv);
-void   fq_reference_priv(struct dsched_fq_priv *fqp);
-void   fq_reference_mpriv(struct dsched_fq_mpriv *fqmp);
-void   fq_dereference_dpriv(struct dsched_fq_dpriv *dpriv);
-void   fq_dereference_priv(struct dsched_fq_priv *fqp);
-void   fq_dereference_mpriv(struct dsched_fq_mpriv *fqmp);
-void   fq_dispatch(struct dsched_fq_dpriv *dpriv, struct bio *bio,
-                       struct dsched_fq_priv *fqp);
-void   fq_drain(struct dsched_fq_dpriv *dpriv, int mode);
-void   fq_balance_self(struct dsched_fq_priv *fqp);
+void   fq_disk_ctx_ref(struct fq_disk_ctx *diskctx);
+void   fq_thread_io_ref(struct fq_thread_io *tdio);
+void   fq_thread_ctx_ref(struct fq_thread_ctx *tdctx);
+void   fq_disk_ctx_unref(struct fq_disk_ctx *diskctx);
+void   fq_thread_io_unref(struct fq_thread_io *tdio);
+void   fq_thread_ctx_unref(struct fq_thread_ctx *tdctx);
+void   fq_dispatch(struct fq_disk_ctx *diskctx, struct bio *bio,
+                       struct fq_thread_io *tdio);
+void   fq_drain(struct fq_disk_ctx *diskctx, int mode);
+void   fq_balance_self(struct fq_thread_io *tdio);
 #endif /* _KERNEL || _KERNEL_STRUCTURES */
 
 
 struct dsched_fq_stats {
-       int32_t fqmp_allocations;
-       int32_t fqp_allocations;
-       int32_t dpriv_allocations;
+       int32_t tdctx_allocations;
+       int32_t tdio_allocations;
+       int32_t diskctx_allocations;
 
        int32_t procs_limited;
 
@@ -220,12 +219,10 @@ struct dsched_fq_stats {
        int32_t transactions_completed;
        int32_t cancelled;
 
-       int32_t no_fqmp;
+       int32_t no_tdctx;
 
        int32_t nthreads;
        int32_t nprocs;
-
-       int32_t nbufs;
 };
 
 #endif /* _DSCHED_FQ_H_ */
index 19b675c..de07b33 100644 (file)
@@ -66,295 +66,294 @@ static int        dsched_fq_version_min = 8;
 
 struct dsched_fq_stats fq_stats;
 
-struct objcache_malloc_args dsched_fq_dpriv_malloc_args = {
-       sizeof(struct dsched_fq_dpriv), M_DSCHEDFQ };
-struct objcache_malloc_args dsched_fq_priv_malloc_args = {
-       sizeof(struct dsched_fq_priv), M_DSCHEDFQ };
-struct objcache_malloc_args dsched_fq_mpriv_malloc_args = {
-       sizeof(struct dsched_fq_mpriv), M_DSCHEDFQ };
+struct objcache_malloc_args fq_disk_ctx_malloc_args = {
+       sizeof(struct fq_disk_ctx), M_DSCHEDFQ };
+struct objcache_malloc_args fq_thread_io_malloc_args = {
+       sizeof(struct fq_thread_io), M_DSCHEDFQ };
+struct objcache_malloc_args fq_thread_ctx_malloc_args = {
+       sizeof(struct fq_thread_ctx), M_DSCHEDFQ };
 
-static struct objcache *fq_dpriv_cache;
-static struct objcache *fq_mpriv_cache;
-static struct objcache *fq_priv_cache;
+static struct objcache *fq_diskctx_cache;
+static struct objcache *fq_tdctx_cache;
+static struct objcache *fq_tdio_cache;
 
-TAILQ_HEAD(, dsched_fq_mpriv)  dsched_fqmp_list =
-               TAILQ_HEAD_INITIALIZER(dsched_fqmp_list);
+TAILQ_HEAD(, fq_thread_ctx)    dsched_tdctx_list =
+               TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);
 
-struct lock    fq_fqmp_lock;
-struct callout fq_callout;
+struct lock    fq_tdctx_lock;
 
-extern struct dsched_ops dsched_fq_ops;
+extern struct dsched_policy dsched_fq_policy;
 
 void
-fq_reference_dpriv(struct dsched_fq_dpriv *dpriv)
+fq_disk_ctx_ref(struct fq_disk_ctx *diskctx)
 {
        int refcount;
 
-       refcount = atomic_fetchadd_int(&dpriv->refcount, 1);
+       refcount = atomic_fetchadd_int(&diskctx->refcount, 1);
 
        KKASSERT(refcount >= 0);
 }
 
 void
-fq_reference_priv(struct dsched_fq_priv *fqp)
+fq_thread_io_ref(struct fq_thread_io *tdio)
 {
        int refcount;
 
-       refcount = atomic_fetchadd_int(&fqp->refcount, 1);
+       refcount = atomic_fetchadd_int(&tdio->refcount, 1);
 
        KKASSERT(refcount >= 0);
 }
 
 void
-fq_reference_mpriv(struct dsched_fq_mpriv *fqmp)
+fq_thread_ctx_ref(struct fq_thread_ctx *tdctx)
 {
        int refcount;
 
-       refcount = atomic_fetchadd_int(&fqmp->refcount, 1);
+       refcount = atomic_fetchadd_int(&tdctx->refcount, 1);
 
        KKASSERT(refcount >= 0);
 }
 
 void
-fq_dereference_dpriv(struct dsched_fq_dpriv *dpriv)
+fq_disk_ctx_unref(struct fq_disk_ctx *diskctx)
 {
-       struct dsched_fq_priv   *fqp, *fqp2;
+       struct fq_thread_io     *tdio, *tdio2;
        int refcount;
 
-       refcount = atomic_fetchadd_int(&dpriv->refcount, -1);
+       refcount = atomic_fetchadd_int(&diskctx->refcount, -1);
 
 
        KKASSERT(refcount >= 0 || refcount <= -0x400);
 
        if (refcount == 1) {
-               atomic_subtract_int(&dpriv->refcount, 0x400); /* mark as: in destruction */
+               atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
 #if 1
-               kprintf("dpriv (%p) destruction started, trace:\n", dpriv);
+               kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
                print_backtrace(4);
 #endif
-               lockmgr(&dpriv->lock, LK_EXCLUSIVE);
-               TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
-                       TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
-                       fqp->flags &= ~FQP_LINKED_DPRIV;
-                       fq_dereference_priv(fqp);
+               lockmgr(&diskctx->lock, LK_EXCLUSIVE);
+               TAILQ_FOREACH_MUTABLE(tdio, &diskctx->fq_tdio_list, dlink, tdio2) {
+                       TAILQ_REMOVE(&diskctx->fq_tdio_list, tdio, dlink);
+                       tdio->flags &= ~FQ_LINKED_DISK_CTX;
+                       fq_thread_io_unref(tdio);
                }
-               lockmgr(&dpriv->lock, LK_RELEASE);
+               lockmgr(&diskctx->lock, LK_RELEASE);
 
-               objcache_put(fq_dpriv_cache, dpriv);
-               atomic_subtract_int(&fq_stats.dpriv_allocations, 1);
+               objcache_put(fq_diskctx_cache, diskctx);
+               atomic_subtract_int(&fq_stats.diskctx_allocations, 1);
        }
 }
 
 void
-fq_dereference_priv(struct dsched_fq_priv *fqp)
+fq_thread_io_unref(struct fq_thread_io *tdio)
 {
-       struct dsched_fq_mpriv  *fqmp;
-       struct dsched_fq_dpriv  *dpriv;
+       struct fq_thread_ctx    *tdctx;
+       struct fq_disk_ctx      *diskctx;
        int refcount;
 
-       refcount = atomic_fetchadd_int(&fqp->refcount, -1);
+       refcount = atomic_fetchadd_int(&tdio->refcount, -1);
 
        KKASSERT(refcount >= 0 || refcount <= -0x400);
 
        if (refcount == 1) {
-               atomic_subtract_int(&fqp->refcount, 0x400); /* mark as: in destruction */
+               atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
 #if 0
-               kprintf("fqp (%p) destruction started, trace:\n", fqp);
+               kprintf("tdio (%p) destruction started, trace:\n", tdio);
                print_backtrace(8);
 #endif
-               dpriv = fqp->dpriv;
-               KKASSERT(dpriv != NULL);
-               KKASSERT(fqp->qlength == 0);
+               diskctx = tdio->diskctx;
+               KKASSERT(diskctx != NULL);
+               KKASSERT(tdio->qlength == 0);
 
-               if (fqp->flags & FQP_LINKED_DPRIV) {
-                       lockmgr(&dpriv->lock, LK_EXCLUSIVE);
+               if (tdio->flags & FQ_LINKED_DISK_CTX) {
+                       lockmgr(&diskctx->lock, LK_EXCLUSIVE);
 
-                       TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
-                       fqp->flags &= ~FQP_LINKED_DPRIV;
+                       TAILQ_REMOVE(&diskctx->fq_tdio_list, tdio, dlink);
+                       tdio->flags &= ~FQ_LINKED_DISK_CTX;
 
-                       lockmgr(&dpriv->lock, LK_RELEASE);
+                       lockmgr(&diskctx->lock, LK_RELEASE);
                }
 
-               if (fqp->flags & FQP_LINKED_FQMP) {
-                       fqmp = fqp->fqmp;
-                       KKASSERT(fqmp != NULL);
+               if (tdio->flags & FQ_LINKED_THREAD_CTX) {
+                       tdctx = tdio->tdctx;
+                       KKASSERT(tdctx != NULL);
 
-                       spin_lock_wr(&fqmp->lock);
+                       spin_lock_wr(&tdctx->lock);
 
-                       TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
-                       fqp->flags &= ~FQP_LINKED_FQMP;
+                       TAILQ_REMOVE(&tdctx->fq_tdio_list, tdio, link);
+                       tdio->flags &= ~FQ_LINKED_THREAD_CTX;
 
-                       spin_unlock_wr(&fqmp->lock);
+                       spin_unlock_wr(&tdctx->lock);
                }
 
-               objcache_put(fq_priv_cache, fqp);
-               atomic_subtract_int(&fq_stats.fqp_allocations, 1);
+               objcache_put(fq_tdio_cache, tdio);
+               atomic_subtract_int(&fq_stats.tdio_allocations, 1);
 #if 0
-               fq_dereference_dpriv(dpriv);
+               fq_disk_ctx_unref(diskctx);
 #endif
        }
 }
 
 void
-fq_dereference_mpriv(struct dsched_fq_mpriv *fqmp)
+fq_thread_ctx_unref(struct fq_thread_ctx *tdctx)
 {
-       struct dsched_fq_priv   *fqp, *fqp2;
+       struct fq_thread_io     *tdio, *tdio2;
        int refcount;
 
-       refcount = atomic_fetchadd_int(&fqmp->refcount, -1);
+       refcount = atomic_fetchadd_int(&tdctx->refcount, -1);
 
        KKASSERT(refcount >= 0 || refcount <= -0x400);
 
        if (refcount == 1) {
-               atomic_subtract_int(&fqmp->refcount, 0x400); /* mark as: in destruction */
+               atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
 #if 0
-               kprintf("fqmp (%p) destruction started, trace:\n", fqmp);
+               kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
                print_backtrace(8);
 #endif
-               FQ_GLOBAL_FQMP_LOCK();
+               FQ_GLOBAL_THREAD_CTX_LOCK();
 
-               TAILQ_FOREACH_MUTABLE(fqp, &fqmp->fq_priv_list, link, fqp2) {
-                       TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
-                       fqp->flags &= ~FQP_LINKED_FQMP;
-                       fq_dereference_priv(fqp);
+               TAILQ_FOREACH_MUTABLE(tdio, &tdctx->fq_tdio_list, link, tdio2) {
+                       TAILQ_REMOVE(&tdctx->fq_tdio_list, tdio, link);
+                       tdio->flags &= ~FQ_LINKED_THREAD_CTX;
+                       fq_thread_io_unref(tdio);
                }
-               TAILQ_REMOVE(&dsched_fqmp_list, fqmp, link);
+               TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);
 
-               FQ_GLOBAL_FQMP_UNLOCK();
+               FQ_GLOBAL_THREAD_CTX_UNLOCK();
 
-               objcache_put(fq_mpriv_cache, fqmp);
-               atomic_subtract_int(&fq_stats.fqmp_allocations, 1);
+               objcache_put(fq_tdctx_cache, tdctx);
+               atomic_subtract_int(&fq_stats.tdctx_allocations, 1);
        }
 }
 
 
-struct dsched_fq_priv *
-fq_alloc_priv(struct disk *dp, struct dsched_fq_mpriv *fqmp)
+struct fq_thread_io *
+fq_thread_io_alloc(struct disk *dp, struct fq_thread_ctx *tdctx)
 {
-       struct dsched_fq_priv   *fqp;
+       struct fq_thread_io     *tdio;
 #if 0
-       fq_reference_dpriv(dsched_get_disk_priv(dp));
+       fq_disk_ctx_ref(dsched_get_disk_priv(dp));
 #endif
-       fqp = objcache_get(fq_priv_cache, M_WAITOK);
-       bzero(fqp, sizeof(struct dsched_fq_priv));
+       tdio = objcache_get(fq_tdio_cache, M_WAITOK);
+       bzero(tdio, sizeof(struct fq_thread_io));
 
-       /* XXX: maybe we do need another ref for the disk list for fqp */
-       fq_reference_priv(fqp);
+       /* XXX: maybe we do need another ref for the disk list for tdio */
+       fq_thread_io_ref(tdio);
 
-       FQ_FQP_LOCKINIT(fqp);
-       fqp->dp = dp;
+       FQ_THREAD_IO_LOCKINIT(tdio);
+       tdio->dp = dp;
 
-       fqp->dpriv = dsched_get_disk_priv(dp);
-       TAILQ_INIT(&fqp->queue);
+       tdio->diskctx = dsched_get_disk_priv(dp);
+       TAILQ_INIT(&tdio->queue);
 
-       TAILQ_INSERT_TAIL(&fqp->dpriv->fq_priv_list, fqp, dlink);
-       fqp->flags |= FQP_LINKED_DPRIV;
+       TAILQ_INSERT_TAIL(&tdio->diskctx->fq_tdio_list, tdio, dlink);
+       tdio->flags |= FQ_LINKED_DISK_CTX;
 
-       if (fqmp) {
-               fqp->fqmp = fqmp;
-               fqp->p = fqmp->p;
+       if (tdctx) {
+               tdio->tdctx = tdctx;
+               tdio->p = tdctx->p;
 
-               /* Put the fqp in the fqmp list */
-               FQ_FQMP_LOCK(fqmp);
-               TAILQ_INSERT_TAIL(&fqmp->fq_priv_list, fqp, link);
-               FQ_FQMP_UNLOCK(fqmp);
-               fqp->flags |= FQP_LINKED_FQMP;
+               /* Put the tdio in the tdctx list */
+               FQ_THREAD_CTX_LOCK(tdctx);
+               TAILQ_INSERT_TAIL(&tdctx->fq_tdio_list, tdio, link);
+               FQ_THREAD_CTX_UNLOCK(tdctx);
+               tdio->flags |= FQ_LINKED_THREAD_CTX;
        }
 
-       atomic_add_int(&fq_stats.fqp_allocations, 1);
-       return fqp;
+       atomic_add_int(&fq_stats.tdio_allocations, 1);
+       return tdio;
 }
 
 
-struct dsched_fq_dpriv *
-fq_alloc_dpriv(struct disk *dp)
+struct fq_disk_ctx *
+fq_disk_ctx_alloc(struct disk *dp)
 {
-       struct dsched_fq_dpriv *dpriv;
-
-       dpriv = objcache_get(fq_dpriv_cache, M_WAITOK);
-       bzero(dpriv, sizeof(struct dsched_fq_dpriv));
-       fq_reference_dpriv(dpriv);
-       dpriv->dp = dp;
-       dpriv->avg_rq_time = 0;
-       dpriv->incomplete_tp = 0;
-       FQ_DPRIV_LOCKINIT(dpriv);
-       TAILQ_INIT(&dpriv->fq_priv_list);
-
-       atomic_add_int(&fq_stats.dpriv_allocations, 1);
-       return dpriv;
+       struct fq_disk_ctx *diskctx;
+
+       diskctx = objcache_get(fq_diskctx_cache, M_WAITOK);
+       bzero(diskctx, sizeof(struct fq_disk_ctx));
+       fq_disk_ctx_ref(diskctx);
+       diskctx->dp = dp;
+       diskctx->avg_rq_time = 0;
+       diskctx->incomplete_tp = 0;
+       FQ_DISK_CTX_LOCKINIT(diskctx);
+       TAILQ_INIT(&diskctx->fq_tdio_list);
+
+       atomic_add_int(&fq_stats.diskctx_allocations, 1);
+       return diskctx;
 }
 
 
-struct dsched_fq_mpriv *
-fq_alloc_mpriv(struct proc *p)
+struct fq_thread_ctx *
+fq_thread_ctx_alloc(struct proc *p)
 {
-       struct dsched_fq_mpriv  *fqmp;
-       struct dsched_fq_priv   *fqp;
+       struct fq_thread_ctx    *tdctx;
+       struct fq_thread_io     *tdio;
        struct disk     *dp = NULL;
 
-       fqmp = objcache_get(fq_mpriv_cache, M_WAITOK);
-       bzero(fqmp, sizeof(struct dsched_fq_mpriv));
-       fq_reference_mpriv(fqmp);
+       tdctx = objcache_get(fq_tdctx_cache, M_WAITOK);
+       bzero(tdctx, sizeof(struct fq_thread_ctx));
+       fq_thread_ctx_ref(tdctx);
 #if 0
-       kprintf("fq_alloc_mpriv, new fqmp = %p\n", fqmp);
+       kprintf("fq_thread_ctx_alloc, new tdctx = %p\n", tdctx);
 #endif
-       FQ_FQMP_LOCKINIT(fqmp);
-       TAILQ_INIT(&fqmp->fq_priv_list);
-       fqmp->p = p;
+       FQ_THREAD_CTX_LOCKINIT(tdctx);
+       TAILQ_INIT(&tdctx->fq_tdio_list);
+       tdctx->p = p;
 
-       while ((dp = dsched_disk_enumerate(dp, &dsched_fq_ops))) {
-               fqp = fq_alloc_priv(dp, fqmp);
+       while ((dp = dsched_disk_enumerate(dp, &dsched_fq_policy))) {
+               tdio = fq_thread_io_alloc(dp, tdctx);
 #if 0
-               fq_reference_priv(fqp);
+               fq_thread_io_ref(tdio);
 #endif
        }
 
-       FQ_GLOBAL_FQMP_LOCK();
-       TAILQ_INSERT_TAIL(&dsched_fqmp_list, fqmp, link);
-       FQ_GLOBAL_FQMP_UNLOCK();
+       FQ_GLOBAL_THREAD_CTX_LOCK();
+       TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
+       FQ_GLOBAL_THREAD_CTX_UNLOCK();
 
-       atomic_add_int(&fq_stats.fqmp_allocations, 1);
-       return fqmp;
+       atomic_add_int(&fq_stats.tdctx_allocations, 1);
+       return tdctx;
 }
 
 
 void
-fq_dispatcher(struct dsched_fq_dpriv *dpriv)
+fq_dispatcher(struct fq_disk_ctx *diskctx)
 {
-       struct dsched_fq_mpriv  *fqmp;
-       struct dsched_fq_priv   *fqp, *fqp2;
+       struct fq_thread_ctx    *tdctx;
+       struct fq_thread_io     *tdio, *tdio2;
        struct bio *bio, *bio2;
        int idle;
 
        /*
-        * We need to manually assign an fqp to the fqmp of this thread
+        * We need to manually assign a tdio to the tdctx of this thread
         * since it isn't assigned one during fq_prepare, as the disk
         * is not set up yet.
         */
-       fqmp = dsched_get_thread_priv(curthread);
-       KKASSERT(fqmp != NULL);
+       tdctx = dsched_get_thread_priv(curthread);
+       KKASSERT(tdctx != NULL);
 
-       fqp = fq_alloc_priv(dpriv->dp, fqmp);
+       tdio = fq_thread_io_alloc(diskctx->dp, tdctx);
 #if 0
-       fq_reference_priv(fqp);
+       fq_thread_io_ref(tdio);
 #endif
 
-       FQ_DPRIV_LOCK(dpriv);
+       FQ_DISK_CTX_LOCK(diskctx);
        for(;;) {
                idle = 0;
                /* sleep ~60 ms */
-               if ((lksleep(dpriv, &dpriv->lock, 0, "fq_dispatcher", hz/15) == 0)) {
+               if ((lksleep(diskctx, &diskctx->lock, 0, "fq_dispatcher", hz/15) == 0)) {
                        /*
                         * We've been woken up; this either means that we are
                         * supposed to die away nicely or that the disk is idle.
                         */
 
-                       if (__predict_false(dpriv->die == 1)) {
+                       if (__predict_false(diskctx->die == 1)) {
                                /* If we are supposed to die, drain all queues */
-                               fq_drain(dpriv, FQ_DRAIN_FLUSH);
+                               fq_drain(diskctx, FQ_DRAIN_FLUSH);
 
                                /* Now we can safely unlock and exit */
-                               FQ_DPRIV_UNLOCK(dpriv);
+                               FQ_DISK_CTX_UNLOCK(diskctx);
                                kprintf("fq_dispatcher is peacefully dying\n");
                                lwkt_exit();
                                /* NOTREACHED */
@@ -369,55 +368,55 @@ fq_dispatcher(struct dsched_fq_dpriv *dpriv)
 
                /* Maybe the disk is idle and we just didn't get the wakeup */
                if (idle == 0)
-                       idle = dpriv->idle;
+                       idle = diskctx->idle;
 
                /*
                 * XXX: further room for improvements here. It would be better
-                *      to dispatch a few requests from each fqp as to ensure
+                *      to dispatch a few requests from each tdio so as to ensure
                 *      real fairness.
                 */
-               TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
-                       if (fqp->qlength == 0)
+               TAILQ_FOREACH_MUTABLE(tdio, &diskctx->fq_tdio_list, dlink, tdio2) {
+                       if (tdio->qlength == 0)
                                continue;
 
-                       FQ_FQP_LOCK(fqp);
-                       if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
-                               fq_balance_self(fqp);
+                       FQ_THREAD_IO_LOCK(tdio);
+                       if (atomic_cmpset_int(&tdio->rebalance, 1, 0))
+                               fq_balance_self(tdio);
                        /*
                         * XXX: why 5 extra? should probably be dynamic,
                         *      relying on information on latency.
                         */
-                       if ((fqp->max_tp > 0) && idle &&
-                           (fqp->issued >= fqp->max_tp)) {
-                               fqp->max_tp += 5;
+                       if ((tdio->max_tp > 0) && idle &&
+                           (tdio->issued >= tdio->max_tp)) {
+                               tdio->max_tp += 5;
                        }
 
-                       TAILQ_FOREACH_MUTABLE(bio, &fqp->queue, link, bio2) {
-                               if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
-                                       fq_balance_self(fqp);
-                               if ((fqp->max_tp > 0) &&
-                                   ((fqp->issued >= fqp->max_tp)))
+                       TAILQ_FOREACH_MUTABLE(bio, &tdio->queue, link, bio2) {
+                               if (atomic_cmpset_int(&tdio->rebalance, 1, 0))
+                                       fq_balance_self(tdio);
+                               if ((tdio->max_tp > 0) &&
+                                   ((tdio->issued >= tdio->max_tp)))
                                        break;
 
-                               TAILQ_REMOVE(&fqp->queue, bio, link);
-                               --fqp->qlength;
+                               TAILQ_REMOVE(&tdio->queue, bio, link);
+                               --tdio->qlength;
 
                                /*
-                                * beware that we do have an fqp reference
+                                * beware that we do have a tdio reference
                                 * from the queueing
                                 */
-                               fq_dispatch(dpriv, bio, fqp);
+                               fq_dispatch(diskctx, bio, tdio);
                        }
-                       FQ_FQP_UNLOCK(fqp);
+                       FQ_THREAD_IO_UNLOCK(tdio);
 
                }
        }
 }
 
 void
-fq_balance_thread(struct dsched_fq_dpriv *dpriv)
+fq_balance_thread(struct fq_disk_ctx *diskctx)
 {
-       struct  dsched_fq_priv  *fqp, *fqp2;
+       struct  fq_thread_io    *tdio, *tdio2;
        static struct timeval old_tv;
        struct timeval tv;
        int64_t total_budget, product;
@@ -427,12 +426,12 @@ fq_balance_thread(struct dsched_fq_dpriv *dpriv)
 
        getmicrotime(&old_tv);
 
-       FQ_DPRIV_LOCK(dpriv);
+       FQ_DISK_CTX_LOCK(diskctx);
        for (;;) {
                /* sleep ~1s */
-               if ((lksleep(curthread, &dpriv->lock, 0, "fq_balancer", hz/2) == 0)) {
-                       if (__predict_false(dpriv->die)) {
-                               FQ_DPRIV_UNLOCK(dpriv);
+               if ((lksleep(curthread, &diskctx->lock, 0, "fq_balancer", hz/2) == 0)) {
+                       if (__predict_false(diskctx->die)) {
+                               FQ_DISK_CTX_UNLOCK(diskctx);
                                lwkt_exit();
                        }
                }
@@ -453,18 +452,18 @@ fq_balance_thread(struct dsched_fq_dpriv *dpriv)
 
                old_tv = tv;
 
-               dpriv->disk_busy = (100*(total_disk_time - dpriv->idle_time)) / total_disk_time;
-               if (dpriv->disk_busy < 0)
-                       dpriv->disk_busy = 0;
+               diskctx->disk_busy = (100*(total_disk_time - diskctx->idle_time)) / total_disk_time;
+               if (diskctx->disk_busy < 0)
+                       diskctx->disk_busy = 0;
 
-               dpriv->idle_time = 0;
+               diskctx->idle_time = 0;
                lost_bits = 0;
 
-               TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
-                       fqp->s_avg_latency = fqp->avg_latency;
-                       fqp->s_transactions = fqp->transactions;
-                       if (fqp->s_transactions > 0 /* 30 */) {
-                               product = fqp->s_avg_latency * fqp->s_transactions;
+               TAILQ_FOREACH_MUTABLE(tdio, &diskctx->fq_tdio_list, dlink, tdio2) {
+                       tdio->interval_avg_latency = tdio->avg_latency;
+                       tdio->interval_transactions = tdio->transactions;
+                       if (tdio->interval_transactions > 0) {
+                               product = tdio->interval_avg_latency * tdio->interval_transactions;
                                product >>= lost_bits;
                                while(total_budget >= INT64_MAX - product) {
                                        ++lost_bits;
@@ -472,26 +471,26 @@ fq_balance_thread(struct dsched_fq_dpriv *dpriv)
                                        total_budget >>= 1;
                                }
                                total_budget += product;
-                               ++budget[(fqp->p) ? fqp->p->p_ionice : 0];
+                               ++budget[(tdio->p) ? tdio->p->p_ionice : 0];
                                KKASSERT(total_budget >= 0);
                                dsched_debug(LOG_INFO,
                                    "%d) avg_latency = %d, transactions = %d, ioprio = %d\n",
-                                   n, fqp->s_avg_latency, fqp->s_transactions,
-                                   (fqp->p) ? fqp->p->p_ionice : 0);
+                                   n, tdio->interval_avg_latency, tdio->interval_transactions,
+                                   (tdio->p) ? tdio->p->p_ionice : 0);
                                ++n;
                        } else {
-                               fqp->max_tp = 0;
+                               tdio->max_tp = 0;
                        }
-                       fqp->rebalance = 0;
-                       fqp->transactions = 0;
-                       fqp->avg_latency = 0;
-                       fqp->issued = 0;
+                       tdio->rebalance = 0;
+                       tdio->transactions = 0;
+                       tdio->avg_latency = 0;
+                       tdio->issued = 0;
                }
 
                dsched_debug(LOG_INFO, "%d procs competing for disk\n"
                    "total_budget = %jd (lost bits = %d)\n"
                    "incomplete tp = %d\n", n, (intmax_t)total_budget,
-                   lost_bits, dpriv->incomplete_tp);
+                   lost_bits, diskctx->incomplete_tp);
 
                if (n == 0)
                        continue;
@@ -519,20 +518,17 @@ fq_balance_thread(struct dsched_fq_dpriv *dpriv)
                         *      storing the lost bits so they can be used in the
                         *      fq_balance_self.
                         */
-                       dpriv->budgetpb[i] = ((FQ_PRIO_BIAS+i)*total_budget/sum) << lost_bits;
-                       KKASSERT(dpriv->budgetpb[i] >= 0);
+                       diskctx->budgetpb[i] = ((FQ_PRIO_BIAS+i)*total_budget/sum) << lost_bits;
+                       KKASSERT(diskctx->budgetpb[i] >= 0);
                }
 
-               if (total_budget > dpriv->max_budget)
-                       dpriv->max_budget = total_budget;
-
-               dsched_debug(4, "disk is %d%% busy\n", dpriv->disk_busy);
-               TAILQ_FOREACH(fqp, &dpriv->fq_priv_list, dlink) {
-                       fqp->rebalance = 1;
+               dsched_debug(4, "disk is %d%% busy\n", diskctx->disk_busy);
+               TAILQ_FOREACH(tdio, &diskctx->fq_tdio_list, dlink) {
+                       tdio->rebalance = 1;
                }
 
-               dpriv->prev_full = dpriv->last_full;
-               dpriv->last_full = (dpriv->disk_busy >= 90)?1:0;
+               diskctx->prev_full = diskctx->last_full;
+               diskctx->last_full = (diskctx->disk_busy >= 90)?1:0;
        }
 }
 
@@ -541,22 +537,27 @@ fq_balance_thread(struct dsched_fq_dpriv *dpriv)
  * fq_balance_self should be called from all sorts of dispatchers. It basically
  * offloads some of the heavier calculations on throttling onto the process that
  * wants to do I/O instead of doing it in the fq_balance thread.
- * - should be called with dpriv lock held
+ * - should be called with diskctx lock held
  */
 void
-fq_balance_self(struct dsched_fq_priv *fqp) {
-       struct dsched_fq_dpriv *dpriv;
+fq_balance_self(struct fq_thread_io *tdio) {
+       struct fq_disk_ctx *diskctx;
 
        int64_t budget, used_budget;
        int64_t avg_latency;
        int64_t transactions;
 
-       transactions = (int64_t)fqp->s_transactions;
-       avg_latency = (int64_t)fqp->s_avg_latency;
-       dpriv = fqp->dpriv;
+       transactions = (int64_t)tdio->interval_transactions;
+       avg_latency = (int64_t)tdio->interval_avg_latency;
+       diskctx = tdio->diskctx;
+
+#if 0
+       /* XXX: do we really require the lock? */
+       FQ_DISK_CTX_LOCK_ASSERT(diskctx);
+#endif
 
        used_budget = ((int64_t)avg_latency * transactions);
-       budget = dpriv->budgetpb[(fqp->p) ? fqp->p->p_ionice : 0];
+       budget = diskctx->budgetpb[(tdio->p) ? tdio->p->p_ionice : 0];
 
        if (used_budget > 0) {
                dsched_debug(LOG_INFO,
@@ -564,18 +565,18 @@ fq_balance_self(struct dsched_fq_priv *fqp) {
                    (intmax_t)used_budget, budget);
        }
 
-       if ((used_budget > budget) && (dpriv->disk_busy >= 90)) {
+       if ((used_budget > budget) && (diskctx->disk_busy >= 90)) {
                KKASSERT(avg_latency != 0);
 
-               fqp->max_tp = budget/(avg_latency);
+               tdio->max_tp = budget/(avg_latency);
                atomic_add_int(&fq_stats.procs_limited, 1);
 
                dsched_debug(LOG_INFO,
-                   "rate limited to %d transactions\n", fqp->max_tp);
+                   "rate limited to %d transactions\n", tdio->max_tp);
 
-       } else if (((used_budget*2 < budget) || (dpriv->disk_busy < 80)) &&
-           (!dpriv->prev_full && !dpriv->last_full)) {
-               fqp->max_tp = 0;
+       } else if (((used_budget*2 < budget) || (diskctx->disk_busy < 80)) &&
+           (!diskctx->prev_full && !diskctx->last_full)) {
+               tdio->max_tp = 0;
        }
 }
 
@@ -607,30 +608,29 @@ fq_uninit(void)
 static void
 fq_earlyinit(void)
 {
-       fq_priv_cache = objcache_create("fq-priv-cache", 0, 0,
+       fq_tdio_cache = objcache_create("fq-tdio-cache", 0, 0,
                                           NULL, NULL, NULL,
                                           objcache_malloc_alloc,
                                           objcache_malloc_free,
-                                          &dsched_fq_priv_malloc_args );
+                                          &fq_thread_io_malloc_args );
 
-       fq_mpriv_cache = objcache_create("fq-mpriv-cache", 0, 0,
+       fq_tdctx_cache = objcache_create("fq-tdctx-cache", 0, 0,
                                           NULL, NULL, NULL,
                                           objcache_malloc_alloc,
                                           objcache_malloc_free,
-                                          &dsched_fq_mpriv_malloc_args );
+                                          &fq_thread_ctx_malloc_args );
 
-       FQ_GLOBAL_FQMP_LOCKINIT();
+       FQ_GLOBAL_THREAD_CTX_LOCKINIT();
 
-       fq_dpriv_cache = objcache_create("fq-dpriv-cache", 0, 0,
+       fq_diskctx_cache = objcache_create("fq-diskctx-cache", 0, 0,
                                           NULL, NULL, NULL,
                                           objcache_malloc_alloc,
                                           objcache_malloc_free,
-                                          &dsched_fq_dpriv_malloc_args );
+                                          &fq_disk_ctx_malloc_args );
 
        bzero(&fq_stats, sizeof(struct dsched_fq_stats));
 
-       dsched_register(&dsched_fq_ops);
-       callout_init_mp(&fq_callout);
+       dsched_register(&dsched_fq_policy);
 
        kprintf("FQ scheduler policy version %d.%d loaded\n",
            dsched_fq_version_maj, dsched_fq_version_min);
@@ -639,8 +639,6 @@ fq_earlyinit(void)
 static void
 fq_earlyuninit(void)
 {
-       callout_stop(&fq_callout);
-       callout_deactivate(&fq_callout);
        return;
 }
 
index c9af26c..031a415 100644 (file)
@@ -77,14 +77,13 @@ dsched_exit_proc_t  fq_exit_proc;
 dsched_exit_thread_t   fq_exit_thread;
 
 extern struct dsched_fq_stats  fq_stats;
-extern struct lock     fq_fqmp_lock;
-extern TAILQ_HEAD(, dsched_fq_mpriv)   dsched_fqmp_list;
+extern struct lock     fq_tdctx_lock;
+extern TAILQ_HEAD(, fq_thread_ctx)     dsched_tdctx_list;
 extern struct callout  fq_callout;
 
-struct dsched_ops dsched_fq_ops = {
-       .head = {
-               .name = "fq"
-       },
+struct dsched_policy dsched_fq_policy = {
+       .name = "fq",
+
        .prepare = fq_prepare,
        .teardown = fq_teardown,
        .flush = fq_flush,
@@ -104,29 +103,29 @@ struct dsched_ops dsched_fq_ops = {
 static int
 fq_prepare(struct disk *dp)
 {
-       struct  dsched_fq_dpriv *dpriv;
-       struct dsched_fq_mpriv  *fqmp;
-       struct dsched_fq_priv   *fqp;
+       struct  fq_disk_ctx     *diskctx;
+       struct fq_thread_ctx    *tdctx;
+       struct fq_thread_io     *tdio;
        struct thread *td_core, *td_balance;
 
-       dpriv = fq_alloc_dpriv(dp);
-       fq_reference_dpriv(dpriv);
-       dsched_set_disk_priv(dp, dpriv);
+       diskctx = fq_disk_ctx_alloc(dp);
+       fq_disk_ctx_ref(diskctx);
+       dsched_set_disk_priv(dp, diskctx);
 
-       FQ_GLOBAL_FQMP_LOCK();
-       TAILQ_FOREACH(fqmp, &dsched_fqmp_list, link) {
-               fqp = fq_alloc_priv(dp, fqmp);
+       FQ_GLOBAL_THREAD_CTX_LOCK();
+       TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
+               tdio = fq_thread_io_alloc(dp, tdctx);
 #if 0
-               fq_reference_priv(fqp);
+               fq_thread_io_ref(tdio);
 #endif
        }
-       FQ_GLOBAL_FQMP_UNLOCK();
+       FQ_GLOBAL_THREAD_CTX_UNLOCK();
 
-       lwkt_create((void (*)(void *))fq_dispatcher, dpriv, &td_core, NULL,
+       lwkt_create((void (*)(void *))fq_dispatcher, diskctx, &td_core, NULL,
            TDF_MPSAFE, -1, "fq_dispatch_%s", dp->d_cdev->si_name);
-       lwkt_create((void (*)(void *))fq_balance_thread, dpriv, &td_balance,
+       lwkt_create((void (*)(void *))fq_balance_thread, diskctx, &td_balance,
            NULL, TDF_MPSAFE, -1, "fq_balance_%s", dp->d_cdev->si_name);
-       dpriv->td_balance = td_balance;
+       diskctx->td_balance = td_balance;
 
        return 0;
 }
@@ -136,44 +135,44 @@ fq_prepare(struct disk *dp)
 static void
 fq_teardown(struct disk *dp)
 {
-       struct dsched_fq_dpriv *dpriv;
+       struct fq_disk_ctx *diskctx;
 
-       dpriv = dsched_get_disk_priv(dp);
-       KKASSERT(dpriv != NULL);
+       diskctx = dsched_get_disk_priv(dp);
+       KKASSERT(diskctx != NULL);
 
        /* Basically kill the dispatcher thread */
-       dpriv->die = 1;
-       wakeup(dpriv->td_balance);
-       wakeup(dpriv);
-       tsleep(dpriv, 0, "fq_dispatcher", hz/5); /* wait 200 ms */
-       wakeup(dpriv->td_balance);
-       wakeup(dpriv);
-       tsleep(dpriv, 0, "fq_dispatcher", hz/10); /* wait 100 ms */
-       wakeup(dpriv->td_balance);
-       wakeup(dpriv);
-
-       fq_dereference_dpriv(dpriv); /* from prepare */
-       fq_dereference_dpriv(dpriv); /* from alloc */
+       diskctx->die = 1;
+       wakeup(diskctx->td_balance);
+       wakeup(diskctx);
+       tsleep(diskctx, 0, "fq_dispatcher", hz/5); /* wait 200 ms */
+       wakeup(diskctx->td_balance);
+       wakeup(diskctx);
+       tsleep(diskctx, 0, "fq_dispatcher", hz/10); /* wait 100 ms */
+       wakeup(diskctx->td_balance);
+       wakeup(diskctx);
+
+       fq_disk_ctx_unref(diskctx); /* from prepare */
+       fq_disk_ctx_unref(diskctx); /* from alloc */
 
        dsched_set_disk_priv(dp, NULL);
 }
 
 
-/* Must be called with locked dpriv */
+/* Must be called with locked diskctx */
 void
-fq_drain(struct dsched_fq_dpriv *dpriv, int mode)
+fq_drain(struct fq_disk_ctx *diskctx, int mode)
 {
-       struct dsched_fq_priv *fqp, *fqp2;
+       struct fq_thread_io *tdio, *tdio2;
        struct bio *bio, *bio2;
 
-       TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
-               if (fqp->qlength == 0)
+       TAILQ_FOREACH_MUTABLE(tdio, &diskctx->fq_tdio_list, dlink, tdio2) {
+               if (tdio->qlength == 0)
                        continue;
 
-               FQ_FQP_LOCK(fqp);
-               TAILQ_FOREACH_MUTABLE(bio, &fqp->queue, link, bio2) {
-                       TAILQ_REMOVE(&fqp->queue, bio, link);
-                       --fqp->qlength;
+               FQ_THREAD_IO_LOCK(tdio);
+               TAILQ_FOREACH_MUTABLE(bio, &tdio->queue, link, bio2) {
+                       TAILQ_REMOVE(&tdio->queue, bio, link);
+                       --tdio->qlength;
                        if (__predict_false(mode == FQ_DRAIN_CANCEL)) {
                                /* FQ_DRAIN_CANCEL */
                                dsched_cancel_bio(bio);
@@ -181,13 +180,13 @@ fq_drain(struct dsched_fq_dpriv *dpriv, int mode)
 
                                /* Release ref acquired on fq_queue */
                                /* XXX: possible failure point */
-                               fq_dereference_priv(fqp);
+                               fq_thread_io_unref(tdio);
                        } else {
                                /* FQ_DRAIN_FLUSH */
-                               fq_dispatch(dpriv, bio, fqp);
+                               fq_dispatch(diskctx, bio, tdio);
                        }
                }
-               FQ_FQP_UNLOCK(fqp);
+               FQ_THREAD_IO_UNLOCK(tdio);
        }
        return;
 }
@@ -203,18 +202,18 @@ fq_flush(struct disk *dp, struct bio *bio)
 static void
 fq_cancel(struct disk *dp)
 {
-       struct dsched_fq_dpriv  *dpriv;
+       struct fq_disk_ctx      *diskctx;
 
-       dpriv = dsched_get_disk_priv(dp);
-       KKASSERT(dpriv != NULL);
+       diskctx = dsched_get_disk_priv(dp);
+       KKASSERT(diskctx != NULL);
 
        /*
-        * all bios not in flight are queued in their respective fqps.
-        * good thing we have a list of fqps per disk dpriv.
+        * all bios not in flight are queued in their respective tdios.
+        * good thing we have a list of tdios per disk diskctx.
         */
-       FQ_DPRIV_LOCK(dpriv);
-       fq_drain(dpriv, FQ_DRAIN_CANCEL);
-       FQ_DPRIV_UNLOCK(dpriv);
+       FQ_DISK_CTX_LOCK(diskctx);
+       fq_drain(diskctx, FQ_DRAIN_CANCEL);
+       FQ_DISK_CTX_UNLOCK(diskctx);
 }
 
 
@@ -222,9 +221,9 @@ static int
 fq_queue(struct disk *dp, struct bio *obio)
 {
        struct bio *bio, *bio2;
-       struct dsched_fq_mpriv  *fqmp;
-       struct dsched_fq_priv   *fqp;
-       struct dsched_fq_dpriv  *dpriv;
+       struct fq_thread_ctx    *tdctx;
+       struct fq_thread_io     *tdio;
+       struct fq_disk_ctx      *diskctx;
        int found = 0;
        int max_tp, transactions;
 
@@ -232,84 +231,83 @@ fq_queue(struct disk *dp, struct bio *obio)
        if (__predict_false(obio->bio_buf->b_cmd == BUF_CMD_FLUSH))
                return (EINVAL);
 
-       /* get fqmp and fqp */
-       fqmp = dsched_get_buf_priv(obio->bio_buf);
+       /* get tdctx and tdio */
+       tdctx = dsched_get_buf_priv(obio->bio_buf);
 
        /*
-        * XXX: hack. we don't want the assert because some null-fqmps are
+        * XXX: hack. we don't want the assert because some null-tdctxs are
         * leaking through; just dispatch them. These come from the
         * mi_startup() mess, which does the initial root mount.
         */
 #if 0
-       KKASSERT(fqmp != NULL);
+       KKASSERT(tdctx != NULL);
 #endif
-       if (fqmp == NULL) {
+       if (tdctx == NULL) {
                /* We don't handle this case, let dsched dispatch */
-               atomic_add_int(&fq_stats.no_fqmp, 1);
+               atomic_add_int(&fq_stats.no_tdctx, 1);
                return (EINVAL);
        }
 
 
-       FQ_FQMP_LOCK(fqmp);
+       FQ_THREAD_CTX_LOCK(tdctx);
 #if 0
-       kprintf("fq_queue, fqmp = %p\n", fqmp);
+       kprintf("fq_queue, tdctx = %p\n", tdctx);
 #endif
-       KKASSERT(!TAILQ_EMPTY(&fqmp->fq_priv_list));
-       TAILQ_FOREACH(fqp, &fqmp->fq_priv_list, link) {
-               if (fqp->dp == dp) {
-                       fq_reference_priv(fqp);
+       KKASSERT(!TAILQ_EMPTY(&tdctx->fq_tdio_list));
+       TAILQ_FOREACH(tdio, &tdctx->fq_tdio_list, link) {
+               if (tdio->dp == dp) {
+                       fq_thread_io_ref(tdio);
                        found = 1;
                        break;
                }
        }
-       FQ_FQMP_UNLOCK(fqmp);
+       FQ_THREAD_CTX_UNLOCK(tdctx);
        dsched_clr_buf_priv(obio->bio_buf);
-       fq_dereference_mpriv(fqmp); /* acquired on new_buf */
-       atomic_subtract_int(&fq_stats.nbufs, 1);
+       fq_thread_ctx_unref(tdctx); /* acquired on new_buf */
 
        KKASSERT(found == 1);
-       dpriv = dsched_get_disk_priv(dp);
+       diskctx = dsched_get_disk_priv(dp);
 
-       if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
-               fq_balance_self(fqp);
+       if (atomic_cmpset_int(&tdio->rebalance, 1, 0))
+               fq_balance_self(tdio);
 
        /* XXX: probably rather pointless doing this atomically */
-       max_tp = atomic_fetchadd_int(&fqp->max_tp, 0);
-       transactions = atomic_fetchadd_int(&fqp->issued, 0);
+       max_tp = atomic_fetchadd_int(&tdio->max_tp, 0);
+       transactions = atomic_fetchadd_int(&tdio->issued, 0);
 
        /* | No rate limiting || Hasn't reached limit rate | */
        if ((max_tp == 0) || (transactions < max_tp)) {
                /*
                 * Process pending bios from previous _queue() actions that
-                * have been rate-limited and hence queued in the fqp.
+                * have been rate-limited and hence queued in the tdio.
                 */
-               KKASSERT(fqp->qlength >= 0);
+               KKASSERT(tdio->qlength >= 0);
 
-               if (fqp->qlength > 0) {
-                       FQ_FQP_LOCK(fqp);
+               if (tdio->qlength > 0) {
+                       FQ_THREAD_IO_LOCK(tdio);
 
-                       TAILQ_FOREACH_MUTABLE(bio, &fqp->queue, link, bio2) {
+                       TAILQ_FOREACH_MUTABLE(bio, &tdio->queue, link, bio2) {
                                /* Rebalance ourselves if required */
-                               if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
-                                       fq_balance_self(fqp);
-                               if ((fqp->max_tp > 0) && (fqp->issued >= fqp->max_tp))
+                               if (atomic_cmpset_int(&tdio->rebalance, 1, 0))
+                                       fq_balance_self(tdio);
+                               if ((tdio->max_tp > 0) && (tdio->issued >= tdio->max_tp))
                                        break;
-                               TAILQ_REMOVE(&fqp->queue, bio, link);
-                               --fqp->qlength;
+                               TAILQ_REMOVE(&tdio->queue, bio, link);
+                               --tdio->qlength;
 
                                /*
-                                * beware that we do have an fqp reference from the
+                                * beware that we do have a tdio reference from the
                                 * queueing
                                 */
-                               fq_dispatch(dpriv, bio, fqp);
+                               fq_dispatch(diskctx, bio, tdio);
                        }
-                       FQ_FQP_UNLOCK(fqp);
+                       FQ_THREAD_IO_UNLOCK(tdio);
                }
 
                /* Nothing is pending from previous IO, so just pass it down */
-               fq_reference_priv(fqp);
+               fq_thread_io_ref(tdio);
 
-               fq_dispatch(dpriv, obio, fqp);
+               fq_dispatch(diskctx, obio, tdio);
        } else {
                /*
                 * This thread has exceeeded its fair share,
@@ -318,8 +316,8 @@ fq_queue(struct disk *dp, struct bio *obio)
                 * we just queue requests instead of
                 * despatching them.
                 */
-               FQ_FQP_LOCK(fqp);
-               fq_reference_priv(fqp);
+               FQ_THREAD_IO_LOCK(tdio);
+               fq_thread_io_ref(tdio);
 
                /*
                 * Prioritize reads by inserting them at the front of the
@@ -330,15 +328,15 @@ fq_queue(struct disk *dp, struct bio *obio)
                 *      actually been written yet.
                 */
                if (obio->bio_buf->b_cmd == BUF_CMD_READ)
-                       TAILQ_INSERT_HEAD(&fqp->queue, obio, link);
+                       TAILQ_INSERT_HEAD(&tdio->queue, obio, link);
                else
-                       TAILQ_INSERT_TAIL(&fqp->queue, obio, link);
+                       TAILQ_INSERT_TAIL(&tdio->queue, obio, link);
 
-               ++fqp->qlength;
-               FQ_FQP_UNLOCK(fqp);
+               ++tdio->qlength;
+               FQ_THREAD_IO_UNLOCK(tdio);
        }
 
-       fq_dereference_priv(fqp);
+       fq_thread_io_unref(tdio);
        return 0;
 }
 
@@ -348,8 +346,8 @@ fq_completed(struct bio *bp)
 {
        struct bio *obio;
        int     delta;
-       struct dsched_fq_priv   *fqp;
-       struct dsched_fq_dpriv  *dpriv;
+       struct fq_thread_io     *tdio;
+       struct fq_disk_ctx      *diskctx;
        struct disk     *dp;
        int transactions, latency;
 
@@ -358,12 +356,12 @@ fq_completed(struct bio *bp)
        getmicrotime(&tv);
 
        dp = dsched_get_bio_dp(bp);
-       dpriv = dsched_get_disk_priv(dp);
-       fqp = dsched_get_bio_priv(bp);
-       KKASSERT(fqp != NULL);
-       KKASSERT(dpriv != NULL);
+       diskctx = dsched_get_disk_priv(dp);
+       tdio = dsched_get_bio_priv(bp);
+       KKASSERT(tdio != NULL);
+       KKASSERT(diskctx != NULL);
 
-       fq_reference_dpriv(dpriv);
+       fq_disk_ctx_ref(diskctx);
 
        if (!(bp->bio_buf->b_flags & B_ERROR)) {
                /*
@@ -376,14 +374,14 @@ fq_completed(struct bio *bp)
                        delta = 10000; /* default assume 10 ms */
 
                /* This is the last in-flight request and the disk is not idle yet */
-               if ((dpriv->incomplete_tp <= 1) && (!dpriv->idle)) {
-                       dpriv->idle = 1;        /* Mark disk as idle */
-                       dpriv->start_idle = tv; /* Save start idle time */
-                       wakeup(dpriv);          /* Wake up fq_dispatcher */
+               if ((diskctx->incomplete_tp <= 1) && (!diskctx->idle)) {
+                       diskctx->idle = 1;      /* Mark disk as idle */
+                       diskctx->start_idle = tv;       /* Save start idle time */
+                       wakeup(diskctx);                /* Wake up fq_dispatcher */
                }
-               atomic_subtract_int(&dpriv->incomplete_tp, 1);
-               transactions = atomic_fetchadd_int(&fqp->transactions, 1);
-               latency = atomic_fetchadd_int(&fqp->avg_latency, 0);
+               atomic_subtract_int(&diskctx->incomplete_tp, 1);
+               transactions = atomic_fetchadd_int(&tdio->transactions, 1);
+               latency = atomic_fetchadd_int(&tdio->avg_latency, 0);
 
                if (latency != 0) {
                        /* Moving averager, ((n-1)*avg_{n-1} + x) / n */
@@ -394,35 +392,35 @@ fq_completed(struct bio *bp)
                        latency = delta;
                }
 
-               fqp->avg_latency = latency;
+               tdio->avg_latency = latency;
 
                atomic_add_int(&fq_stats.transactions_completed, 1);
        }
 
-       fq_dereference_dpriv(dpriv);
+       fq_disk_ctx_unref(diskctx);
        /* decrease the ref count that was bumped for us on dispatch */
-       fq_dereference_priv(fqp);
+       fq_thread_io_unref(tdio);
 
        obio = pop_bio(bp);
        biodone(obio);
 }
 
 void
-fq_dispatch(struct dsched_fq_dpriv *dpriv, struct bio *bio,
-    struct dsched_fq_priv *fqp)
+fq_dispatch(struct fq_disk_ctx *diskctx, struct bio *bio,
+    struct fq_thread_io *tdio)
 {
        struct timeval tv;
 
-       if (dpriv->idle) {
+       if (diskctx->idle) {
                getmicrotime(&tv);
-               atomic_add_int(&dpriv->idle_time,
-                   (int)(1000000*((tv.tv_sec - dpriv->start_idle.tv_sec)) +
-                   (tv.tv_usec - dpriv->start_idle.tv_usec)));
-               dpriv->idle = 0;
+               atomic_add_int(&diskctx->idle_time,
+                   (int)(1000000*((tv.tv_sec - diskctx->start_idle.tv_sec)) +
+                   (tv.tv_usec - diskctx->start_idle.tv_usec)));
+               diskctx->idle = 0;
        }
-       dsched_strategy_async(dpriv->dp, bio, fq_completed, fqp);
+       dsched_strategy_async(diskctx->dp, bio, fq_completed, tdio);
 
-       atomic_add_int(&fqp->issued, 1);
-       atomic_add_int(&dpriv->incomplete_tp, 1);
+       atomic_add_int(&tdio->issued, 1);
+       atomic_add_int(&diskctx->incomplete_tp, 1);
        atomic_add_int(&fq_stats.transactions, 1);
 }
index 341ed19..6cc680f 100644 (file)
@@ -73,108 +73,106 @@ extern struct dsched_fq_stats     fq_stats;
 void
 fq_new_buf(struct buf *bp)
 {
-       struct dsched_fq_mpriv  *fqmp = NULL;
+       struct fq_thread_ctx    *tdctx = NULL;
 
        if (curproc != NULL) {
-               fqmp = dsched_get_proc_priv(curproc);
+               tdctx = dsched_get_proc_priv(curproc);
        } else {
                /* This is a kernel thread, so no proc info is available */
-               fqmp = dsched_get_thread_priv(curthread);
+               tdctx = dsched_get_thread_priv(curthread);
        }
 
 #if 0
        /*
         * XXX: hack. we don't want this assert because we aren't catching all
-        *      threads. mi_startup() is still getting away without an fqmp.
+        *      threads. mi_startup() is still getting away without a tdctx.
         */
 
-       /* by now we should have an fqmp. if not, something bad is going on */
-       KKASSERT(fqmp != NULL);
+       /* by now we should have a tdctx. if not, something bad is going on */
+       KKASSERT(tdctx != NULL);
 #endif
 
-       if (fqmp) {
-               atomic_add_int(&fq_stats.nbufs, 1);
-               fq_reference_mpriv(fqmp);
+       if (tdctx) {
+               fq_thread_ctx_ref(tdctx);
        }
-       dsched_set_buf_priv(bp, fqmp);
+       dsched_set_buf_priv(bp, tdctx);
        
 }
 
 void
 fq_exit_buf(struct buf *bp)
 {
-       struct dsched_fq_mpriv  *fqmp;
+       struct fq_thread_ctx    *tdctx;
 
-       fqmp = dsched_get_buf_priv(bp);
-       if (fqmp != NULL) {
+       tdctx = dsched_get_buf_priv(bp);
+       if (tdctx != NULL) {
                dsched_clr_buf_priv(bp);
-               fq_dereference_mpriv(fqmp);
-               atomic_subtract_int(&fq_stats.nbufs, 1);
+               fq_thread_ctx_unref(tdctx);
        }
 }
 
 void
 fq_new_proc(struct proc *p)
 {
-       struct dsched_fq_mpriv  *fqmp;
+       struct fq_thread_ctx    *tdctx;
 
        KKASSERT(p != NULL);
 
-       fqmp = fq_alloc_mpriv(p);
-       fq_reference_mpriv(fqmp);
-       dsched_set_proc_priv(p, fqmp);
+       tdctx = fq_thread_ctx_alloc(p);
+       fq_thread_ctx_ref(tdctx);
+       dsched_set_proc_priv(p, tdctx);
        atomic_add_int(&fq_stats.nprocs, 1);
-       fqmp->p = p;
+       tdctx->p = p;
 }
 
 void
 fq_new_thread(struct thread *td)
 {
-       struct dsched_fq_mpriv  *fqmp;
+       struct fq_thread_ctx    *tdctx;
 
        KKASSERT(td != NULL);
 
-       fqmp = fq_alloc_mpriv(NULL);
-       fq_reference_mpriv(fqmp);
-       dsched_set_thread_priv(td, fqmp);
+       tdctx = fq_thread_ctx_alloc(NULL);
+       fq_thread_ctx_ref(tdctx);
+       dsched_set_thread_priv(td, tdctx);
        atomic_add_int(&fq_stats.nthreads, 1);
-       fqmp->td = td;
+       tdctx->td = td;
 }
 
 void
 fq_exit_proc(struct proc *p)
 {
-       struct dsched_fq_mpriv  *fqmp;
+       struct fq_thread_ctx    *tdctx;
 
        KKASSERT(p != NULL);
 
-       fqmp = dsched_get_proc_priv(p);
-       KKASSERT(fqmp != NULL);
+       tdctx = dsched_get_proc_priv(p);
+       KKASSERT(tdctx != NULL);
 #if 0
-       kprintf("exit_proc: fqmp = %p\n", fqmp);
+       kprintf("exit_proc: tdctx = %p\n", tdctx);
 #endif
-       fqmp->dead = 0x1337;
+       tdctx->dead = 0x1337;
        dsched_set_proc_priv(p, 0);
-       fq_dereference_mpriv(fqmp); /* one for alloc, */
-       fq_dereference_mpriv(fqmp); /* one for ref */
+       fq_thread_ctx_unref(tdctx); /* one for alloc, */
+       fq_thread_ctx_unref(tdctx); /* one for ref */
        atomic_subtract_int(&fq_stats.nprocs, 1);
 }
 
 void
 fq_exit_thread(struct thread *td)
 {
-       struct dsched_fq_mpriv  *fqmp;
+       struct fq_thread_ctx    *tdctx;
 
        KKASSERT(td != NULL);
 
-       fqmp = dsched_get_thread_priv(td);
-       KKASSERT(fqmp != NULL);
+       tdctx = dsched_get_thread_priv(td);
+       KKASSERT(tdctx != NULL);
 #if 0
-       kprintf("exit_thread: fqmp = %p\n", fqmp);
+       kprintf("exit_thread: tdctx = %p\n", tdctx);
 #endif
-       fqmp->dead = 0x1337;
+       tdctx->dead = 0x1337;
        dsched_set_thread_priv(td, 0);
-       fq_dereference_mpriv(fqmp); /* one for alloc, */
-       fq_dereference_mpriv(fqmp); /* one for ref */
+       fq_thread_ctx_unref(tdctx); /* one for alloc, */
+       fq_thread_ctx_unref(tdctx); /* one for ref */
        atomic_subtract_int(&fq_stats.nthreads, 1);
 }
similarity index 81%
rename from sys/kern/subr_dsched.c
rename to sys/kern/kern_dsched.c
index c398b02..ac63398 100644 (file)
 #include <sys/fcntl.h>
 #include <machine/varargs.h>
 
-MALLOC_DEFINE(M_DSCHED, "dsched", "Disk Scheduler Framework allocations");
-
-static dsched_prepare_t        default_prepare;
+static dsched_prepare_t                default_prepare;
 static dsched_teardown_t       default_teardown;
-static dsched_flush_t  default_flush;
-static dsched_cancel_t default_cancel;
-static dsched_queue_t  default_queue;
-#if 0
-static biodone_t       default_completed;
-#endif
+static dsched_flush_t          default_flush;
+static dsched_cancel_t         default_cancel;
+static dsched_queue_t          default_queue;
 
-dsched_new_buf_t       *default_new_buf;
-dsched_new_proc_t      *default_new_proc;
-dsched_new_thread_t    *default_new_thread;
-dsched_exit_buf_t      *default_exit_buf;
-dsched_exit_proc_t     *default_exit_proc;
-dsched_exit_thread_t   *default_exit_thread;
+static dsched_new_buf_t                *default_new_buf;
+static dsched_new_proc_t       *default_new_proc;
+static dsched_new_thread_t     *default_new_thread;
+static dsched_exit_buf_t       *default_exit_buf;
+static dsched_exit_proc_t      *default_exit_proc;
+static dsched_exit_thread_t    *default_exit_thread;
 
 static d_open_t      dsched_dev_open;
 static d_close_t     dsched_dev_close;
@@ -89,10 +84,9 @@ static cdev_t        dsched_dev;
 static struct dsched_policy_head dsched_policy_list =
                TAILQ_HEAD_INITIALIZER(dsched_policy_list);
 
-static struct dsched_ops dsched_default_ops = {
-       .head = {
-               .name = "noop"
-       },
+static struct dsched_policy dsched_default_policy = {
+       .name = "noop",
+
        .prepare = default_prepare,
        .teardown = default_teardown,
        .flush = default_flush,
@@ -131,14 +125,14 @@ dsched_debug(int level, char *fmt, ...)
  * none specified, the default policy is used.
  */
 void
-dsched_create(struct disk *dp, const char *head_name, int unit)
+dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
 {
        char tunable_key[SPECNAMELEN + 48];
        char sched_policy[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_policy *policy = NULL;
 
        /* Also look for serno stuff? */
-       /* kprintf("dsched_create() for disk %s%d\n", head_name, unit); */
+       /* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
        lockmgr(&dsched_lock, LK_EXCLUSIVE);
 
        ksnprintf(tunable_key, sizeof(tunable_key), "kern.dsched.policy.%s%d",
@@ -164,9 +158,9 @@ dsched_create(struct disk *dp, const char *head_name, int unit)
        if (!policy) {
                dsched_debug(0, "No policy for %s%d specified, "
                    "or policy not found\n", head_name, unit);
-               dsched_set_policy(dp, &dsched_default_ops);
+               dsched_set_policy(dp, &dsched_default_policy);
        } else {
-               dsched_set_policy(dp, policy->d_ops);
+               dsched_set_policy(dp, policy);
        }
 
        lockmgr(&dsched_lock, LK_RELEASE);
@@ -177,18 +171,18 @@ dsched_create(struct disk *dp, const char *head_name, int unit)
  * shuts down the scheduler core and cancels all remaining bios
  */
 void
-dsched_destroy(struct disk *dp)
+dsched_disk_destroy_callback(struct disk *dp)
 {
-       struct dsched_ops *old_ops;
+       struct dsched_policy *old_policy;
 
        lockmgr(&dsched_lock, LK_EXCLUSIVE);
 
-       old_ops = dp->d_sched_ops;
-       dp->d_sched_ops = &dsched_default_ops;
-       old_ops->cancel_all(dp);
-       old_ops->teardown(dp);
-       atomic_subtract_int(&old_ops->head.ref_count, 1);
-       KKASSERT(old_ops->head.ref_count >= 0);
+       old_policy = dp->d_sched_policy;
+       dp->d_sched_policy = &dsched_default_policy;
+       old_policy->cancel_all(dp);
+       old_policy->teardown(dp);
+       atomic_subtract_int(&old_policy->ref_count, 1);
+       KKASSERT(old_policy->ref_count >= 0);
 
        lockmgr(&dsched_lock, LK_RELEASE);
 }
@@ -198,11 +192,11 @@ void
 dsched_queue(struct disk *dp, struct bio *bio)
 {
        int error = 0;
-       error = dp->d_sched_ops->bio_queue(dp, bio);
+       error = dp->d_sched_policy->bio_queue(dp, bio);
 
        if (error) {
                if (bio->bio_buf->b_cmd == BUF_CMD_FLUSH) {
-                       dp->d_sched_ops->flush(dp, bio);
+                       dp->d_sched_policy->flush(dp, bio);
                }
                dsched_strategy_raw(dp, bio);
        }
@@ -214,20 +208,20 @@ dsched_queue(struct disk *dp, struct bio *bio)
  * registers the policy in the local policy list.
  */
 int
-dsched_register(struct dsched_ops *d_ops)
+dsched_register(struct dsched_policy *d_policy)
 {
        struct dsched_policy *policy;
        int error = 0;
 
        lockmgr(&dsched_lock, LK_EXCLUSIVE);
 
-       policy = dsched_find_policy(d_ops->head.name);
+       policy = dsched_find_policy(d_policy->name);
 
        if (!policy) {
-               if ((d_ops->new_buf != NULL) || (d_ops->new_proc != NULL) ||
-                   (d_ops->new_thread != NULL)) {
+               if ((d_policy->new_buf != NULL) || (d_policy->new_proc != NULL) ||
+                   (d_policy->new_thread != NULL)) {
                        /*
-                        * Policy ops has hooks for proc/thread/buf creation,
+                        * Policy has hooks for proc/thread/buf creation,
                         * so check if there are already hooks for those present
                         * and if so, stop right now.
                         */
@@ -241,21 +235,19 @@ dsched_register(struct dsched_ops *d_ops)
                        }
 
                        /* If everything is fine, just register the hooks */
-                       default_new_buf = d_ops->new_buf;
-                       default_new_proc = d_ops->new_proc;
-                       default_new_thread = d_ops->new_thread;
-                       default_exit_buf = d_ops->exit_buf;
-                       default_exit_proc = d_ops->exit_proc;
-                       default_exit_thread = d_ops->exit_thread;
+                       default_new_buf = d_policy->new_buf;
+                       default_new_proc = d_policy->new_proc;
+                       default_new_thread = d_policy->new_thread;
+                       default_exit_buf = d_policy->exit_buf;
+                       default_exit_proc = d_policy->exit_proc;
+                       default_exit_thread = d_policy->exit_thread;
                }
 
-               policy = kmalloc(sizeof(struct dsched_policy), M_DSCHED, M_WAITOK);
-               policy->d_ops = d_ops;
-               TAILQ_INSERT_TAIL(&dsched_policy_list, policy, link);
-               atomic_add_int(&policy->d_ops->head.ref_count, 1);
+               TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
+               atomic_add_int(&d_policy->ref_count, 1);
        } else {
                dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
-                   d_ops->head.name);
+                   d_policy->name);
                error = 1;
        }
 
@@ -269,20 +261,19 @@ done:
  * unregisters the policy
  */
 int
-dsched_unregister(struct dsched_ops *d_ops)
+dsched_unregister(struct dsched_policy *d_policy)
 {
        struct dsched_policy *policy;
 
        lockmgr(&dsched_lock, LK_EXCLUSIVE);
-       policy = dsched_find_policy(d_ops->head.name);
+       policy = dsched_find_policy(d_policy->name);
 
        if (policy) {
-               if (policy->d_ops->head.ref_count > 1)
+               if (policy->ref_count > 1)
                        return 1;
                TAILQ_REMOVE(&dsched_policy_list, policy, link);
-               atomic_subtract_int(&policy->d_ops->head.ref_count, 1);
-               KKASSERT(policy->d_ops->head.ref_count >= 0);
-               kfree(policy, M_DSCHED);
+               atomic_subtract_int(&policy->ref_count, 1);
+               KKASSERT(policy->ref_count >= 0);
        }
        lockmgr(&dsched_lock, LK_RELEASE);
        return 0;
@@ -294,26 +285,26 @@ dsched_unregister(struct dsched_ops *d_ops)
  * enabling the new one.
  */
 int
-dsched_switch(struct disk *dp, struct dsched_ops *new_ops)
+dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
 {
-       struct dsched_ops *old_ops;
+       struct dsched_policy *old_policy;
 
        /* If we are asked to set the same policy, do nothing */
-       if (dp->d_sched_ops == new_ops)
+       if (dp->d_sched_policy == new_policy)
                return 0;
 
        /* lock everything down, diskwise */
        lockmgr(&dsched_lock, LK_EXCLUSIVE);
-       old_ops = dp->d_sched_ops;
+       old_policy = dp->d_sched_policy;
 
-       atomic_subtract_int(&dp->d_sched_ops->head.ref_count, 1);
-       KKASSERT(dp->d_sched_ops->head.ref_count >= 0);
+       atomic_subtract_int(&dp->d_sched_policy->ref_count, 1);
+       KKASSERT(dp->d_sched_policy->ref_count >= 0);
 
-       dp->d_sched_ops = &dsched_default_ops;
-       old_ops->teardown(dp);
+       dp->d_sched_policy = &dsched_default_policy;
+       old_policy->teardown(dp);
 
        /* Bring everything back to life */
-       dsched_set_policy(dp, new_ops);
+       dsched_set_policy(dp, new_policy);
                lockmgr(&dsched_lock, LK_RELEASE);
        return 0;
 }
@@ -324,7 +315,7 @@ dsched_switch(struct disk *dp, struct dsched_ops *new_ops)
  * Also initializes the core for the policy
  */
 void
-dsched_set_policy(struct disk *dp, struct dsched_ops *new_ops)
+dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
 {
        int locked = 0;
 
@@ -334,11 +325,11 @@ dsched_set_policy(struct disk *dp, struct dsched_ops *new_ops)
                locked = 1;
        }
 
-       new_ops->prepare(dp);
-       dp->d_sched_ops = new_ops;
-       atomic_add_int(&new_ops->head.ref_count, 1);
+       new_policy->prepare(dp);
+       dp->d_sched_policy = new_policy;
+       atomic_add_int(&new_policy->ref_count, 1);
        kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
-           new_ops->head.name);
+           new_policy->name);
 
        /* If we acquired the lock, we also get rid of it */
        if (locked)
@@ -359,7 +350,7 @@ dsched_find_policy(char *search)
        }
 
        TAILQ_FOREACH(policy, &dsched_policy_list, link) {
-               if (!strcmp(policy->d_ops->head.name, search)) {
+               if (!strcmp(policy->name, search)) {
                        policy_found = policy;
                        break;
                }
@@ -389,10 +380,10 @@ dsched_find_disk(char *search)
 }
 
 struct disk*
-dsched_disk_enumerate(struct disk *dp, struct dsched_ops *ops)
+dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
 {
        while ((dp = disk_enumerate(dp))) {
-               if (dp->d_sched_ops == ops)
+               if (dp->d_sched_policy == policy)
                        return dp;
        }
 
@@ -530,31 +521,31 @@ dsched_exit_thread(struct thread *td)
                default_exit_thread(td);
 }
 
-int
+static int
 default_prepare(struct disk *dp)
 {
        return 0;
 }
 
-void
+static void
 default_teardown(struct disk *dp)
 {
 
 }
 
-void
+static void
 default_flush(struct disk *dp, struct bio *bio)
 {
 
 }
 
-void
+static void
 default_cancel(struct disk *dp)
 {
 
 }
 
-int
+static int
 default_queue(struct disk *dp, struct bio *bio)
 {
        dsched_strategy_raw(dp, bio);
@@ -564,17 +555,6 @@ default_queue(struct disk *dp, struct bio *bio)
        return 0;
 }
 
-#if 0
-void
-default_completed(struct bio *bp)
-{
-       struct bio *obio;
-
-       obio = pop_bio(bp);
-       biodone(obio);
-}
-#endif
-
 /*
  * dsched device stuff
  */
@@ -592,8 +572,8 @@ dsched_dev_list_disks(struct dsched_ioctl *data)
 
        strncpy(data->dev_name, dp->d_cdev->si_name, sizeof(data->dev_name));
 
-       if (dp->d_sched_ops) {
-               strncpy(data->pol_name, dp->d_sched_ops->head.name,
+       if (dp->d_sched_policy) {
+               strncpy(data->pol_name, dp->d_sched_policy->name,
                    sizeof(data->pol_name));
        } else {
                strncpy(data->pol_name, "N/A (error)", 12);
@@ -611,10 +591,10 @@ dsched_dev_list_disk(struct dsched_ioctl *data)
        while ((dp = disk_enumerate(dp))) {
                if (!strncmp(dp->d_cdev->si_name, data->dev_name,
                    sizeof(data->dev_name))) {
-                       KKASSERT(dp->d_sched_ops != NULL);
+                       KKASSERT(dp->d_sched_policy != NULL);
 
                        found = 1;
-                       strncpy(data->pol_name, dp->d_sched_ops->head.name,
+                       strncpy(data->pol_name, dp->d_sched_policy->name,
                            sizeof(data->pol_name));
                        break;
                }
@@ -636,7 +616,7 @@ dsched_dev_list_policies(struct dsched_ioctl *data)
        if (pol == NULL)
                return -1;
 
-       strncpy(data->pol_name, pol->d_ops->head.name, sizeof(data->pol_name));
+       strncpy(data->pol_name, pol->name, sizeof(data->pol_name));
        return 0;
 }
 
@@ -652,7 +632,7 @@ dsched_dev_handle_switch(char *disk, char *policy)
        if ((dp == NULL) || (pol == NULL))
                return -1;
 
-       return (dsched_switch(dp, pol->d_ops));
+       return (dsched_switch(dp, pol));
 }
 
 static int
@@ -732,7 +712,7 @@ static void
 dsched_init(void)
 {
        lockinit(&dsched_lock, "dsched lock", 0, 0);
-       dsched_register(&dsched_default_ops);
+       dsched_register(&dsched_default_policy);
 }
 
 static void
index eb4360a..f79554e 100644 (file)
@@ -531,7 +531,7 @@ disk_create(int unit, struct disk *dp, struct dev_ops *raw_ops)
                    "disk_create (end): %s%d\n",
                        raw_ops->head.name, unit);
 
-       dsched_create(dp, raw_ops->head.name, unit);
+       dsched_disk_create_callback(dp, raw_ops->head.name, unit);
        return (dp->d_rawdev);
 }
 
@@ -626,6 +626,7 @@ disk_setdiskinfo_sync(struct disk *disk, struct disk_info *info)
 void
 disk_destroy(struct disk *disk)
 {
+       dsched_disk_destroy_callback(disk);
        disk_msg_send_sync(DISK_DISK_DESTROY, disk, NULL);
        return;
 }
index 3afb317..c736582 100644 (file)
@@ -129,7 +129,7 @@ struct disk {
        struct disk_info        d_info;         /* info structure for media */
        void                    *d_dsched_priv1;/* I/O scheduler priv. data */
        void                    *d_dsched_priv2;/* I/O scheduler priv. data */
-       struct dsched_ops       *d_sched_ops;   /* I/O scheduler ops */
+       struct dsched_policy    *d_sched_policy;/* I/O scheduler policy */
        LIST_ENTRY(disk)        d_list;
 };
 
index f61dd34..dc81978 100644 (file)
@@ -91,12 +91,13 @@ typedef     void    dsched_exit_buf_t(struct buf *bp);
 typedef        void    dsched_exit_proc_t(struct proc *p);
 typedef        void    dsched_exit_thread_t(struct thread *td);
 
-struct dsched_ops {
-       struct {
-               char            name[DSCHED_POLICY_NAME_LENGTH];
-               uint64_t        uniq_id;
-               int             ref_count;
-       } head;
+
+struct dsched_policy {
+       char                    name[DSCHED_POLICY_NAME_LENGTH];
+       uint64_t                uniq_id;
+       int                     ref_count;
+
+       TAILQ_ENTRY(dsched_policy) link;
 
        dsched_prepare_t        *prepare;
        dsched_teardown_t       *teardown;
@@ -112,35 +113,19 @@ struct dsched_ops {
        dsched_exit_thread_t    *exit_thread;
 };
 
-struct dsched_policy {
-       TAILQ_ENTRY(dsched_policy) link;
-
-       struct dsched_ops       *d_ops;
-};
-
-struct dsched_object
-{
-       struct disk     *dp;
-       struct bio      *bio;
-       int             pid;
-       struct thread   *thread;
-       struct proc     *proc;
-};
-
-TAILQ_HEAD(dschedq, dsched_object);
 TAILQ_HEAD(dsched_policy_head, dsched_policy);
 
-void   dsched_create(struct disk *dp, const char *head_name, int unit);
-void   dsched_destroy(struct disk *dp);
+void   dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit);
+void   dsched_disk_destroy_callback(struct disk *dp);
 void   dsched_queue(struct disk *dp, struct bio *bio);
-int    dsched_register(struct dsched_ops *d_ops);
-int    dsched_unregister(struct dsched_ops *d_ops);
-int    dsched_switch(struct disk *dp, struct dsched_ops *new_ops);
-void   dsched_set_policy(struct disk *dp, struct dsched_ops *new_ops);
+int    dsched_register(struct dsched_policy *d_policy);
+int    dsched_unregister(struct dsched_policy *d_policy);
+int    dsched_switch(struct disk *dp, struct dsched_policy *new_policy);
+void   dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy);
 struct dsched_policy *dsched_find_policy(char *search);
 struct disk    *dsched_find_disk(char *search);
 struct dsched_policy *dsched_policy_enumerate(struct dsched_policy *pol);
-struct disk    *dsched_disk_enumerate(struct disk *dp, struct dsched_ops *ops);
+struct disk    *dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy);
 void   dsched_cancel_bio(struct bio *bp);
 void   dsched_strategy_raw(struct disk *dp, struct bio *bp);
 void   dsched_strategy_sync(struct disk *dp, struct bio *bp);
index a395a14..91e79d4 100644 (file)
@@ -22,9 +22,9 @@ int main(void)
                err(1, "sysctlbyname");
 
        printf( "Allocations\n"
-               "FQMP:\t%d\n"
-               "FQP:\t%d\n"
-               "DPRIV:\t%d\n"
+               "thread_ctx:\t%d\n"
+               "thread_io:\t%d\n"
+               "disk_ctx:\t%d\n"
                "---------------------------------------------\n"
                "Procs/Threads tracked\n"
                "procs:\t\t%d\n"
@@ -36,14 +36,14 @@ int main(void)
                "Transactions\n"
                "Issued:\t\t%d\n"
                "Completed:\t%d\n"
-               "without FQMP:\t%d\n"
+               "without thread_ctx:\t%d\n"
                "---------------------------------------------\n"
                "Misc\n"
                "FQMP refs for buf:\t%d\n",
 
-               fq_stats.fqmp_allocations,
-               fq_stats.fqp_allocations,
-               fq_stats.dpriv_allocations,
+               fq_stats.tdctx_allocations,
+               fq_stats.tdio_allocations,
+               fq_stats.diskctx_allocations,
 
                fq_stats.nprocs,
                fq_stats.nthreads,
@@ -52,8 +52,7 @@ int main(void)
 
                fq_stats.transactions,
                fq_stats.transactions_completed,
-               fq_stats.no_fqmp,
-               fq_stats.nbufs
+               fq_stats.no_tdctx
                );