From: Alex Hornung Date: Thu, 22 Apr 2010 09:48:50 +0000 (+0000) Subject: dsched - expand framework to track threads X-Git-Tag: v2.7.3~70 X-Git-Url: https://gitweb.dragonflybsd.org/~nant/dragonfly.git/commitdiff_plain/e02e815e5b4a80e8f373bebaff9be146d3092b56 dsched - expand framework to track threads * The dsched framework now takes care of tracking threads/procs and bufs. Most of this code was factored out of dsched_fq. * fq now uses the new, much simplified API, reducing the lines of code by about 50%. * this will also allow for runtime policy switching, even to other policies that need to track procs/threads. Previously it was only possible to have one policy that tracked threads. * Now all policies can be loaded at any time and will still be able to track all the threads. * dsched_fq is now a module that can be loaded if required. Once loaded the policy is registered and ready to use with any disk. * There is also a kernel option DSCHED_FQ now; otherwise dsched_fq_load="YES" has to be set in loader.conf to be able to use fq from boot on. * Make a dsched sysctl tree. 
Suggested-by: Aggelos Economopoulos --- diff --git a/sys/Makefile b/sys/Makefile index ad77163e81..b02eae7559 100644 --- a/sys/Makefile +++ b/sys/Makefile @@ -14,6 +14,7 @@ SUBDIR+=${MODULES_OVERRIDE} .else SUBDIR+=bus crypto emulation firmware dev kern net netbt netgraph netproto vfs SUBDIR+=libiconv +SUBDIR+=dsched .endif .endif diff --git a/sys/Makefile.modules b/sys/Makefile.modules index bd5f3ba32a..9081671a15 100644 --- a/sys/Makefile.modules +++ b/sys/Makefile.modules @@ -9,6 +9,7 @@ SUBDIR=bus .endif SUBDIR+=crypto emulation dev kern net netbt netgraph netproto vfs SUBDIR+=libiconv +SUBDIR+=dsched .endif .include diff --git a/sys/conf/files b/sys/conf/files index d0d1d17ccb..fd4a647223 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -723,9 +723,6 @@ kern/subr_autoconf.c standard kern/subr_bus.c standard kern/subr_busdma.c standard kern/subr_devstat.c standard -dsched/fq/dsched_fq_core.c standard -dsched/fq/dsched_fq_diskops.c standard -dsched/fq/dsched_fq_procops.c standard kern/subr_disk.c standard kern/subr_disklabel32.c standard kern/subr_disklabel64.c standard @@ -1784,6 +1781,10 @@ ${OSACPI_MI_DIR}/acpi_toshiba/acpi_toshiba.c optional acpi_toshiba acpi ${OSACPI_MI_DIR}/acpi_video/acpi_video.c optional acpi_video acpi ${OSACPI_MI_DIR}/aibs/atk0110.c optional aibs acpi +#dsched stuff +dsched/fq/dsched_fq_core.c optional dsched_fq +dsched/fq/dsched_fq_diskops.c optional dsched_fq + # ACPICA code ${ACPICA_DIR}/debugger/dbcmds.c optional acpi acpi_debug ${ACPICA_DIR}/debugger/dbdisply.c optional acpi acpi_debug diff --git a/sys/conf/options b/sys/conf/options index 5a0d35e35a..31ec5f76ef 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -656,6 +656,9 @@ SCTP_MAP_LOGGING opt_sctp.h # SYSLINK opt_syslink.h +# DSCHED stuff +DSCHED_FQ opt_dsched.h + # Receive Side Scaling RSS opt_rss.h RSS_DEBUG opt_rss.h diff --git a/sys/dsched/Makefile b/sys/dsched/Makefile new file mode 100644 index 0000000000..950fa6c0a8 --- /dev/null +++ b/sys/dsched/Makefile @@ 
-0,0 +1,5 @@ +# $DragonFly: src/sys/dev/Makefile,v 1.12 2007/01/30 14:50:10 corecode Exp $ + +SUBDIR= fq + +.include diff --git a/sys/dsched/fq/Makefile b/sys/dsched/fq/Makefile new file mode 100644 index 0000000000..5d15d6f3be --- /dev/null +++ b/sys/dsched/fq/Makefile @@ -0,0 +1,4 @@ +KMOD= dsched_fq +SRCS= dsched_fq_core.c dsched_fq_diskops.c + +.include diff --git a/sys/dsched/fq/dsched_fq.h b/sys/dsched/fq/dsched_fq.h index 001e5eb5e7..a77bd484a4 100644 --- a/sys/dsched/fq/dsched_fq.h +++ b/sys/dsched/fq/dsched_fq.h @@ -49,65 +49,6 @@ #include #endif -#define FQ_THREAD_IO_LOCKINIT(x) lockinit(&(x)->lock, "tdiobioq", 0, LK_CANRECURSE) -#define FQ_THREAD_IO_LOCK(x) fq_thread_io_ref((x)); \ - lockmgr(&(x)->lock, LK_EXCLUSIVE) -#define FQ_THREAD_IO_UNLOCK(x) lockmgr(&(x)->lock, LK_RELEASE); \ - fq_thread_io_unref((x)); - -#define FQ_DISK_CTX_LOCKINIT(x) lockinit(&(x)->lock, "tdiodiskq", 0, LK_CANRECURSE) -#define FQ_DISK_CTX_LOCK(x) fq_disk_ctx_ref((x)); \ - lockmgr(&(x)->lock, LK_EXCLUSIVE) -#define FQ_DISK_CTX_UNLOCK(x) lockmgr(&(x)->lock, LK_RELEASE); \ - fq_disk_ctx_unref((x)) -#define FQ_DISK_CTX_LOCK_ASSERT(x) KKASSERT(lockstatus(&(x)->lock, curthread) == LK_EXCLUSIVE) - -#define FQ_GLOBAL_THREAD_CTX_LOCKINIT(x) lockinit(&fq_tdctx_lock, "tdctxglob", 0, LK_CANRECURSE) -#define FQ_GLOBAL_THREAD_CTX_LOCK(x) lockmgr(&fq_tdctx_lock, LK_EXCLUSIVE) -#define FQ_GLOBAL_THREAD_CTX_UNLOCK(x) lockmgr(&fq_tdctx_lock, LK_RELEASE) - - - -#define FQ_THREAD_CTX_LOCKINIT(x) spin_init(&(x)->lock) -#if 0 -#define FQ_THREAD_IO_LOCKINIT(x) spin_init(&(x)->lock) -#endif -#if 0 -#define FQ_DISK_CTX_LOCKINIT(x) spin_init(&(x)->lock) -#endif -#if 0 -#define FQ_GLOBAL_THREAD_CTX_LOCKINIT(x) spin_init(&fq_tdctx_lock) -#endif - -#define FQ_THREAD_CTX_LOCK(x) fq_thread_ctx_ref((x)); \ - spin_lock_wr(&(x)->lock) -#if 0 -#define FQ_THREAD_IO_LOCK(x) fq_thread_io_ref((x)); \ - spin_lock_wr(&(x)->lock) -#endif -#if 0 -#define FQ_DISK_CTX_LOCK(x) fq_disk_ctx_ref((x)); \ - 
spin_lock_wr(&(x)->lock) -#endif -#if 0 -#define FQ_GLOBAL_THREAD_CTX_LOCK(x) spin_lock_wr(&fq_tdctx_lock) -#endif - -#define FQ_THREAD_CTX_UNLOCK(x) spin_unlock_wr(&(x)->lock); \ - fq_thread_ctx_unref((x)) - -#if 0 -#define FQ_THREAD_IO_UNLOCK(x) spin_unlock_wr(&(x)->lock); \ - fq_thread_io_unref((x)) -#endif -#if 0 -#define FQ_DISK_CTX_UNLOCK(x) spin_unlock_wr(&(x)->lock); \ - fq_disk_ctx_unref((x)) -#endif -#if 0 -#define FQ_GLOBAL_THREAD_CTX_UNLOCK(x) spin_unlock_wr(&fq_tdctx_lock) -#endif - #define FQ_PRIO_BIAS 5 #define FQ_PRIO_MAX 10 #define FQ_PRIO_MIN 1 @@ -120,41 +61,11 @@ struct disk; struct proc; -#define FQ_LINKED_DISK_CTX 0x01 -#define FQ_LINKED_THREAD_CTX 0x02 - -struct fq_thread_io { - TAILQ_ENTRY(fq_thread_io) link; - TAILQ_ENTRY(fq_thread_io) dlink; - TAILQ_HEAD(, bio) queue; /* IO queue (bio) */ - - struct lock lock; - struct disk *dp; - struct fq_disk_ctx *diskctx; - struct fq_thread_ctx *tdctx; - struct proc *p; - - int32_t qlength; /* IO queue length */ - int32_t flags; - - int refcount; - int32_t transactions; /* IOs completed so far during current interval */ - int32_t avg_latency; /* avg latency for current interval IOs */ - int32_t interval_transactions; /* IOs completed during last interval */ - int32_t interval_avg_latency; /* avg latency for last interval IOs */ - int32_t max_tp; /* rate limit of transactions per interval */ - int32_t issued; /* IOs issued to disk (but not completed) */ - - int rebalance; /* thread needs to rebalance w/ fq_balance_self */ -}; - struct fq_disk_ctx { + struct dsched_disk_ctx head; + struct thread *td; /* dispatcher thread td */ struct thread *td_balance; /* balancer thread td */ - struct disk *dp; /* back pointer to disk struct */ - struct lock lock; - int refcount; - int avg_rq_time; /* XXX: not yet used */ int32_t incomplete_tp; /* IOs issued but not completed */ int idle; /* disk idle ? */ @@ -169,39 +80,27 @@ struct fq_disk_ctx { int disk_busy; /* disk >90% busy during cur. 
interval */ int64_t budgetpb[FQ_PRIO_MAX+1];/* next interval budget for each thread in each prio */ - - /* list contains all fq_thread_io for this disk */ - TAILQ_HEAD(, fq_thread_io) fq_tdio_list; /* list of thread_io of disk */ - TAILQ_ENTRY(fq_disk_ctx) link; }; -struct fq_thread_ctx { - struct proc *p; - struct thread *td; - int dead; - struct spinlock lock; - int refcount; - TAILQ_HEAD(, fq_thread_io) fq_tdio_list; /* list of thread_io */ - TAILQ_ENTRY(fq_thread_ctx) link; -}; +struct fq_thread_io { + struct dsched_thread_io head; + int32_t transactions; /* IOs completed so far during current interval */ + int32_t avg_latency; /* avg latency for current interval IOs */ + int32_t interval_transactions; /* IOs completed during last interval */ + int32_t interval_avg_latency; /* avg latency for last interval IOs */ + int32_t max_tp; /* rate limit of transactions per interval */ + int32_t issued; /* IOs issued to disk (but not completed) */ + int rebalance; /* thread needs to rebalance w/ fq_balance_self */ +}; -struct fq_thread_io *fq_thread_io_alloc(struct disk *dp, struct fq_thread_ctx *tdctx); -struct fq_disk_ctx *fq_disk_ctx_alloc(struct disk *dp); -struct fq_thread_ctx *fq_thread_ctx_alloc(struct proc *p); void fq_balance_thread(struct fq_disk_ctx *diskctx); void fq_dispatcher(struct fq_disk_ctx *diskctx); biodone_t fq_completed; -void fq_disk_ctx_ref(struct fq_disk_ctx *diskctx); -void fq_thread_io_ref(struct fq_thread_io *tdio); -void fq_thread_ctx_ref(struct fq_thread_ctx *tdctx); -void fq_disk_ctx_unref(struct fq_disk_ctx *diskctx); -void fq_thread_io_unref(struct fq_thread_io *tdio); -void fq_thread_ctx_unref(struct fq_thread_ctx *tdctx); void fq_dispatch(struct fq_disk_ctx *diskctx, struct bio *bio, struct fq_thread_io *tdio); void fq_drain(struct fq_disk_ctx *diskctx, int mode); @@ -210,20 +109,11 @@ void fq_balance_self(struct fq_thread_io *tdio); struct dsched_fq_stats { - int32_t tdctx_allocations; - int32_t tdio_allocations; - int32_t 
diskctx_allocations; - int32_t procs_limited; int32_t transactions; int32_t transactions_completed; int32_t cancelled; - - int32_t no_tdctx; - - int32_t nthreads; - int32_t nprocs; }; #endif /* _DSCHED_FQ_H_ */ diff --git a/sys/dsched/fq/dsched_fq_core.c b/sys/dsched/fq/dsched_fq_core.c index 01addf6889..213f2f54e1 100644 --- a/sys/dsched/fq/dsched_fq_core.c +++ b/sys/dsched/fq/dsched_fq_core.c @@ -41,287 +41,33 @@ #include #include #include -#include #include #include -#include -#include -#include #include -#include -#include -#include -#include #include +#include #include -#include #include #include -MALLOC_DECLARE(M_DSCHEDFQ); +static int dsched_fq_version_maj = 1; +static int dsched_fq_version_min = 0; -static int dsched_fq_version_maj = 0; -static int dsched_fq_version_min = 8; +/* Make sure our structs fit */ +CTASSERT(sizeof(struct fq_thread_io) <= DSCHED_THREAD_IO_MAX_SZ); +CTASSERT(sizeof(struct fq_disk_ctx) <= DSCHED_DISK_CTX_MAX_SZ); struct dsched_fq_stats fq_stats; -struct objcache_malloc_args fq_disk_ctx_malloc_args = { - sizeof(struct fq_disk_ctx), M_DSCHEDFQ }; -struct objcache_malloc_args fq_thread_io_malloc_args = { - sizeof(struct fq_thread_io), M_DSCHEDFQ }; -struct objcache_malloc_args fq_thread_ctx_malloc_args = { - sizeof(struct fq_thread_ctx), M_DSCHEDFQ }; - -static struct objcache *fq_diskctx_cache; -static struct objcache *fq_tdctx_cache; -static struct objcache *fq_tdio_cache; - -TAILQ_HEAD(, fq_thread_ctx) dsched_tdctx_list = - TAILQ_HEAD_INITIALIZER(dsched_tdctx_list); - -struct lock fq_tdctx_lock; - extern struct dsched_policy dsched_fq_policy; -void -fq_disk_ctx_ref(struct fq_disk_ctx *diskctx) -{ - int refcount; - - refcount = atomic_fetchadd_int(&diskctx->refcount, 1); - - KKASSERT(refcount >= 0); -} - -void -fq_thread_io_ref(struct fq_thread_io *tdio) -{ - int refcount; - - refcount = atomic_fetchadd_int(&tdio->refcount, 1); - - KKASSERT(refcount >= 0); -} - -void -fq_thread_ctx_ref(struct fq_thread_ctx *tdctx) -{ - int refcount; - 
- refcount = atomic_fetchadd_int(&tdctx->refcount, 1); - - KKASSERT(refcount >= 0); -} - -void -fq_disk_ctx_unref(struct fq_disk_ctx *diskctx) -{ - struct fq_thread_io *tdio, *tdio2; - int refcount; - - refcount = atomic_fetchadd_int(&diskctx->refcount, -1); - - - KKASSERT(refcount >= 0 || refcount <= -0x400); - - if (refcount == 1) { - atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */ -#if 1 - kprintf("diskctx (%p) destruction started, trace:\n", diskctx); - print_backtrace(4); -#endif - lockmgr(&diskctx->lock, LK_EXCLUSIVE); - TAILQ_FOREACH_MUTABLE(tdio, &diskctx->fq_tdio_list, dlink, tdio2) { - TAILQ_REMOVE(&diskctx->fq_tdio_list, tdio, dlink); - tdio->flags &= ~FQ_LINKED_DISK_CTX; - fq_thread_io_unref(tdio); - } - lockmgr(&diskctx->lock, LK_RELEASE); - - objcache_put(fq_diskctx_cache, diskctx); - atomic_subtract_int(&fq_stats.diskctx_allocations, 1); - } -} - -void -fq_thread_io_unref(struct fq_thread_io *tdio) -{ - struct fq_thread_ctx *tdctx; - struct fq_disk_ctx *diskctx; - int refcount; - - refcount = atomic_fetchadd_int(&tdio->refcount, -1); - - KKASSERT(refcount >= 0 || refcount <= -0x400); - - if (refcount == 1) { - atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */ -#if 0 - kprintf("tdio (%p) destruction started, trace:\n", tdio); - print_backtrace(8); -#endif - diskctx = tdio->diskctx; - KKASSERT(diskctx != NULL); - KKASSERT(tdio->qlength == 0); - - if (tdio->flags & FQ_LINKED_DISK_CTX) { - lockmgr(&diskctx->lock, LK_EXCLUSIVE); - - TAILQ_REMOVE(&diskctx->fq_tdio_list, tdio, dlink); - tdio->flags &= ~FQ_LINKED_DISK_CTX; - - lockmgr(&diskctx->lock, LK_RELEASE); - } - - if (tdio->flags & FQ_LINKED_THREAD_CTX) { - tdctx = tdio->tdctx; - KKASSERT(tdctx != NULL); - - spin_lock_wr(&tdctx->lock); - - TAILQ_REMOVE(&tdctx->fq_tdio_list, tdio, link); - tdio->flags &= ~FQ_LINKED_THREAD_CTX; - - spin_unlock_wr(&tdctx->lock); - } - - objcache_put(fq_tdio_cache, tdio); - 
atomic_subtract_int(&fq_stats.tdio_allocations, 1); -#if 0 - fq_disk_ctx_unref(diskctx); -#endif - } -} - -void -fq_thread_ctx_unref(struct fq_thread_ctx *tdctx) -{ - struct fq_thread_io *tdio, *tdio2; - int refcount; - - refcount = atomic_fetchadd_int(&tdctx->refcount, -1); - - KKASSERT(refcount >= 0 || refcount <= -0x400); - - if (refcount == 1) { - atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */ -#if 0 - kprintf("tdctx (%p) destruction started, trace:\n", tdctx); - print_backtrace(8); -#endif - FQ_GLOBAL_THREAD_CTX_LOCK(); - - TAILQ_FOREACH_MUTABLE(tdio, &tdctx->fq_tdio_list, link, tdio2) { - TAILQ_REMOVE(&tdctx->fq_tdio_list, tdio, link); - tdio->flags &= ~FQ_LINKED_THREAD_CTX; - fq_thread_io_unref(tdio); - } - TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link); - - FQ_GLOBAL_THREAD_CTX_UNLOCK(); - - objcache_put(fq_tdctx_cache, tdctx); - atomic_subtract_int(&fq_stats.tdctx_allocations, 1); - } -} - - -struct fq_thread_io * -fq_thread_io_alloc(struct disk *dp, struct fq_thread_ctx *tdctx) -{ - struct fq_thread_io *tdio; -#if 0 - fq_disk_ctx_ref(dsched_get_disk_priv(dp)); -#endif - tdio = objcache_get(fq_tdio_cache, M_WAITOK); - bzero(tdio, sizeof(struct fq_thread_io)); - - /* XXX: maybe we do need another ref for the disk list for tdio */ - fq_thread_io_ref(tdio); - - FQ_THREAD_IO_LOCKINIT(tdio); - tdio->dp = dp; - - tdio->diskctx = dsched_get_disk_priv(dp); - TAILQ_INIT(&tdio->queue); - - TAILQ_INSERT_TAIL(&tdio->diskctx->fq_tdio_list, tdio, dlink); - tdio->flags |= FQ_LINKED_DISK_CTX; - - if (tdctx) { - tdio->tdctx = tdctx; - tdio->p = tdctx->p; - - /* Put the tdio in the tdctx list */ - FQ_THREAD_CTX_LOCK(tdctx); - TAILQ_INSERT_TAIL(&tdctx->fq_tdio_list, tdio, link); - FQ_THREAD_CTX_UNLOCK(tdctx); - tdio->flags |= FQ_LINKED_THREAD_CTX; - } - - atomic_add_int(&fq_stats.tdio_allocations, 1); - return tdio; -} - - -struct fq_disk_ctx * -fq_disk_ctx_alloc(struct disk *dp) -{ - struct fq_disk_ctx *diskctx; - - diskctx = 
objcache_get(fq_diskctx_cache, M_WAITOK); - bzero(diskctx, sizeof(struct fq_disk_ctx)); - fq_disk_ctx_ref(diskctx); - diskctx->dp = dp; - diskctx->avg_rq_time = 0; - diskctx->incomplete_tp = 0; - FQ_DISK_CTX_LOCKINIT(diskctx); - TAILQ_INIT(&diskctx->fq_tdio_list); - - atomic_add_int(&fq_stats.diskctx_allocations, 1); - return diskctx; -} - - -struct fq_thread_ctx * -fq_thread_ctx_alloc(struct proc *p) -{ - struct fq_thread_ctx *tdctx; - struct fq_thread_io *tdio; - struct disk *dp = NULL; - - tdctx = objcache_get(fq_tdctx_cache, M_WAITOK); - bzero(tdctx, sizeof(struct fq_thread_ctx)); - fq_thread_ctx_ref(tdctx); -#if 0 - kprintf("fq_thread_ctx_alloc, new tdctx = %p\n", tdctx); -#endif - FQ_THREAD_CTX_LOCKINIT(tdctx); - TAILQ_INIT(&tdctx->fq_tdio_list); - tdctx->p = p; - - while ((dp = dsched_disk_enumerate(dp, &dsched_fq_policy))) { - tdio = fq_thread_io_alloc(dp, tdctx); -#if 0 - fq_thread_io_ref(tdio); -#endif - } - - FQ_GLOBAL_THREAD_CTX_LOCK(); - TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link); - FQ_GLOBAL_THREAD_CTX_UNLOCK(); - - atomic_add_int(&fq_stats.tdctx_allocations, 1); - return tdctx; -} - - void fq_dispatcher(struct fq_disk_ctx *diskctx) { - struct fq_thread_ctx *tdctx; - struct fq_thread_io *tdio, *tdio2; + struct dsched_thread_ctx *tdctx; + struct dsched_thread_io *ds_tdio, *ds_tdio2; + struct fq_thread_io *tdio; struct bio *bio, *bio2; int idle; @@ -333,16 +79,13 @@ fq_dispatcher(struct fq_disk_ctx *diskctx) tdctx = dsched_get_thread_priv(curthread); KKASSERT(tdctx != NULL); - tdio = fq_thread_io_alloc(diskctx->dp, tdctx); -#if 0 - fq_thread_io_ref(tdio); -#endif + tdio = (struct fq_thread_io *)dsched_thread_io_alloc(diskctx->head.dp, tdctx, &dsched_fq_policy); - FQ_DISK_CTX_LOCK(diskctx); + DSCHED_DISK_CTX_LOCK(&diskctx->head); for(;;) { idle = 0; /* sleep ~60 ms */ - if ((lksleep(diskctx, &diskctx->lock, 0, "fq_dispatcher", hz/15) == 0)) { + if ((lksleep(diskctx, &diskctx->head.lock, 0, "fq_dispatcher", hz/15) == 0)) { /* * We've been woken 
up; this either means that we are * supposed to die away nicely or that the disk is idle. @@ -353,7 +96,7 @@ fq_dispatcher(struct fq_disk_ctx *diskctx) fq_drain(diskctx, FQ_DRAIN_FLUSH); /* Now we can safely unlock and exit */ - FQ_DISK_CTX_UNLOCK(diskctx); + DSCHED_DISK_CTX_UNLOCK(&diskctx->head); kprintf("fq_dispatcher is peacefully dying\n"); lwkt_exit(); /* NOTREACHED */ @@ -375,11 +118,12 @@ fq_dispatcher(struct fq_disk_ctx *diskctx) * to dispatch a few requests from each tdio as to ensure * real fairness. */ - TAILQ_FOREACH_MUTABLE(tdio, &diskctx->fq_tdio_list, dlink, tdio2) { - if (tdio->qlength == 0) + TAILQ_FOREACH_MUTABLE(ds_tdio, &diskctx->head.tdio_list, dlink, ds_tdio2) { + tdio = (struct fq_thread_io *)ds_tdio; + if (tdio->head.qlength == 0) continue; - FQ_THREAD_IO_LOCK(tdio); + DSCHED_THREAD_IO_LOCK(&tdio->head); if (atomic_cmpset_int(&tdio->rebalance, 1, 0)) fq_balance_self(tdio); /* @@ -391,15 +135,15 @@ fq_dispatcher(struct fq_disk_ctx *diskctx) tdio->max_tp += 5; } - TAILQ_FOREACH_MUTABLE(bio, &tdio->queue, link, bio2) { + TAILQ_FOREACH_MUTABLE(bio, &tdio->head.queue, link, bio2) { if (atomic_cmpset_int(&tdio->rebalance, 1, 0)) fq_balance_self(tdio); if ((tdio->max_tp > 0) && ((tdio->issued >= tdio->max_tp))) break; - TAILQ_REMOVE(&tdio->queue, bio, link); - --tdio->qlength; + TAILQ_REMOVE(&tdio->head.queue, bio, link); + --tdio->head.qlength; /* * beware that we do have an tdio reference @@ -407,7 +151,7 @@ fq_dispatcher(struct fq_disk_ctx *diskctx) */ fq_dispatch(diskctx, bio, tdio); } - FQ_THREAD_IO_UNLOCK(tdio); + DSCHED_THREAD_IO_UNLOCK(&tdio->head); } } @@ -416,22 +160,23 @@ fq_dispatcher(struct fq_disk_ctx *diskctx) void fq_balance_thread(struct fq_disk_ctx *diskctx) { - struct fq_thread_io *tdio, *tdio2; + struct dsched_thread_io *ds_tdio; + struct fq_thread_io *tdio; struct timeval tv, old_tv; int64_t total_budget, product; int64_t budget[FQ_PRIO_MAX+1]; int n, i, sum, total_disk_time; int lost_bits; - FQ_DISK_CTX_LOCK(diskctx); + 
DSCHED_DISK_CTX_LOCK(&diskctx->head); getmicrotime(&diskctx->start_interval); for (;;) { /* sleep ~1s */ - if ((lksleep(curthread, &diskctx->lock, 0, "fq_balancer", hz/2) == 0)) { + if ((lksleep(curthread, &diskctx->head.lock, 0, "fq_balancer", hz/2) == 0)) { if (__predict_false(diskctx->die)) { - FQ_DISK_CTX_UNLOCK(diskctx); + DSCHED_DISK_CTX_UNLOCK(&diskctx->head); lwkt_exit(); } } @@ -460,7 +205,8 @@ fq_balance_thread(struct fq_disk_ctx *diskctx) diskctx->idle_time = 0; lost_bits = 0; - TAILQ_FOREACH_MUTABLE(tdio, &diskctx->fq_tdio_list, dlink, tdio2) { + TAILQ_FOREACH(ds_tdio, &diskctx->head.tdio_list, dlink) { + tdio = (struct fq_thread_io *)ds_tdio; tdio->interval_avg_latency = tdio->avg_latency; tdio->interval_transactions = tdio->transactions; if (tdio->interval_transactions > 0) { @@ -473,12 +219,12 @@ fq_balance_thread(struct fq_disk_ctx *diskctx) total_budget >>= 1; } total_budget += product; - ++budget[(tdio->p) ? tdio->p->p_ionice : 0]; + ++budget[(tdio->head.p) ? tdio->head.p->p_ionice : 0]; KKASSERT(total_budget >= 0); dsched_debug(LOG_INFO, "%d) avg_latency = %d, transactions = %d, ioprio = %d\n", n, tdio->interval_avg_latency, tdio->interval_transactions, - (tdio->p) ? tdio->p->p_ionice : 0); + (tdio->head.p) ? tdio->head.p->p_ionice : 0); ++n; } else { tdio->max_tp = 0; @@ -525,7 +271,8 @@ fq_balance_thread(struct fq_disk_ctx *diskctx) } dsched_debug(4, "disk is %d%% busy\n", diskctx->disk_busy); - TAILQ_FOREACH(tdio, &diskctx->fq_tdio_list, dlink) { + TAILQ_FOREACH(ds_tdio, &diskctx->head.tdio_list, dlink) { + tdio = (struct fq_thread_io *)ds_tdio; tdio->rebalance = 1; } @@ -551,15 +298,15 @@ fq_balance_self(struct fq_thread_io *tdio) { transactions = (int64_t)tdio->interval_transactions; avg_latency = (int64_t)tdio->interval_avg_latency; - diskctx = tdio->diskctx; + diskctx = (struct fq_disk_ctx *)tdio->head.diskctx; #if 0 /* XXX: do we really require the lock? 
*/ - FQ_DISK_CTX_LOCK_ASSERT(diskctx); + DSCHED_DISK_CTX_LOCK_ASSERT(diskctx); #endif used_budget = ((int64_t)avg_latency * transactions); - budget = diskctx->budgetpb[(tdio->p) ? tdio->p->p_ionice : 0]; + budget = diskctx->budgetpb[(tdio->head.p) ? tdio->head.p->p_ionice : 0]; if (used_budget > 0) { dsched_debug(LOG_INFO, @@ -589,63 +336,53 @@ do_fqstats(SYSCTL_HANDLER_ARGS) return (sysctl_handle_opaque(oidp, &fq_stats, sizeof(struct dsched_fq_stats), req)); } - -SYSCTL_PROC(_kern, OID_AUTO, fq_stats, CTLTYPE_OPAQUE|CTLFLAG_RD, - 0, sizeof(struct dsched_fq_stats), do_fqstats, "fq_stats", - "dsched_fq statistics"); - - -static void -fq_init(void) -{ - -} - -static void -fq_uninit(void) -{ - -} - -static void -fq_earlyinit(void) +static int +fq_mod_handler(module_t mod, int type, void *unused) { - fq_tdio_cache = objcache_create("fq-tdio-cache", 0, 0, - NULL, NULL, NULL, - objcache_malloc_alloc, - objcache_malloc_free, - &fq_thread_io_malloc_args ); - - fq_tdctx_cache = objcache_create("fq-tdctx-cache", 0, 0, - NULL, NULL, NULL, - objcache_malloc_alloc, - objcache_malloc_free, - &fq_thread_ctx_malloc_args ); - - FQ_GLOBAL_THREAD_CTX_LOCKINIT(); + static struct sysctl_ctx_list sysctl_ctx; + static struct sysctl_oid *oid; + static char version[16]; + int error; - fq_diskctx_cache = objcache_create("fq-diskctx-cache", 0, 0, - NULL, NULL, NULL, - objcache_malloc_alloc, - objcache_malloc_free, - &fq_disk_ctx_malloc_args ); - - bzero(&fq_stats, sizeof(struct dsched_fq_stats)); - - dsched_register(&dsched_fq_policy); - - kprintf("FQ scheduler policy version %d.%d loaded\n", + ksnprintf(version, sizeof(version), "%d.%d", dsched_fq_version_maj, dsched_fq_version_min); -} -static void -fq_earlyuninit(void) -{ - return; -} + switch (type) { + case MOD_LOAD: + bzero(&fq_stats, sizeof(struct dsched_fq_stats)); + if ((error = dsched_register(&dsched_fq_policy))) + return (error); + + sysctl_ctx_init(&sysctl_ctx); + oid = SYSCTL_ADD_NODE(&sysctl_ctx, + 
SYSCTL_STATIC_CHILDREN(_dsched), + OID_AUTO, + "fq", + CTLFLAG_RD, 0, ""); + + SYSCTL_ADD_PROC(&sysctl_ctx, SYSCTL_CHILDREN(oid), + OID_AUTO, "stats", CTLTYPE_OPAQUE|CTLFLAG_RD, + 0, 0, do_fqstats, "S,dsched_fq_stats", "fq statistics"); + + SYSCTL_ADD_STRING(&sysctl_ctx, SYSCTL_CHILDREN(oid), + OID_AUTO, "version", CTLFLAG_RD, version, 0, "fq version"); + + kprintf("FQ scheduler policy version %d.%d loaded\n", + dsched_fq_version_maj, dsched_fq_version_min); + break; + + case MOD_UNLOAD: + if ((error = dsched_unregister(&dsched_fq_policy))) + return (error); + sysctl_ctx_free(&sysctl_ctx); + kprintf("FQ scheduler policy unloaded\n"); + break; + + default: + break; + } -SYSINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, fq_init, NULL); -SYSUNINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, fq_uninit, NULL); + return 0; +} -SYSINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, fq_earlyinit, NULL); -SYSUNINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, fq_earlyuninit, NULL); +DSCHED_POLICY_MODULE(dsched_fq, fq_mod_handler); diff --git a/sys/dsched/fq/dsched_fq_diskops.c b/sys/dsched/fq/dsched_fq_diskops.c index 84c49e20b9..965eae82df 100644 --- a/sys/dsched/fq/dsched_fq_diskops.c +++ b/sys/dsched/fq/dsched_fq_diskops.c @@ -41,90 +41,42 @@ #include #include #include -#include #include #include -#include -#include -#include #include -#include -#include -#include -#include #include +#include #include -#include #include #include - -MALLOC_DEFINE(M_DSCHEDFQ, "dschedfq", "fq dsched allocs"); - static dsched_prepare_t fq_prepare; static dsched_teardown_t fq_teardown; -static dsched_flush_t fq_flush; static dsched_cancel_t fq_cancel; static dsched_queue_t fq_queue; -/* These are in _procops */ -dsched_new_buf_t fq_new_buf; -dsched_new_proc_t fq_new_proc; -dsched_new_thread_t fq_new_thread; -dsched_exit_buf_t fq_exit_buf; -dsched_exit_proc_t fq_exit_proc; -dsched_exit_thread_t fq_exit_thread; - extern struct dsched_fq_stats fq_stats; -extern struct lock 
fq_tdctx_lock; -extern TAILQ_HEAD(, fq_thread_ctx) dsched_tdctx_list; -extern struct callout fq_callout; struct dsched_policy dsched_fq_policy = { .name = "fq", .prepare = fq_prepare, .teardown = fq_teardown, - .flush = fq_flush, .cancel_all = fq_cancel, - .bio_queue = fq_queue, - - .new_buf = fq_new_buf, - .new_proc = fq_new_proc, - .new_thread = fq_new_thread, - .exit_buf = fq_exit_buf, - .exit_proc = fq_exit_proc, - .exit_thread = fq_exit_thread, + .bio_queue = fq_queue }; - - static int -fq_prepare(struct disk *dp) +fq_prepare(struct dsched_disk_ctx *ds_diskctx) { - struct fq_disk_ctx *diskctx; - struct fq_thread_ctx *tdctx; - struct fq_thread_io *tdio; + struct fq_disk_ctx *diskctx = (struct fq_disk_ctx *)ds_diskctx; struct thread *td_core, *td_balance; - diskctx = fq_disk_ctx_alloc(dp); - fq_disk_ctx_ref(diskctx); - dsched_set_disk_priv(dp, diskctx); - - FQ_GLOBAL_THREAD_CTX_LOCK(); - TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) { - tdio = fq_thread_io_alloc(dp, tdctx); -#if 0 - fq_thread_io_ref(tdio); -#endif - } - FQ_GLOBAL_THREAD_CTX_UNLOCK(); - lwkt_create((void (*)(void *))fq_dispatcher, diskctx, &td_core, NULL, - TDF_MPSAFE, -1, "fq_dispatch_%s", dp->d_cdev->si_name); + TDF_MPSAFE, -1, "fq_dispatch_%s", ds_diskctx->dp->d_cdev->si_name); lwkt_create((void (*)(void *))fq_balance_thread, diskctx, &td_balance, - NULL, TDF_MPSAFE, -1, "fq_balance_%s", dp->d_cdev->si_name); + NULL, TDF_MPSAFE, -1, "fq_balance_%s", ds_diskctx->dp->d_cdev->si_name); diskctx->td_balance = td_balance; return 0; @@ -133,11 +85,9 @@ fq_prepare(struct disk *dp) static void -fq_teardown(struct disk *dp) +fq_teardown(struct dsched_disk_ctx *ds_diskctx) { - struct fq_disk_ctx *diskctx; - - diskctx = dsched_get_disk_priv(dp); + struct fq_disk_ctx *diskctx = (struct fq_disk_ctx *)ds_diskctx; KKASSERT(diskctx != NULL); /* Basically kill the dispatcher thread */ @@ -150,11 +100,6 @@ fq_teardown(struct disk *dp) tsleep(diskctx, 0, "fq_dispatcher", hz/10); /* wait 100 ms */ 
wakeup(diskctx->td_balance); wakeup(diskctx); - - fq_disk_ctx_unref(diskctx); /* from prepare */ - fq_disk_ctx_unref(diskctx); /* from alloc */ - - dsched_set_disk_priv(dp, NULL); } @@ -162,17 +107,19 @@ fq_teardown(struct disk *dp) void fq_drain(struct fq_disk_ctx *diskctx, int mode) { - struct fq_thread_io *tdio, *tdio2; + struct dsched_thread_io *ds_tdio, *ds_tdio2; + struct fq_thread_io *tdio; struct bio *bio, *bio2; - TAILQ_FOREACH_MUTABLE(tdio, &diskctx->fq_tdio_list, dlink, tdio2) { - if (tdio->qlength == 0) + TAILQ_FOREACH_MUTABLE(ds_tdio, &diskctx->head.tdio_list, dlink, ds_tdio2) { + tdio = (struct fq_thread_io *)ds_tdio; + if (tdio->head.qlength == 0) continue; - FQ_THREAD_IO_LOCK(tdio); - TAILQ_FOREACH_MUTABLE(bio, &tdio->queue, link, bio2) { - TAILQ_REMOVE(&tdio->queue, bio, link); - --tdio->qlength; + DSCHED_THREAD_IO_LOCK(&tdio->head); + TAILQ_FOREACH_MUTABLE(bio, &tdio->head.queue, link, bio2) { + TAILQ_REMOVE(&tdio->head.queue, bio, link); + --tdio->head.qlength; if (__predict_false(mode == FQ_DRAIN_CANCEL)) { /* FQ_DRAIN_CANCEL */ dsched_cancel_bio(bio); @@ -180,93 +127,48 @@ fq_drain(struct fq_disk_ctx *diskctx, int mode) /* Release ref acquired on fq_queue */ /* XXX: possible failure point */ - fq_thread_io_unref(tdio); + dsched_thread_io_unref(&tdio->head); } else { /* FQ_DRAIN_FLUSH */ fq_dispatch(diskctx, bio, tdio); } } - FQ_THREAD_IO_UNLOCK(tdio); + DSCHED_THREAD_IO_UNLOCK(&tdio->head); } return; } - static void -fq_flush(struct disk *dp, struct bio *bio) +fq_cancel(struct dsched_disk_ctx *ds_diskctx) { - /* we don't do anything here */ -} + struct fq_disk_ctx *diskctx = (struct fq_disk_ctx *)ds_diskctx; - -static void -fq_cancel(struct disk *dp) -{ - struct fq_disk_ctx *diskctx; - - diskctx = dsched_get_disk_priv(dp); KKASSERT(diskctx != NULL); /* * all bios not in flight are queued in their respective tdios. * good thing we have a list of tdios per disk diskctx. 
*/ - FQ_DISK_CTX_LOCK(diskctx); + DSCHED_DISK_CTX_LOCK(&diskctx->head); fq_drain(diskctx, FQ_DRAIN_CANCEL); - FQ_DISK_CTX_UNLOCK(diskctx); + DSCHED_DISK_CTX_UNLOCK(&diskctx->head); } static int -fq_queue(struct disk *dp, struct bio *obio) +fq_queue(struct dsched_disk_ctx *ds_diskctx, struct dsched_thread_io *ds_tdio, struct bio *obio) { struct bio *bio, *bio2; - struct fq_thread_ctx *tdctx; struct fq_thread_io *tdio; struct fq_disk_ctx *diskctx; - int found = 0; int max_tp, transactions; /* We don't handle flushes, let dsched dispatch them */ if (__predict_false(obio->bio_buf->b_cmd == BUF_CMD_FLUSH)) return (EINVAL); - /* get tdctx and tdio */ - tdctx = dsched_get_buf_priv(obio->bio_buf); - - /* - * XXX: hack. we don't want the assert because some null-tdctxs are - * leaking through; just dispatch them. These come from the - * mi_startup() mess, which does the initial root mount. - */ -#if 0 - KKASSERT(tdctx != NULL); -#endif - if (tdctx == NULL) { - /* We don't handle this case, let dsched dispatch */ - atomic_add_int(&fq_stats.no_tdctx, 1); - return (EINVAL); - } - - - FQ_THREAD_CTX_LOCK(tdctx); -#if 0 - kprintf("fq_queue, tdctx = %p\n", tdctx); -#endif - KKASSERT(!TAILQ_EMPTY(&tdctx->fq_tdio_list)); - TAILQ_FOREACH(tdio, &tdctx->fq_tdio_list, link) { - if (tdio->dp == dp) { - fq_thread_io_ref(tdio); - found = 1; - break; - } - } - FQ_THREAD_CTX_UNLOCK(tdctx); - dsched_clr_buf_priv(obio->bio_buf); - fq_thread_ctx_unref(tdctx); /* acquired on new_buf */ - - KKASSERT(found == 1); - diskctx = dsched_get_disk_priv(dp); + tdio = (struct fq_thread_io *)ds_tdio; + diskctx = (struct fq_disk_ctx *)ds_diskctx; if (atomic_cmpset_int(&tdio->rebalance, 1, 0)) fq_balance_self(tdio); @@ -280,19 +182,19 @@ fq_queue(struct disk *dp, struct bio *obio) * Process pending bios from previous _queue() actions that * have been rate-limited and hence queued in the tdio. 
*/ - KKASSERT(tdio->qlength >= 0); + KKASSERT(tdio->head.qlength >= 0); - if (tdio->qlength > 0) { - FQ_THREAD_IO_LOCK(tdio); + if (tdio->head.qlength > 0) { + DSCHED_THREAD_IO_LOCK(&tdio->head); - TAILQ_FOREACH_MUTABLE(bio, &tdio->queue, link, bio2) { + TAILQ_FOREACH_MUTABLE(bio, &tdio->head.queue, link, bio2) { /* Rebalance ourselves if required */ if (atomic_cmpset_int(&tdio->rebalance, 1, 0)) fq_balance_self(tdio); if ((tdio->max_tp > 0) && (tdio->issued >= tdio->max_tp)) break; - TAILQ_REMOVE(&tdio->queue, bio, link); - --tdio->qlength; + TAILQ_REMOVE(&tdio->head.queue, bio, link); + --tdio->head.qlength; /* * beware that we do have an tdio reference from the @@ -300,11 +202,11 @@ fq_queue(struct disk *dp, struct bio *obio) */ fq_dispatch(diskctx, bio, tdio); } - FQ_THREAD_IO_UNLOCK(tdio); + DSCHED_THREAD_IO_UNLOCK(&tdio->head); } /* Nothing is pending from previous IO, so just pass it down */ - fq_thread_io_ref(tdio); + dsched_thread_io_ref(&tdio->head); fq_dispatch(diskctx, obio, tdio); } else { @@ -315,8 +217,8 @@ fq_queue(struct disk *dp, struct bio *obio) * we just queue requests instead of * despatching them. */ - FQ_THREAD_IO_LOCK(tdio); - fq_thread_io_ref(tdio); + DSCHED_THREAD_IO_LOCK(&tdio->head); + dsched_thread_io_ref(&tdio->head); /* * Prioritize reads by inserting them at the front of the @@ -327,15 +229,14 @@ fq_queue(struct disk *dp, struct bio *obio) * actually been written yet. 
*/ if (obio->bio_buf->b_cmd == BUF_CMD_READ) - TAILQ_INSERT_HEAD(&tdio->queue, obio, link); + TAILQ_INSERT_HEAD(&tdio->head.queue, obio, link); else - TAILQ_INSERT_TAIL(&tdio->queue, obio, link); + TAILQ_INSERT_TAIL(&tdio->head.queue, obio, link); - ++tdio->qlength; - FQ_THREAD_IO_UNLOCK(tdio); + ++tdio->head.qlength; + DSCHED_THREAD_IO_UNLOCK(&tdio->head); } - fq_thread_io_unref(tdio); return 0; } @@ -360,7 +261,7 @@ fq_completed(struct bio *bp) KKASSERT(tdio != NULL); KKASSERT(diskctx != NULL); - fq_disk_ctx_ref(diskctx); + dsched_disk_ctx_ref(&diskctx->head); atomic_subtract_int(&diskctx->incomplete_tp, 1); if (!(bp->bio_buf->b_flags & B_ERROR)) { @@ -396,9 +297,9 @@ fq_completed(struct bio *bp) atomic_add_int(&fq_stats.transactions_completed, 1); } - fq_disk_ctx_unref(diskctx); + dsched_disk_ctx_unref(&diskctx->head); /* decrease the ref count that was bumped for us on dispatch */ - fq_thread_io_unref(tdio); + dsched_thread_io_unref(&tdio->head); obio = pop_bio(bp); biodone(obio); @@ -417,7 +318,7 @@ fq_dispatch(struct fq_disk_ctx *diskctx, struct bio *bio, (tv.tv_usec - diskctx->start_idle.tv_usec))); diskctx->idle = 0; } - dsched_strategy_async(diskctx->dp, bio, fq_completed, tdio); + dsched_strategy_async(diskctx->head.dp, bio, fq_completed, tdio); atomic_add_int(&tdio->issued, 1); atomic_add_int(&diskctx->incomplete_tp, 1); diff --git a/sys/dsched/fq/dsched_fq_procops.c b/sys/dsched/fq/dsched_fq_procops.c deleted file mode 100644 index 6cc680f7e2..0000000000 --- a/sys/dsched/fq/dsched_fq_procops.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Alex Hornung - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -MALLOC_DECLARE(M_DSCHEDFQ); - -dsched_new_buf_t fq_new_buf; -dsched_new_proc_t fq_new_proc; -dsched_new_thread_t fq_new_thread; -dsched_exit_buf_t fq_exit_buf; -dsched_exit_proc_t fq_exit_proc; -dsched_exit_thread_t fq_exit_thread; - -extern struct dsched_fq_stats fq_stats; - -void -fq_new_buf(struct buf *bp) -{ - struct fq_thread_ctx *tdctx = NULL; - - if (curproc != NULL) { - tdctx = dsched_get_proc_priv(curproc); - } else { - /* This is a kernel thread, so no proc info is available */ - tdctx = dsched_get_thread_priv(curthread); - } - -#if 0 - /* - * XXX: hack. we don't want this assert because we aren't catching all - * threads. mi_startup() is still getting away without an tdctx. - */ - - /* by now we should have an tdctx. 
if not, something bad is going on */ - KKASSERT(tdctx != NULL); -#endif - - if (tdctx) { - fq_thread_ctx_ref(tdctx); - } - dsched_set_buf_priv(bp, tdctx); - -} - -void -fq_exit_buf(struct buf *bp) -{ - struct fq_thread_ctx *tdctx; - - tdctx = dsched_get_buf_priv(bp); - if (tdctx != NULL) { - dsched_clr_buf_priv(bp); - fq_thread_ctx_unref(tdctx); - } -} - -void -fq_new_proc(struct proc *p) -{ - struct fq_thread_ctx *tdctx; - - KKASSERT(p != NULL); - - tdctx = fq_thread_ctx_alloc(p); - fq_thread_ctx_ref(tdctx); - dsched_set_proc_priv(p, tdctx); - atomic_add_int(&fq_stats.nprocs, 1); - tdctx->p = p; -} - -void -fq_new_thread(struct thread *td) -{ - struct fq_thread_ctx *tdctx; - - KKASSERT(td != NULL); - - tdctx = fq_thread_ctx_alloc(NULL); - fq_thread_ctx_ref(tdctx); - dsched_set_thread_priv(td, tdctx); - atomic_add_int(&fq_stats.nthreads, 1); - tdctx->td = td; -} - -void -fq_exit_proc(struct proc *p) -{ - struct fq_thread_ctx *tdctx; - - KKASSERT(p != NULL); - - tdctx = dsched_get_proc_priv(p); - KKASSERT(tdctx != NULL); -#if 0 - kprintf("exit_proc: tdctx = %p\n", tdctx); -#endif - tdctx->dead = 0x1337; - dsched_set_proc_priv(p, 0); - fq_thread_ctx_unref(tdctx); /* one for alloc, */ - fq_thread_ctx_unref(tdctx); /* one for ref */ - atomic_subtract_int(&fq_stats.nprocs, 1); -} - -void -fq_exit_thread(struct thread *td) -{ - struct fq_thread_ctx *tdctx; - - KKASSERT(td != NULL); - - tdctx = dsched_get_thread_priv(td); - KKASSERT(tdctx != NULL); -#if 0 - kprintf("exit_thread: tdctx = %p\n", tdctx); -#endif - tdctx->dead = 0x1337; - dsched_set_thread_priv(td, 0); - fq_thread_ctx_unref(tdctx); /* one for alloc, */ - fq_thread_ctx_unref(tdctx); /* one for ref */ - atomic_subtract_int(&fq_stats.nthreads, 1); -} diff --git a/sys/kern/kern_dsched.c b/sys/kern/kern_dsched.c index ac63398205..55b1935cf8 100644 --- a/sys/kern/kern_dsched.c +++ b/sys/kern/kern_dsched.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -53,19 +52,13 @@ #include 
#include +MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs"); + static dsched_prepare_t default_prepare; static dsched_teardown_t default_teardown; -static dsched_flush_t default_flush; static dsched_cancel_t default_cancel; static dsched_queue_t default_queue; -static dsched_new_buf_t *default_new_buf; -static dsched_new_proc_t *default_new_proc; -static dsched_new_thread_t *default_new_thread; -static dsched_exit_buf_t *default_exit_buf; -static dsched_exit_proc_t *default_exit_proc; -static dsched_exit_thread_t *default_exit_thread; - static d_open_t dsched_dev_open; static d_close_t dsched_dev_close; static d_ioctl_t dsched_dev_ioctl; @@ -75,12 +68,30 @@ static int dsched_dev_list_disk(struct dsched_ioctl *data); static int dsched_dev_list_policies(struct dsched_ioctl *data); static int dsched_dev_handle_switch(char *disk, char *policy); +static int dsched_inited = 0; struct lock dsched_lock; static int dsched_debug_enable = 0; -static int dsched_test1 = 0; static cdev_t dsched_dev; +struct dsched_stats dsched_stats; + +struct objcache_malloc_args dsched_disk_ctx_malloc_args = { + DSCHED_DISK_CTX_MAX_SZ, M_DSCHED }; +struct objcache_malloc_args dsched_thread_io_malloc_args = { + DSCHED_THREAD_IO_MAX_SZ, M_DSCHED }; +struct objcache_malloc_args dsched_thread_ctx_malloc_args = { + DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED }; + +static struct objcache *dsched_diskctx_cache; +static struct objcache *dsched_tdctx_cache; +static struct objcache *dsched_tdio_cache; + +TAILQ_HEAD(, dsched_thread_ctx) dsched_tdctx_list = + TAILQ_HEAD_INITIALIZER(dsched_tdctx_list); + +struct lock dsched_tdctx_lock; + static struct dsched_policy_head dsched_policy_list = TAILQ_HEAD_INITIALIZER(dsched_policy_list); @@ -89,12 +100,10 @@ static struct dsched_policy dsched_default_policy = { .prepare = default_prepare, .teardown = default_teardown, - .flush = default_flush, .cancel_all = default_cancel, - .bio_queue = default_queue, + .bio_queue = default_queue }; - static struct dev_ops 
dsched_dev_ops = { { "dsched", 0, 0 }, .d_open = dsched_dev_open, @@ -179,8 +188,9 @@ dsched_disk_destroy_callback(struct disk *dp) old_policy = dp->d_sched_policy; dp->d_sched_policy = &dsched_default_policy; - old_policy->cancel_all(dp); - old_policy->teardown(dp); + old_policy->cancel_all(dsched_get_disk_priv(dp)); + old_policy->teardown(dsched_get_disk_priv(dp)); + policy_destroy(dp); atomic_subtract_int(&old_policy->ref_count, 1); KKASSERT(old_policy->ref_count >= 0); @@ -191,15 +201,45 @@ dsched_disk_destroy_callback(struct disk *dp) void dsched_queue(struct disk *dp, struct bio *bio) { - int error = 0; - error = dp->d_sched_policy->bio_queue(dp, bio); + struct dsched_thread_ctx *tdctx; + struct dsched_thread_io *tdio; + struct dsched_disk_ctx *diskctx; - if (error) { - if (bio->bio_buf->b_cmd == BUF_CMD_FLUSH) { - dp->d_sched_policy->flush(dp, bio); + int found = 0, error = 0; + + tdctx = dsched_get_buf_priv(bio->bio_buf); + if (tdctx == NULL) { + /* We don't handle this case, let dsched dispatch */ + atomic_add_int(&dsched_stats.no_tdctx, 1); + dsched_strategy_raw(dp, bio); + return; + } + + DSCHED_THREAD_CTX_LOCK(tdctx); + + KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list)); + TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) { + if (tdio->dp == dp) { + dsched_thread_io_ref(tdio); + found = 1; + break; } + } + + DSCHED_THREAD_CTX_UNLOCK(tdctx); + dsched_clr_buf_priv(bio->bio_buf); + dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */ + + KKASSERT(found == 1); + diskctx = dsched_get_disk_priv(dp); + dsched_disk_ctx_ref(diskctx); + error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio); + + if (error) { dsched_strategy_raw(dp, bio); } + dsched_disk_ctx_unref(diskctx); + dsched_thread_io_unref(tdio); } @@ -218,40 +258,14 @@ dsched_register(struct dsched_policy *d_policy) policy = dsched_find_policy(d_policy->name); if (!policy) { - if ((d_policy->new_buf != NULL) || (d_policy->new_proc != NULL) || - (d_policy->new_thread != NULL)) { - /* - * Policy policy has 
hooks for proc/thread/buf creation, - * so check if there are already hooks for those present - * and if so, stop right now. - */ - if ((default_new_buf != NULL) || (default_new_proc != NULL) || - (default_new_thread != NULL) || (default_exit_proc != NULL) || - (default_exit_thread != NULL)) { - dsched_debug(LOG_ERR, "A policy with " - "proc/thread/buf hooks is already in use!"); - error = 1; - goto done; - } - - /* If everything is fine, just register the hooks */ - default_new_buf = d_policy->new_buf; - default_new_proc = d_policy->new_proc; - default_new_thread = d_policy->new_thread; - default_exit_buf = d_policy->exit_buf; - default_exit_proc = d_policy->exit_proc; - default_exit_thread = d_policy->exit_thread; - } - TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link); atomic_add_int(&d_policy->ref_count, 1); } else { dsched_debug(LOG_ERR, "Policy with name %s already registered!\n", d_policy->name); - error = 1; + error = EEXIST; } -done: lockmgr(&dsched_lock, LK_RELEASE); return error; } @@ -269,11 +283,13 @@ dsched_unregister(struct dsched_policy *d_policy) policy = dsched_find_policy(d_policy->name); if (policy) { - if (policy->ref_count > 1) - return 1; + if (policy->ref_count > 1) { + lockmgr(&dsched_lock, LK_RELEASE); + return EBUSY; + } TAILQ_REMOVE(&dsched_policy_list, policy, link); atomic_subtract_int(&policy->ref_count, 1); - KKASSERT(policy->ref_count >= 0); + KKASSERT(policy->ref_count == 0); } lockmgr(&dsched_lock, LK_RELEASE); return 0; @@ -297,15 +313,16 @@ dsched_switch(struct disk *dp, struct dsched_policy *new_policy) lockmgr(&dsched_lock, LK_EXCLUSIVE); old_policy = dp->d_sched_policy; - atomic_subtract_int(&dp->d_sched_policy->ref_count, 1); - KKASSERT(dp->d_sched_policy->ref_count >= 0); + atomic_subtract_int(&old_policy->ref_count, 1); + KKASSERT(old_policy->ref_count >= 0); dp->d_sched_policy = &dsched_default_policy; - old_policy->teardown(dp); + old_policy->teardown(dsched_get_disk_priv(dp)); + policy_destroy(dp); /* Bring 
everything back to life */ dsched_set_policy(dp, new_policy); - lockmgr(&dsched_lock, LK_RELEASE); + lockmgr(&dsched_lock, LK_RELEASE); return 0; } @@ -325,7 +342,8 @@ dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy) locked = 1; } - new_policy->prepare(dp); + policy_new(dp, new_policy); + new_policy->prepare(dsched_get_disk_priv(dp)); dp->d_sched_policy = new_policy; atomic_add_int(&new_policy->ref_count, 1); kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name, @@ -459,6 +477,7 @@ dsched_strategy_sync(struct disk *dp, struct bio *bio) bp->b_resid = nbp->b_resid; bp->b_error = nbp->b_error; biodone(bio); + relpbuf(nbp, NULL); } void @@ -477,84 +496,429 @@ dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *p dev_dstrategy(dp->d_rawdev, nbio); } +void +dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx) +{ + int refcount; + + refcount = atomic_fetchadd_int(&diskctx->refcount, 1); + + KKASSERT(refcount >= 0); +} + +void +dsched_thread_io_ref(struct dsched_thread_io *tdio) +{ + int refcount; + + refcount = atomic_fetchadd_int(&tdio->refcount, 1); + + KKASSERT(refcount >= 0); +} + +void +dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx) +{ + int refcount; + + refcount = atomic_fetchadd_int(&tdctx->refcount, 1); + + KKASSERT(refcount >= 0); +} + +void +dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx) +{ + struct dsched_thread_io *tdio, *tdio2; + int refcount; + + refcount = atomic_fetchadd_int(&diskctx->refcount, -1); + + + KKASSERT(refcount >= 0 || refcount <= -0x400); + + if (refcount == 1) { + atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */ +#if 0 + kprintf("diskctx (%p) destruction started, trace:\n", diskctx); + print_backtrace(4); +#endif + lockmgr(&diskctx->lock, LK_EXCLUSIVE); + TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) { + TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink); + tdio->flags &= ~DSCHED_LINKED_DISK_CTX; + 
dsched_thread_io_unref(tdio); + } + lockmgr(&diskctx->lock, LK_RELEASE); + if (diskctx->dp->d_sched_policy->destroy_diskctx) + diskctx->dp->d_sched_policy->destroy_diskctx(diskctx); + objcache_put(dsched_diskctx_cache, diskctx); + atomic_subtract_int(&dsched_stats.diskctx_allocations, 1); + } +} + +void +dsched_thread_io_unref(struct dsched_thread_io *tdio) +{ + struct dsched_thread_ctx *tdctx; + struct dsched_disk_ctx *diskctx; + int refcount; + + refcount = atomic_fetchadd_int(&tdio->refcount, -1); + + KKASSERT(refcount >= 0 || refcount <= -0x400); + + if (refcount == 1) { + atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */ +#if 0 + kprintf("tdio (%p) destruction started, trace:\n", tdio); + print_backtrace(8); +#endif + diskctx = tdio->diskctx; + KKASSERT(diskctx != NULL); + KKASSERT(tdio->qlength == 0); + + if (tdio->flags & DSCHED_LINKED_DISK_CTX) { + lockmgr(&diskctx->lock, LK_EXCLUSIVE); + + TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink); + tdio->flags &= ~DSCHED_LINKED_DISK_CTX; + + lockmgr(&diskctx->lock, LK_RELEASE); + } + + if (tdio->flags & DSCHED_LINKED_THREAD_CTX) { + tdctx = tdio->tdctx; + KKASSERT(tdctx != NULL); + + lockmgr(&tdctx->lock, LK_EXCLUSIVE); + + TAILQ_REMOVE(&tdctx->tdio_list, tdio, link); + tdio->flags &= ~DSCHED_LINKED_THREAD_CTX; + + lockmgr(&tdctx->lock, LK_RELEASE); + } + if (tdio->diskctx->dp->d_sched_policy->destroy_tdio) + tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio); + objcache_put(dsched_tdio_cache, tdio); + atomic_subtract_int(&dsched_stats.tdio_allocations, 1); +#if 0 + dsched_disk_ctx_unref(diskctx); +#endif + } +} + +void +dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx) +{ + struct dsched_thread_io *tdio, *tdio2; + int refcount; + + refcount = atomic_fetchadd_int(&tdctx->refcount, -1); + + KKASSERT(refcount >= 0 || refcount <= -0x400); + + if (refcount == 1) { + atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */ +#if 0 + kprintf("tdctx (%p) destruction 
started, trace:\n", tdctx); + print_backtrace(8); +#endif + DSCHED_GLOBAL_THREAD_CTX_LOCK(); + + TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) { + TAILQ_REMOVE(&tdctx->tdio_list, tdio, link); + tdio->flags &= ~DSCHED_LINKED_THREAD_CTX; + dsched_thread_io_unref(tdio); + } + TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link); + + DSCHED_GLOBAL_THREAD_CTX_UNLOCK(); + + objcache_put(dsched_tdctx_cache, tdctx); + atomic_subtract_int(&dsched_stats.tdctx_allocations, 1); + } +} + + +struct dsched_thread_io * +dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx, + struct dsched_policy *pol) +{ + struct dsched_thread_io *tdio; +#if 0 + dsched_disk_ctx_ref(dsched_get_disk_priv(dp)); +#endif + tdio = objcache_get(dsched_tdio_cache, M_WAITOK); + bzero(tdio, DSCHED_THREAD_IO_MAX_SZ); + + /* XXX: maybe we do need another ref for the disk list for tdio */ + dsched_thread_io_ref(tdio); + + DSCHED_THREAD_IO_LOCKINIT(tdio); + tdio->dp = dp; + + tdio->diskctx = dsched_get_disk_priv(dp); + TAILQ_INIT(&tdio->queue); + + if (pol->new_tdio) + pol->new_tdio(tdio); + + TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink); + tdio->flags |= DSCHED_LINKED_DISK_CTX; + + if (tdctx) { + tdio->tdctx = tdctx; + tdio->p = tdctx->p; + + /* Put the tdio in the tdctx list */ + DSCHED_THREAD_CTX_LOCK(tdctx); + TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link); + DSCHED_THREAD_CTX_UNLOCK(tdctx); + tdio->flags |= DSCHED_LINKED_THREAD_CTX; + } + + atomic_add_int(&dsched_stats.tdio_allocations, 1); + return tdio; +} + + +struct dsched_disk_ctx * +dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol) +{ + struct dsched_disk_ctx *diskctx; + + diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK); + bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ); + dsched_disk_ctx_ref(diskctx); + diskctx->dp = dp; + DSCHED_DISK_CTX_LOCKINIT(diskctx); + TAILQ_INIT(&diskctx->tdio_list); + + atomic_add_int(&dsched_stats.diskctx_allocations, 1); + if (pol->new_diskctx) + 
pol->new_diskctx(diskctx); + return diskctx; +} + + +struct dsched_thread_ctx * +dsched_thread_ctx_alloc(struct proc *p) +{ + struct dsched_thread_ctx *tdctx; + struct dsched_thread_io *tdio; + struct disk *dp = NULL; + + tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK); + bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ); + dsched_thread_ctx_ref(tdctx); +#if 0 + kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx); +#endif + DSCHED_THREAD_CTX_LOCKINIT(tdctx); + TAILQ_INIT(&tdctx->tdio_list); + tdctx->p = p; + + /* XXX */ + while ((dp = disk_enumerate(dp))) { + tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy); + } + + DSCHED_GLOBAL_THREAD_CTX_LOCK(); + TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link); + DSCHED_GLOBAL_THREAD_CTX_UNLOCK(); + + atomic_add_int(&dsched_stats.tdctx_allocations, 1); + /* XXX: no callback here */ + return tdctx; +} + +void +policy_new(struct disk *dp, struct dsched_policy *pol) { + struct dsched_thread_ctx *tdctx; + struct dsched_disk_ctx *diskctx; + struct dsched_thread_io *tdio; + + diskctx = dsched_disk_ctx_alloc(dp, pol); + dsched_disk_ctx_ref(diskctx); + dsched_set_disk_priv(dp, diskctx); + + DSCHED_GLOBAL_THREAD_CTX_LOCK(); + TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) { + tdio = dsched_thread_io_alloc(dp, tdctx, pol); + } + DSCHED_GLOBAL_THREAD_CTX_UNLOCK(); + +} + +void +policy_destroy(struct disk *dp) { + struct dsched_disk_ctx *diskctx; + + diskctx = dsched_get_disk_priv(dp); + KKASSERT(diskctx != NULL); + + dsched_disk_ctx_unref(diskctx); /* from prepare */ + dsched_disk_ctx_unref(diskctx); /* from alloc */ + + dsched_set_disk_priv(dp, NULL); +} + void dsched_new_buf(struct buf *bp) { - if (default_new_buf != NULL) - default_new_buf(bp); + struct dsched_thread_ctx *tdctx = NULL; + + if (dsched_inited == 0) + return; + + if (curproc != NULL) { + tdctx = dsched_get_proc_priv(curproc); + } else { + /* This is a kernel thread, so no proc info is available */ + tdctx = dsched_get_thread_priv(curthread); + } + +#if 0 + 
/* + * XXX: hack. we don't want this assert because we aren't catching all + * threads. mi_startup() is still getting away without an tdctx. + */ + + /* by now we should have an tdctx. if not, something bad is going on */ + KKASSERT(tdctx != NULL); +#endif + + if (tdctx) { + dsched_thread_ctx_ref(tdctx); + } + dsched_set_buf_priv(bp, tdctx); } void dsched_exit_buf(struct buf *bp) { - if (default_exit_buf != NULL) - default_exit_buf(bp); + struct dsched_thread_ctx *tdctx; + + tdctx = dsched_get_buf_priv(bp); + if (tdctx != NULL) { + dsched_clr_buf_priv(bp); + dsched_thread_ctx_unref(tdctx); + } } void dsched_new_proc(struct proc *p) { - if (default_new_proc != NULL) - default_new_proc(p); + struct dsched_thread_ctx *tdctx; + + if (dsched_inited == 0) + return; + + KKASSERT(p != NULL); + + tdctx = dsched_thread_ctx_alloc(p); + tdctx->p = p; + dsched_thread_ctx_ref(tdctx); + + dsched_set_proc_priv(p, tdctx); + atomic_add_int(&dsched_stats.nprocs, 1); } void dsched_new_thread(struct thread *td) { - if (default_new_thread != NULL) - default_new_thread(td); + struct dsched_thread_ctx *tdctx; + + if (dsched_inited == 0) + return; + + KKASSERT(td != NULL); + + tdctx = dsched_thread_ctx_alloc(NULL); + tdctx->td = td; + dsched_thread_ctx_ref(tdctx); + + dsched_set_thread_priv(td, tdctx); + atomic_add_int(&dsched_stats.nthreads, 1); } void dsched_exit_proc(struct proc *p) { - if (default_exit_proc != NULL) - default_exit_proc(p); + struct dsched_thread_ctx *tdctx; + + if (dsched_inited == 0) + return; + + KKASSERT(p != NULL); + + tdctx = dsched_get_proc_priv(p); + KKASSERT(tdctx != NULL); + + tdctx->dead = 0xDEAD; + dsched_set_proc_priv(p, 0); + + dsched_thread_ctx_unref(tdctx); /* one for alloc, */ + dsched_thread_ctx_unref(tdctx); /* one for ref */ + atomic_subtract_int(&dsched_stats.nprocs, 1); } void dsched_exit_thread(struct thread *td) { - if (default_exit_thread != NULL) - default_exit_thread(td); + struct dsched_thread_ctx *tdctx; + + if (dsched_inited == 0) + return; 
+ + KKASSERT(td != NULL); + + tdctx = dsched_get_thread_priv(td); + KKASSERT(tdctx != NULL); + + tdctx->dead = 0xDEAD; + dsched_set_thread_priv(td, 0); + + dsched_thread_ctx_unref(tdctx); /* one for alloc, */ + dsched_thread_ctx_unref(tdctx); /* one for ref */ + atomic_subtract_int(&dsched_stats.nthreads, 1); } +/* DEFAULT NOOP POLICY */ + static int -default_prepare(struct disk *dp) +default_prepare(struct dsched_disk_ctx *diskctx) { return 0; } static void -default_teardown(struct disk *dp) -{ - -} - -static void -default_flush(struct disk *dp, struct bio *bio) +default_teardown(struct dsched_disk_ctx *diskctx) { } static void -default_cancel(struct disk *dp) +default_cancel(struct dsched_disk_ctx *diskctx) { } static int -default_queue(struct disk *dp, struct bio *bio) +default_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio, + struct bio *bio) { - dsched_strategy_raw(dp, bio); + dsched_strategy_raw(diskctx->dp, bio); #if 0 - dsched_strategy_async(dp, bio, default_completed, NULL); + dsched_strategy_async(diskctx->dp, bio, default_completed, NULL); #endif return 0; } + /* * dsched device stuff */ @@ -703,6 +1067,11 @@ dsched_dev_ioctl(struct dev_ioctl_args *ap) return(error); } + + + + + /* * SYSINIT stuff */ @@ -711,8 +1080,32 @@ dsched_dev_ioctl(struct dev_ioctl_args *ap) static void dsched_init(void) { + dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0, + NULL, NULL, NULL, + objcache_malloc_alloc, + objcache_malloc_free, + &dsched_thread_io_malloc_args ); + + dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0, + NULL, NULL, NULL, + objcache_malloc_alloc, + objcache_malloc_free, + &dsched_thread_ctx_malloc_args ); + + dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0, + NULL, NULL, NULL, + objcache_malloc_alloc, + objcache_malloc_free, + &dsched_disk_ctx_malloc_args ); + + bzero(&dsched_stats, sizeof(struct dsched_stats)); + lockinit(&dsched_lock, "dsched lock", 0, 0); + 
DSCHED_GLOBAL_THREAD_CTX_LOCKINIT(); + dsched_register(&dsched_default_policy); + + dsched_inited = 1; } static void @@ -737,15 +1130,24 @@ dsched_dev_uninit(void) destroy_dev(dsched_dev); } -SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_FIRST, dsched_init, NULL); -SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_ANY, dsched_uninit, NULL); +SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL); +SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL); SYSINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_init, NULL); SYSUNINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_uninit, NULL); /* * SYSCTL stuff */ -SYSCTL_INT(_kern, OID_AUTO, dsched_debug, CTLFLAG_RW, &dsched_debug_enable, - 0, "Enable dsched debugging"); -SYSCTL_INT(_kern, OID_AUTO, dsched_test1, CTLFLAG_RW, &dsched_test1, - 0, "Switch dsched test1 method"); +static int +do_dsched_stats(SYSCTL_HANDLER_ARGS) +{ + return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req)); +} + +SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL, + "Disk Scheduler Framework (dsched) magic"); +SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable, + 0, "Enable dsched debugging"); +SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD, + 0, sizeof(struct dsched_stats), do_dsched_stats, "dsched_stats", + "dsched statistics"); diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index f79554e9fd..23acd0a5b1 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -523,6 +523,8 @@ disk_create(int unit, struct disk *dp, struct dev_ops *raw_ops) dp->d_cdev->si_disk = dp; + dsched_disk_create_callback(dp, raw_ops->head.name, unit); + lwkt_gettoken(&ilock, &disklist_token); LIST_INSERT_HEAD(&disklist, dp, d_list); lwkt_reltoken(&ilock); @@ -531,7 +533,6 @@ disk_create(int unit, struct disk *dp, struct dev_ops *raw_ops) "disk_create 
(end): %s%d\n", raw_ops->head.name, unit); - dsched_disk_create_callback(dp, raw_ops->head.name, unit); return (dp->d_rawdev); } diff --git a/sys/sys/dsched.h b/sys/sys/dsched.h index dc81978906..749d897e43 100644 --- a/sys/sys/dsched.h +++ b/sys/sys/dsched.h @@ -54,6 +54,9 @@ #ifndef _SYS_MSGPORT_H_ #include #endif +#ifndef _SYS_SYSCTL_H_ +#include +#endif #define DSCHED_POLICY_NAME_LENGTH 64 @@ -79,17 +82,58 @@ #define dsched_get_bio_stime(bio) ((bio)?((bio)->bio_caller_info3.lvalue):0) -typedef int dsched_prepare_t(struct disk *dp); -typedef void dsched_teardown_t(struct disk *dp); -typedef void dsched_flush_t(struct disk *dp, struct bio *bio); -typedef void dsched_cancel_t(struct disk *dp); -typedef int dsched_queue_t(struct disk *dp, struct bio *bio); -typedef void dsched_new_buf_t(struct buf *bp); -typedef void dsched_new_proc_t(struct proc *p); -typedef void dsched_new_thread_t(struct thread *td); -typedef void dsched_exit_buf_t(struct buf *bp); -typedef void dsched_exit_proc_t(struct proc *p); -typedef void dsched_exit_thread_t(struct thread *td); +struct dsched_thread_ctx { + TAILQ_ENTRY(dsched_thread_ctx) link; + + TAILQ_HEAD(, dsched_thread_io) tdio_list; /* list of thread_io */ + struct lock lock; + + int32_t refcount; + + struct proc *p; + struct thread *td; + int32_t dead; +}; + +struct dsched_disk_ctx { + TAILQ_ENTRY(dsched_disk_ctx) link; + + TAILQ_HEAD(, dsched_thread_io) tdio_list; /* list of thread_io of disk */ + struct lock lock; + + int32_t refcount; + + struct disk *dp; /* back pointer to disk struct */ +}; + +struct dsched_thread_io { + TAILQ_ENTRY(dsched_thread_io) link; + TAILQ_ENTRY(dsched_thread_io) dlink; + + TAILQ_HEAD(, bio) queue; /* IO queue (bio) */ + struct lock lock; + int32_t qlength;/* IO queue length */ + + int32_t refcount; + + int32_t flags; + + struct disk *dp; + struct dsched_disk_ctx *diskctx; + struct dsched_thread_ctx *tdctx; + struct proc *p; +}; + +typedef int dsched_prepare_t(struct dsched_disk_ctx *diskctx); 
+typedef void dsched_teardown_t(struct dsched_disk_ctx *diskctx); +typedef void dsched_cancel_t(struct dsched_disk_ctx *diskctx); +typedef int dsched_queue_t(struct dsched_disk_ctx *diskctx, + struct dsched_thread_io *tdio, struct bio *bio); + +typedef void dsched_new_tdio_t(struct dsched_thread_io *tdio); +typedef void dsched_new_diskctx_t(struct dsched_disk_ctx *diskctx); +typedef void dsched_destroy_tdio_t(struct dsched_thread_io *tdio); +typedef void dsched_destroy_diskctx_t(struct dsched_disk_ctx *diskctx); struct dsched_policy { @@ -101,20 +145,58 @@ struct dsched_policy { dsched_prepare_t *prepare; dsched_teardown_t *teardown; - dsched_flush_t *flush; dsched_cancel_t *cancel_all; dsched_queue_t *bio_queue; - dsched_new_buf_t *new_buf; - dsched_new_proc_t *new_proc; - dsched_new_thread_t *new_thread; - dsched_exit_buf_t *exit_buf; - dsched_exit_proc_t *exit_proc; - dsched_exit_thread_t *exit_thread; + dsched_new_tdio_t *new_tdio; + dsched_new_diskctx_t *new_diskctx; + dsched_destroy_tdio_t *destroy_tdio; + dsched_destroy_diskctx_t *destroy_diskctx; }; TAILQ_HEAD(dsched_policy_head, dsched_policy); + +#define DSCHED_THREAD_IO_LOCKINIT(x) lockinit(&(x)->lock, "tdiobioq", 0, LK_CANRECURSE) +#define DSCHED_THREAD_IO_LOCK(x) dsched_thread_io_ref((x)); \ + lockmgr(&(x)->lock, LK_EXCLUSIVE) +#define DSCHED_THREAD_IO_UNLOCK(x) lockmgr(&(x)->lock, LK_RELEASE); \ + dsched_thread_io_unref((x)) + +#define DSCHED_DISK_CTX_LOCKINIT(x) lockinit(&(x)->lock, "tdiodiskq", 0, LK_CANRECURSE) +#define DSCHED_DISK_CTX_LOCK(x) dsched_disk_ctx_ref((x)); \ + lockmgr(&(x)->lock, LK_EXCLUSIVE) +#define DSCHED_DISK_CTX_UNLOCK(x) lockmgr(&(x)->lock, LK_RELEASE); \ + dsched_disk_ctx_unref((x)) +#define DSCHED_DISK_CTX_LOCK_ASSERT(x) KKASSERT(lockstatus(&(x)->lock, curthread) == LK_EXCLUSIVE) + +#define DSCHED_GLOBAL_THREAD_CTX_LOCKINIT(x) lockinit(&dsched_tdctx_lock, "tdctxglob", 0, LK_CANRECURSE) +#define DSCHED_GLOBAL_THREAD_CTX_LOCK(x) lockmgr(&dsched_tdctx_lock, LK_EXCLUSIVE) +#define 
DSCHED_GLOBAL_THREAD_CTX_UNLOCK(x) lockmgr(&dsched_tdctx_lock, LK_RELEASE) + +#define DSCHED_THREAD_CTX_LOCKINIT(x) lockinit(&(x)->lock, "tdctx", 0, LK_CANRECURSE) +#define DSCHED_THREAD_CTX_LOCK(x) dsched_thread_ctx_ref((x)); \ + lockmgr(&(x)->lock, LK_EXCLUSIVE) +#define DSCHED_THREAD_CTX_UNLOCK(x) lockmgr(&(x)->lock, LK_RELEASE);\ + dsched_thread_ctx_unref((x)) + +#define DSCHED_LINKED_DISK_CTX 0x01 +#define DSCHED_LINKED_THREAD_CTX 0x02 + +#define DSCHED_THREAD_CTX_MAX_SZ sizeof(struct dsched_thread_ctx) +#define DSCHED_THREAD_IO_MAX_SZ 256 +#define DSCHED_DISK_CTX_MAX_SZ 256 + +#define DSCHED_POLICY_MODULE(name, evh) \ +static moduledata_t name##_mod = { \ + #name, \ + evh, \ + NULL \ +}; \ +DECLARE_MODULE(name, name##_mod, SI_SUB_PRE_DRIVERS, SI_ORDER_MIDDLE) + +SYSCTL_DECL(_dsched); + void dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit); void dsched_disk_destroy_callback(struct disk *dp); void dsched_queue(struct disk *dp, struct bio *bio); @@ -131,6 +213,30 @@ void dsched_strategy_raw(struct disk *dp, struct bio *bp); void dsched_strategy_sync(struct disk *dp, struct bio *bp); void dsched_strategy_async(struct disk *dp, struct bio *bp, biodone_t *done, void *priv); int dsched_debug(int level, char *fmt, ...) 
__printflike(2, 3); + +void policy_new(struct disk *dp, struct dsched_policy *pol); +void policy_destroy(struct disk *dp); + +void dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx); +void dsched_thread_io_ref(struct dsched_thread_io *tdio); +void dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx); +void dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx); +void dsched_thread_io_unref(struct dsched_thread_io *tdio); +void dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx); + +struct dsched_thread_io *dsched_thread_io_alloc(struct disk *dp, + struct dsched_thread_ctx *tdctx, struct dsched_policy *pol); +struct dsched_disk_ctx *dsched_disk_ctx_alloc(struct disk *dp, + struct dsched_policy *pol); +struct dsched_thread_ctx *dsched_thread_ctx_alloc(struct proc *p); + +typedef void dsched_new_buf_t(struct buf *bp); +typedef void dsched_new_proc_t(struct proc *p); +typedef void dsched_new_thread_t(struct thread *td); +typedef void dsched_exit_buf_t(struct buf *bp); +typedef void dsched_exit_proc_t(struct proc *p); +typedef void dsched_exit_thread_t(struct thread *td); + dsched_new_buf_t dsched_new_buf; dsched_new_proc_t dsched_new_proc; dsched_new_thread_t dsched_new_thread; @@ -153,4 +259,15 @@ struct dsched_ioctl { char pol_name[DSCHED_NAME_LENGTH]; }; +struct dsched_stats { + int32_t tdctx_allocations; + int32_t tdio_allocations; + int32_t diskctx_allocations; + + int32_t no_tdctx; + + int32_t nthreads; + int32_t nprocs; +}; + #endif /* _SYS_DSCHED_H_ */ diff --git a/test/dsched/Makefile b/test/dsched/Makefile new file mode 100644 index 0000000000..fa470ac718 --- /dev/null +++ b/test/dsched/Makefile @@ -0,0 +1,11 @@ +TARGETS= dschedstats + +CFLAGS= -O -g + +all: $(TARGETS) + +dschedstats: dsched_stats.c + $(CC) $(CFLAGS) dsched_stats.c -o ${.TARGET} + +clean: + rm -f $(TARGETS) diff --git a/test/dsched/dsched_stats.c b/test/dsched/dsched_stats.c new file mode 100644 index 0000000000..7a40a33922 --- /dev/null +++ b/test/dsched/dsched_stats.c @@ -0,0 +1,48 
@@
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../../sys/sys/dsched.h"
+
+
+int main(void)
+{
+	struct dsched_stats	dsched_stats;
+	size_t n = sizeof(struct dsched_stats);
+
+	if (sysctlbyname("dsched.stats", &dsched_stats, &n, NULL, 0) != 0)
+		err(1, "sysctlbyname");
+
+	printf(	"Allocations\n"
+		"thread_ctx:\t%d\n"
+		"thread_io:\t%d\n"
+		"disk_ctx:\t%d\n"
+		"---------------------------------------------\n"
+		"Procs/Threads tracked\n"
+		"procs:\t\t%d\n"
+		"threads:\t%d\n"
+		"---------------------------------------------\n"
+		"Transactions\n"
+		"w/o thread_ctx:\t%d\n",
+
+		dsched_stats.tdctx_allocations,
+		dsched_stats.tdio_allocations,
+		dsched_stats.diskctx_allocations,
+
+		dsched_stats.nprocs,
+		dsched_stats.nthreads,
+
+		dsched_stats.no_tdctx
+		);
+
+
+	return 0;
+}
diff --git a/test/dsched_fq/fqstats.c b/test/dsched_fq/fqstats.c
index 52b07f5322..58e6463984 100644
--- a/test/dsched_fq/fqstats.c
+++ b/test/dsched_fq/fqstats.c
@@ -18,38 +18,22 @@ int main(void)
 	struct dsched_fq_stats	fq_stats;
 	size_t n = sizeof(struct dsched_fq_stats);
 
-	if (sysctlbyname("kern.fq_stats", &fq_stats, &n, NULL, 0) != 0)
+	if (sysctlbyname("dsched.fq.stats", &fq_stats, &n, NULL, 0) != 0)
 		err(1, "sysctlbyname");
 
-	printf(	"Allocations\n"
-		"thread_ctx:\t%d\n"
-		"thread_io:\t%d\n"
-		"disk_ctx:\t%d\n"
-		"---------------------------------------------\n"
-		"Procs/Threads tracked\n"
-		"procs:\t\t%d\n"
-		"threads:\t%d\n"
-		"---------------------------------------------\n"
-		"Proccesses\n"
+	printf(	"Processes\n"
 		"Rate limited:\t%d\n"
 		"---------------------------------------------\n"
 		"Transactions\n"
 		"Issued:\t\t%d\n"
 		"Completed:\t%d\n"
-		"without thread_ctx:\t%d\n",
-
-		fq_stats.tdctx_allocations,
-		fq_stats.tdio_allocations,
-		fq_stats.diskctx_allocations,
-
-		fq_stats.nprocs,
-		fq_stats.nthreads,
+		"Cancelled:\t%d\n",
 
 		fq_stats.procs_limited,
 		fq_stats.transactions,
 		fq_stats.transactions_completed,
-		fq_stats.no_tdctx
+ fq_stats.cancelled );