2 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
38 #include <sys/sysctl.h>
41 #include <sys/diskslice.h>
43 #include <sys/malloc.h>
44 #include <machine/md_var.h>
45 #include <sys/ctype.h>
46 #include <sys/syslog.h>
47 #include <sys/device.h>
48 #include <sys/msgport.h>
49 #include <sys/msgport2.h>
51 #include <sys/dsched.h>
52 #include <sys/fcntl.h>
53 #include <machine/varargs.h>
/* Allocation tag used for every dsched memory allocation in this file. */
55 MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");
/* Hooks of the built-in no-op policy, defined at the bottom of this file. */
57 static dsched_prepare_t noop_prepare;
58 static dsched_teardown_t noop_teardown;
59 static dsched_cancel_t noop_cancel;
60 static dsched_queue_t noop_queue;
/* Registers a per-disk "policy" sysctl leaf; defined at end of file. */
62 static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
/* Nonzero once dsched_init() ran; guards the new/exit buf+thread hooks. */
64 static int dsched_inited = 0;
/* Nonzero once an explicit default policy was configured. */
65 static int default_set = 0;
/* Global framework lock protecting the policy list and policy switches. */
67 struct lock dsched_lock;
/* Debug verbosity threshold; exported via the dsched.debug sysctl below. */
68 static int dsched_debug_enable = 0;
/* Global counters, exported via the dsched.stats sysctl. */
70 struct dsched_stats dsched_stats;
/* Backing-malloc descriptors for the three objcaches created in init. */
72 struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
73 DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
74 struct objcache_malloc_args dsched_thread_io_malloc_args = {
75 DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
76 struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
77 DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };
/* Object caches for disk contexts, thread contexts and thread-io links. */
79 static struct objcache *dsched_diskctx_cache;
80 static struct objcache *dsched_tdctx_cache;
81 static struct objcache *dsched_tdio_cache;
/* All live thread contexts, protected by dsched_tdctx_lock (global ctx lock). */
83 TAILQ_HEAD(, dsched_thread_ctx) dsched_tdctx_list =
84 TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);
86 struct lock dsched_tdctx_lock;
/* All registered policies, protected by dsched_lock. */
88 static struct dsched_policy_head dsched_policy_list =
89 TAILQ_HEAD_INITIALIZER(dsched_policy_list);
/* Built-in pass-through policy; also the initial default policy. */
91 static struct dsched_policy dsched_noop_policy = {
94 .prepare = noop_prepare,
95 .teardown = noop_teardown,
96 .cancel_all = noop_cancel,
97 .bio_queue = noop_queue
/* Policy assigned to disks that have no explicit configuration. */
100 static struct dsched_policy *default_policy = &dsched_noop_policy;
103 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
/*
 * Emit a kernel debug message iff its level is at or below the threshold
 * configured through the dsched.debug sysctl (dsched_debug_enable).
 */
107 dsched_debug(int level, char *fmt, ...)
/* Suppress messages that are more verbose than the configured level. */
112 if (level <= dsched_debug_enable)
120 * Called on disk_create()
121 * tries to read which policy to use from loader.conf, if there's
122 * none specified, the default policy is used.
/*
 * Resolve the scheduling policy for a newly created disk.  Tunables are
 * consulted most-specific first: "dsched.policy.<head><unit>", then
 * "dsched.policy.<head>", then "dsched.policy.default"; on no match the
 * compiled-in default_policy is used.  Finally a per-disk sysctl leaf is
 * registered.  Runs with dsched_lock held exclusively.
 */
125 dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
127 char tunable_key[SPECNAMELEN + 48];
128 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
129 struct dsched_policy *policy = NULL;
131 /* Also look for serno stuff? */
132 /* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
133 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* 1) Most specific: policy keyed on head name plus unit number. */
135 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
137 if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
138 sizeof(sched_policy)) != 0) {
139 policy = dsched_find_policy(sched_policy);
/* 2) Fall back to a policy keyed on the head name alone. */
142 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
144 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
145 sizeof(sched_policy)) != 0)) {
146 policy = dsched_find_policy(sched_policy);
/* 3) Global default from loader.conf, honored only until default_set. */
149 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
150 if (!policy && !default_set && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
151 sizeof(sched_policy)) != 0)) {
152 policy = dsched_find_policy(sched_policy);
/* No tunable matched (or named an unknown policy): use default_policy. */
157 dsched_debug(0, "No policy for %s%d specified, "
158 "or policy not found\n", head_name, unit);
160 dsched_set_policy(dp, default_policy);
162 dsched_set_policy(dp, policy);
/* Expose this disk as dsched.policy.<head><unit> in the sysctl tree. */
165 ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
166 dsched_sysctl_add_disk(
167 (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
170 lockmgr(&dsched_lock, LK_RELEASE);
174 * Called from disk_setdiskinfo (or rather _setdiskinfo). This will check if
175 * there's any policy associated with the serial number of the device.
/*
 * Once the serial number is known, allow a "dsched.policy.<serno>" tunable
 * to override the policy chosen at disk-create time.  No-op when the disk
 * has no serial number.  Runs with dsched_lock held exclusively.
 */
178 dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
180 char tunable_key[SPECNAMELEN + 48];
181 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
182 struct dsched_policy *policy = NULL;
/* Nothing to match against without a serial number. */
184 if (info->d_serialno == NULL)
187 lockmgr(&dsched_lock, LK_EXCLUSIVE);
189 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
192 if((TUNABLE_STR_FETCH(tunable_key, sched_policy,
193 sizeof(sched_policy)) != 0)) {
194 policy = dsched_find_policy(sched_policy);
/* Switch the disk over to the serno-specific policy. */
198 dsched_switch(dp, policy);
/* Also register the serial number as a sysctl alias for the disk. */
201 dsched_sysctl_add_disk(
202 (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
205 lockmgr(&dsched_lock, LK_RELEASE);
209 * Called on disk_destroy()
210 * shuts down the scheduler core and cancels all remaining bios
/*
 * Tear down scheduling for a disk being destroyed: park the disk on the
 * no-op policy, cancel and tear down via the old policy, free the sysctl
 * context, and drop the old policy's reference.
 */
213 dsched_disk_destroy_callback(struct disk *dp)
215 struct dsched_policy *old_policy;
216 struct dsched_disk_ctx *diskctx;
218 lockmgr(&dsched_lock, LK_EXCLUSIVE);
220 diskctx = dsched_get_disk_priv(dp);
/* Swap in the no-op policy first so new bios pass straight through. */
222 old_policy = dp->d_sched_policy;
223 dp->d_sched_policy = &dsched_noop_policy;
224 old_policy->cancel_all(dsched_get_disk_priv(dp));
225 old_policy->teardown(dsched_get_disk_priv(dp));
/* Release the per-disk sysctl subtree if one was ever created. */
227 if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
228 sysctl_ctx_free(&diskctx->sysctl_ctx);
/* Drop the reference the disk held on its policy. */
231 atomic_subtract_int(&old_policy->ref_count, 1);
232 KKASSERT(old_policy->ref_count >= 0);
234 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Main dispatch entry: route a bio to the disk's scheduling policy.
 * If the buf carries no thread context (e.g. very early boot threads),
 * the bio bypasses scheduling via dsched_strategy_raw().  The same
 * fallback is used when the policy's bio_queue hook declines the bio
 * (nonzero return).
 */
239 dsched_queue(struct disk *dp, struct bio *bio)
241 struct dsched_thread_ctx *tdctx;
242 struct dsched_thread_io *tdio;
243 struct dsched_disk_ctx *diskctx;
245 int found = 0, error = 0;
247 tdctx = dsched_get_buf_priv(bio->bio_buf);
249 /* We don't handle this case, let dsched dispatch */
250 atomic_add_int(&dsched_stats.no_tdctx, 1);
251 dsched_strategy_raw(dp, bio);
/* Find the per-(thread,disk) io structure matching this disk. */
255 DSCHED_THREAD_CTX_LOCK(tdctx);
257 KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
258 TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
259 if (tdio->dp == dp) {
260 dsched_thread_io_ref(tdio);
266 DSCHED_THREAD_CTX_UNLOCK(tdctx);
/* The buf no longer needs its tdctx reference once we hold a tdio ref. */
267 dsched_clr_buf_priv(bio->bio_buf);
268 dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */
270 KKASSERT(found == 1);
271 diskctx = dsched_get_disk_priv(dp);
272 dsched_disk_ctx_ref(diskctx);
/* Hand the bio to the active policy; nonzero means "not taken". */
273 error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);
276 dsched_strategy_raw(dp, bio);
278 dsched_disk_ctx_unref(diskctx);
279 dsched_thread_io_unref(tdio);
284 * Called from each module_init or module_attach of each policy
285 * registers the policy in the local policy list.
/*
 * Add a policy to dsched_policy_list under dsched_lock, taking an initial
 * reference.  Registration is refused (with a log message) if a policy of
 * the same name already exists.
 */
288 dsched_register(struct dsched_policy *d_policy)
290 struct dsched_policy *policy;
293 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* Name must be unique among registered policies. */
295 policy = dsched_find_policy(d_policy->name);
298 TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
299 atomic_add_int(&d_policy->ref_count, 1);
301 dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
306 lockmgr(&dsched_lock, LK_RELEASE);
311 * Called from each module_detach of each policy
312 * unregisters the policy
/*
 * Remove a policy from dsched_policy_list.  Fails (returns early) while
 * the policy is still referenced by any disk (ref_count > 1); the final
 * registration reference is dropped here.
 */
315 dsched_unregister(struct dsched_policy *d_policy)
317 struct dsched_policy *policy;
319 lockmgr(&dsched_lock, LK_EXCLUSIVE);
320 policy = dsched_find_policy(d_policy->name);
/* Still in use by at least one disk: refuse to unregister. */
323 if (policy->ref_count > 1) {
324 lockmgr(&dsched_lock, LK_RELEASE);
327 TAILQ_REMOVE(&dsched_policy_list, policy, link);
328 atomic_subtract_int(&policy->ref_count, 1);
329 KKASSERT(policy->ref_count == 0);
331 lockmgr(&dsched_lock, LK_RELEASE);
337 * switches the policy by first removing the old one and then
338 * enabling the new one.
/*
 * Atomically (under dsched_lock) replace a disk's policy: drop the old
 * policy's reference, park the disk on the no-op policy while the old
 * policy tears down, then activate the new policy via dsched_set_policy().
 * A switch to the currently active policy is a no-op.
 */
341 dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
343 struct dsched_policy *old_policy;
345 /* If we are asked to set the same policy, do nothing */
346 if (dp->d_sched_policy == new_policy)
349 /* lock everything down, diskwise */
350 lockmgr(&dsched_lock, LK_EXCLUSIVE);
351 old_policy = dp->d_sched_policy;
353 atomic_subtract_int(&old_policy->ref_count, 1);
354 KKASSERT(old_policy->ref_count >= 0);
/* Interim no-op policy keeps I/O flowing while the old one tears down. */
356 dp->d_sched_policy = &dsched_noop_policy;
357 old_policy->teardown(dsched_get_disk_priv(dp));
360 /* Bring everything back to life */
361 dsched_set_policy(dp, new_policy);
362 lockmgr(&dsched_lock, LK_RELEASE);
368 * Loads a given policy and attaches it to the specified disk.
369 * Also initializes the core for the policy
372 dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
376 /* Check if it is locked already. if not, we acquire the devfs lock */
377 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
378 lockmgr(&dsched_lock, LK_EXCLUSIVE);
382 policy_new(dp, new_policy);
383 new_policy->prepare(dsched_get_disk_priv(dp));
384 dp->d_sched_policy = new_policy;
385 atomic_add_int(&new_policy->ref_count, 1);
386 kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
389 /* If we acquired the lock, we also get rid of it */
391 lockmgr(&dsched_lock, LK_RELEASE);
394 struct dsched_policy*
395 dsched_find_policy(char *search)
397 struct dsched_policy *policy;
398 struct dsched_policy *policy_found = NULL;
401 /* Check if it is locked already. if not, we acquire the devfs lock */
402 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
403 lockmgr(&dsched_lock, LK_EXCLUSIVE);
407 TAILQ_FOREACH(policy, &dsched_policy_list, link) {
408 if (!strcmp(policy->name, search)) {
409 policy_found = policy;
414 /* If we acquired the lock, we also get rid of it */
416 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Find a disk by device name (si_name), enumerating all known disks.
 * Returns NULL if no disk matches.
 */
422 dsched_find_disk(char *search)
424 struct disk *dp_found = NULL;
425 struct disk *dp = NULL;
/* disk_enumerate(NULL) starts the walk; each call yields the next disk. */
427 while((dp = disk_enumerate(dp))) {
428 if (!strcmp(dp->d_cdev->si_name, search)) {
/*
 * Continue a disk enumeration from 'dp', returning the next disk whose
 * active scheduling policy is 'policy' (NULL when exhausted).
 */
438 dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
440 while ((dp = disk_enumerate(dp))) {
441 if (dp->d_sched_policy == policy)
/*
 * Iterate the registered policy list: pass NULL to get the first policy,
 * or a previous result to get its successor.  Caller must hold dsched_lock.
 */
448 struct dsched_policy *
449 dsched_policy_enumerate(struct dsched_policy *pol)
452 return (TAILQ_FIRST(&dsched_policy_list));
454 return (TAILQ_NEXT(pol, link));
/*
 * Fail a queued bio: mark the underlying buf with ENXIO/B_ERROR and a
 * full residual so the caller sees no data transferred.
 */
458 dsched_cancel_bio(struct bio *bp)
460 bp->bio_buf->b_error = ENXIO;
461 bp->bio_buf->b_flags |= B_ERROR;
/* Nothing was transferred: residual equals the full request size. */
462 bp->bio_buf->b_resid = bp->bio_buf->b_bcount;
/*
 * Dispatch a bio directly to the raw device, bypassing any policy.
 * Used for bios without a thread context and as the policy fallback.
 */
468 dsched_strategy_raw(struct disk *dp, struct bio *bp)
471 * Ideally, this stuff shouldn't be needed... but just in case, we leave it in
474 KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
/* Defensive: a stale bio_track would confuse the lower layers. */
475 if(bp->bio_track != NULL) {
476 dsched_debug(LOG_INFO,
477 "dsched_strategy_raw sees non-NULL bio_track!! "
479 bp->bio_track = NULL;
481 dev_dstrategy(dp->d_rawdev, bp);
/*
 * Issue a bio synchronously: clone the request into a private buf/bio
 * pair, dispatch it to the raw device, and biowait() for completion,
 * then copy the result (resid/error) back into the caller's buf.
 */
485 dsched_strategy_sync(struct disk *dp, struct bio *bio)
487 struct buf *bp, *nbp;
/* Mirror the relevant fields of the original buf into the clone. */
495 nbp->b_cmd = bp->b_cmd;
496 nbp->b_bufsize = bp->b_bufsize;
497 nbp->b_runningbufspace = bp->b_runningbufspace;
498 nbp->b_bcount = bp->b_bcount;
499 nbp->b_resid = bp->b_resid;
500 nbp->b_data = bp->b_data;
503 * Buffers undergoing device I/O do not need a kvabase/size.
505 nbp->b_kvabase = bp->b_kvabase;
506 nbp->b_kvasize = bp->b_kvasize;
508 nbp->b_dirtyend = bp->b_dirtyend;
/* Arrange for synchronous completion notification via biodone_sync. */
510 nbio->bio_done = biodone_sync;
511 nbio->bio_flags |= BIO_SYNC;
512 nbio->bio_track = NULL;
514 nbio->bio_caller_info1.ptr = dp;
515 nbio->bio_offset = bio->bio_offset;
/* Fire the clone and block until the device completes it. */
517 dev_dstrategy(dp->d_rawdev, nbio);
518 biowait(nbio, "dschedsync");
/* Propagate the outcome back to the original request. */
519 bp->b_resid = nbp->b_resid;
520 bp->b_error = nbp->b_error;
/* Clear the borrowed kva before releasing the clone. */
523 nbp->b_kvabase = NULL;
/*
 * Issue a bio asynchronously on behalf of a policy: push a new bio level,
 * install the policy's completion callback and private data, timestamp it,
 * and dispatch to the raw device.  'done' runs at biodone time.
 */
530 dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
534 nbio = push_bio(bio);
535 nbio->bio_done = done;
536 nbio->bio_offset = bio->bio_offset;
/* Stash the disk and the policy's cookie for the completion handler. */
538 dsched_set_bio_dp(nbio, dp);
539 dsched_set_bio_priv(nbio, priv);
/* Record submission time (usable by policies for latency accounting). */
541 getmicrotime(&nbio->bio_caller_info3.tv);
542 dev_dstrategy(dp->d_rawdev, nbio);
/* Take a reference on a disk context; the count must not be negative. */
546 dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
550 refcount = atomic_fetchadd_int(&diskctx->refcount, 1);
552 KKASSERT(refcount >= 0);
/* Take a reference on a per-(thread,disk) io structure. */
556 dsched_thread_io_ref(struct dsched_thread_io *tdio)
560 refcount = atomic_fetchadd_int(&tdio->refcount, 1);
562 KKASSERT(refcount >= 0);
/* Take a reference on a thread context. */
566 dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
570 refcount = atomic_fetchadd_int(&tdctx->refcount, 1);
572 KKASSERT(refcount >= 0);
/*
 * Drop a reference on a disk context; on the last reference, mark the
 * context as in-destruction (refcount bias of -0x400), detach and unref
 * all linked tdios, run the policy's destroy_diskctx hook and return the
 * context to its objcache.
 */
576 dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
578 struct dsched_thread_io *tdio, *tdio2;
581 refcount = atomic_fetchadd_int(&diskctx->refcount, -1);
/* Valid states: alive (>= 0) or already biased for destruction. */
584 KKASSERT(refcount >= 0 || refcount <= -0x400);
587 atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
589 kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
/* Unlink every tdio still attached to this disk context. */
592 lockmgr(&diskctx->lock, LK_EXCLUSIVE);
593 TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
594 TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
595 tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
596 dsched_thread_io_unref(tdio);
598 lockmgr(&diskctx->lock, LK_RELEASE);
/* Give the policy a chance to free its private diskctx state. */
599 if (diskctx->dp->d_sched_policy->destroy_diskctx)
600 diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
601 objcache_put(dsched_diskctx_cache, diskctx);
602 atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
/*
 * Drop a reference on a tdio; on the last reference, mark it as
 * in-destruction, unlink it from both its disk context and its thread
 * context (if still linked), run the policy's destroy_tdio hook, free it
 * and finally drop the disk-context reference it held.
 */
607 dsched_thread_io_unref(struct dsched_thread_io *tdio)
609 struct dsched_thread_ctx *tdctx;
610 struct dsched_disk_ctx *diskctx;
613 refcount = atomic_fetchadd_int(&tdio->refcount, -1);
615 KKASSERT(refcount >= 0 || refcount <= -0x400);
618 atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
620 kprintf("tdio (%p) destruction started, trace:\n", tdio);
623 diskctx = tdio->diskctx;
624 KKASSERT(diskctx != NULL);
/* A tdio must have drained its queue before it can be destroyed. */
625 KKASSERT(tdio->qlength == 0);
/* Unlink from the disk context's tdio list, if still linked. */
627 if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
628 lockmgr(&diskctx->lock, LK_EXCLUSIVE);
630 TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
631 tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
633 lockmgr(&diskctx->lock, LK_RELEASE);
/* Unlink from the owning thread context's list, if still linked. */
636 if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
638 KKASSERT(tdctx != NULL);
640 lockmgr(&tdctx->lock, LK_EXCLUSIVE);
642 TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
643 tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
645 lockmgr(&tdctx->lock, LK_RELEASE);
/* Let the policy free its private per-tdio state before release. */
647 if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
648 tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
649 objcache_put(dsched_tdio_cache, tdio);
650 atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
/* Release the diskctx reference taken when the tdio was allocated. */
652 dsched_disk_ctx_unref(diskctx);
/*
 * Drop a reference on a thread context; on the last reference, mark it
 * as in-destruction, unref all of its tdios, remove it from the global
 * tdctx list (under the global ctx lock) and return it to its objcache.
 */
658 dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
660 struct dsched_thread_io *tdio, *tdio2;
663 refcount = atomic_fetchadd_int(&tdctx->refcount, -1);
665 KKASSERT(refcount >= 0 || refcount <= -0x400);
668 atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
670 kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
673 DSCHED_GLOBAL_THREAD_CTX_LOCK();
/* Detach and release every tdio owned by this thread context. */
675 TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
676 TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
677 tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
678 dsched_thread_io_unref(tdio);
680 TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);
682 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
684 objcache_put(dsched_tdctx_cache, tdctx);
685 atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
/*
 * Allocate and initialize a tdio linking thread context 'tdctx' to disk
 * 'dp' under policy 'pol'.  The tdio is inserted on both the disk
 * context's and (when given) the thread context's lists, and it holds a
 * reference on the disk context for its lifetime.
 */
690 struct dsched_thread_io *
691 dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
692 struct dsched_policy *pol)
694 struct dsched_thread_io *tdio;
/* The tdio pins its disk context; released in dsched_thread_io_unref(). */
696 dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
698 tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
699 bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);
701 /* XXX: maybe we do need another ref for the disk list for tdio */
702 dsched_thread_io_ref(tdio);
704 DSCHED_THREAD_IO_LOCKINIT(tdio);
707 tdio->diskctx = dsched_get_disk_priv(dp);
708 TAILQ_INIT(&tdio->queue);
/* Link onto the disk context's list of tdios. */
713 TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
714 tdio->flags |= DSCHED_LINKED_DISK_CTX;
720 /* Put the tdio in the tdctx list */
721 DSCHED_THREAD_CTX_LOCK(tdctx);
722 TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
723 DSCHED_THREAD_CTX_UNLOCK(tdctx);
724 tdio->flags |= DSCHED_LINKED_THREAD_CTX;
727 atomic_add_int(&dsched_stats.tdio_allocations, 1);
/*
 * Allocate and initialize a per-disk scheduling context for 'dp', taking
 * an initial reference and invoking the policy's new_diskctx hook (if
 * provided) so the policy can set up its private state.
 */
732 struct dsched_disk_ctx *
733 dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
735 struct dsched_disk_ctx *diskctx;
737 diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
738 bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
739 dsched_disk_ctx_ref(diskctx);
741 DSCHED_DISK_CTX_LOCKINIT(diskctx);
742 TAILQ_INIT(&diskctx->tdio_list);
744 atomic_add_int(&dsched_stats.diskctx_allocations, 1);
/* Optional policy callback to initialize policy-private diskctx data. */
745 if (pol->new_diskctx)
746 pol->new_diskctx(diskctx);
/*
 * Allocate a thread context (for process 'p' or a kernel thread) with an
 * initial reference, pre-create a tdio for every currently known disk
 * under that disk's active policy, and link the context onto the global
 * tdctx list.
 */
751 struct dsched_thread_ctx *
752 dsched_thread_ctx_alloc(struct proc *p)
754 struct dsched_thread_ctx *tdctx;
755 struct dsched_thread_io *tdio;
756 struct disk *dp = NULL;
758 tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
759 bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
760 dsched_thread_ctx_ref(tdctx);
762 kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
764 DSCHED_THREAD_CTX_LOCKINIT(tdctx);
765 TAILQ_INIT(&tdctx->tdio_list);
/* One tdio per existing disk so dsched_queue() always finds a match. */
769 while ((dp = disk_enumerate(dp))) {
770 tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
/* Publish the new context on the global list. */
773 DSCHED_GLOBAL_THREAD_CTX_LOCK();
774 TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
775 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
777 atomic_add_int(&dsched_stats.tdctx_allocations, 1);
778 /* XXX: no callback here */
/*
 * Attach policy 'pol' to disk 'dp': allocate and install the disk
 * context, then create a tdio for this disk in every existing thread
 * context so already-running threads can queue I/O to it.
 */
783 policy_new(struct disk *dp, struct dsched_policy *pol) {
784 struct dsched_thread_ctx *tdctx;
785 struct dsched_disk_ctx *diskctx;
786 struct dsched_thread_io *tdio;
788 diskctx = dsched_disk_ctx_alloc(dp, pol);
/* Extra ref held on behalf of the disk itself; paired in policy_destroy. */
789 dsched_disk_ctx_ref(diskctx);
790 dsched_set_disk_priv(dp, diskctx);
/* Walk every live thread context and give it a tdio for this disk. */
792 DSCHED_GLOBAL_THREAD_CTX_LOCK();
793 TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
794 tdio = dsched_thread_io_alloc(dp, tdctx, pol);
796 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
/*
 * Detach the current policy's disk context from 'dp', dropping both
 * references taken in policy_new()/dsched_set_policy() and clearing the
 * disk's private pointer.
 */
801 policy_destroy(struct disk *dp) {
802 struct dsched_disk_ctx *diskctx;
804 diskctx = dsched_get_disk_priv(dp);
805 KKASSERT(diskctx != NULL);
807 dsched_disk_ctx_unref(diskctx); /* from prepare */
808 dsched_disk_ctx_unref(diskctx); /* from alloc */
810 dsched_set_disk_priv(dp, NULL);
/*
 * Hook called when a buf is created: tag it with the creating thread's
 * context (process-level if available, else the kernel thread's) and take
 * a tdctx reference that dsched_queue()/dsched_exit_buf() will release.
 * No-op before the framework is initialized.
 */
814 dsched_new_buf(struct buf *bp)
816 struct dsched_thread_ctx *tdctx = NULL;
818 if (dsched_inited == 0)
/* Prefer the per-process context when one exists. */
821 if (curproc != NULL) {
822 tdctx = dsched_get_proc_priv(curproc);
824 /* This is a kernel thread, so no proc info is available */
825 tdctx = dsched_get_thread_priv(curthread);
830 * XXX: hack. we don't want this assert because we aren't catching all
831 * threads. mi_startup() is still getting away without an tdctx.
834 /* by now we should have an tdctx. if not, something bad is going on */
835 KKASSERT(tdctx != NULL);
/* Reference released later, either in dsched_queue() or dsched_exit_buf(). */
839 dsched_thread_ctx_ref(tdctx);
841 dsched_set_buf_priv(bp, tdctx);
/*
 * Hook called when a buf is released: drop the thread-context tag (and
 * its reference) that dsched_new_buf() attached, if still present.
 */
845 dsched_exit_buf(struct buf *bp)
847 struct dsched_thread_ctx *tdctx;
849 tdctx = dsched_get_buf_priv(bp);
851 dsched_clr_buf_priv(bp);
852 dsched_thread_ctx_unref(tdctx);
/*
 * Hook called on process creation: allocate a thread context for the new
 * process, take the process's own reference and stash it in the proc.
 * No-op before the framework is initialized.
 */
857 dsched_new_proc(struct proc *p)
859 struct dsched_thread_ctx *tdctx;
861 if (dsched_inited == 0)
866 tdctx = dsched_thread_ctx_alloc(p);
/* Second reference is owned by the proc; both dropped in exit_proc. */
868 dsched_thread_ctx_ref(tdctx);
870 dsched_set_proc_priv(p, tdctx);
871 atomic_add_int(&dsched_stats.nprocs, 1);
/*
 * Hook called on (kernel) thread creation: allocate a thread context
 * with no associated proc and attach it to the thread.  No-op before
 * the framework is initialized.
 */
876 dsched_new_thread(struct thread *td)
878 struct dsched_thread_ctx *tdctx;
880 if (dsched_inited == 0)
883 KKASSERT(td != NULL);
/* NULL proc: this context belongs to a pure kernel thread. */
885 tdctx = dsched_thread_ctx_alloc(NULL);
887 dsched_thread_ctx_ref(tdctx);
889 dsched_set_thread_priv(td, tdctx);
890 atomic_add_int(&dsched_stats.nthreads, 1);
/*
 * Hook called on process exit: poison and detach the process's thread
 * context and drop both references (allocation + proc's own).
 * No-op before the framework is initialized.
 */
894 dsched_exit_proc(struct proc *p)
896 struct dsched_thread_ctx *tdctx;
898 if (dsched_inited == 0)
903 tdctx = dsched_get_proc_priv(p);
904 KKASSERT(tdctx != NULL);
/* Poison marker to catch use-after-exit of this context. */
906 tdctx->dead = 0xDEAD;
907 dsched_set_proc_priv(p, 0);
909 dsched_thread_ctx_unref(tdctx); /* one for alloc, */
910 dsched_thread_ctx_unref(tdctx); /* one for ref */
911 atomic_subtract_int(&dsched_stats.nprocs, 1);
/*
 * Hook called on thread exit: mirror of dsched_exit_proc() for kernel
 * threads — poison, detach and drop both context references.
 * No-op before the framework is initialized.
 */
916 dsched_exit_thread(struct thread *td)
918 struct dsched_thread_ctx *tdctx;
920 if (dsched_inited == 0)
923 KKASSERT(td != NULL);
925 tdctx = dsched_get_thread_priv(td);
926 KKASSERT(tdctx != NULL);
/* Poison marker to catch use-after-exit of this context. */
928 tdctx->dead = 0xDEAD;
929 dsched_set_thread_priv(td, 0);
931 dsched_thread_ctx_unref(tdctx); /* one for alloc, */
932 dsched_thread_ctx_unref(tdctx); /* one for ref */
933 atomic_subtract_int(&dsched_stats.nthreads, 1);
/*
 * Convenience for policies: create a tdio binding the CURRENT thread's
 * context to the disk behind 'diskctx' under policy 'pol'.  The calling
 * thread must already have a thread context.
 */
936 struct dsched_thread_io *
937 dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
938 struct dsched_policy *pol) {
939 struct dsched_thread_ctx *tdctx;
940 struct dsched_thread_io *tdio;
942 tdctx = dsched_get_thread_priv(curthread);
943 KKASSERT(tdctx != NULL);
945 tdio = dsched_thread_io_alloc(diskctx->dp, tdctx, pol);
949 /* DEFAULT NOOP POLICY */
/* No-op prepare: nothing to set up for pass-through scheduling. */
952 noop_prepare(struct dsched_disk_ctx *diskctx)
/* No-op teardown: nothing was allocated, nothing to free. */
958 noop_teardown(struct dsched_disk_ctx *diskctx)
/* No-op cancel: nothing is ever queued, so nothing to cancel. */
964 noop_cancel(struct dsched_disk_ctx *diskctx)
/*
 * No-op queueing: dispatch the bio straight to the raw device (or
 * asynchronously with noop_completed) without any reordering.
 */
970 noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
973 dsched_strategy_raw(diskctx->dp, bio);
975 dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
/*
 * Body of dsched_init() (see SYSINIT below): create the three object
 * caches, zero the statistics, initialize the framework locks and
 * register the built-in no-op policy.
 */
986 dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
988 objcache_malloc_alloc,
989 objcache_malloc_free,
990 &dsched_thread_io_malloc_args );
992 dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
994 objcache_malloc_alloc,
995 objcache_malloc_free,
996 &dsched_thread_ctx_malloc_args );
998 dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
1000 objcache_malloc_alloc,
1001 objcache_malloc_free,
1002 &dsched_disk_ctx_malloc_args );
1004 bzero(&dsched_stats, sizeof(struct dsched_stats));
/* dsched_lock may be taken recursively (see dsched_set_policy). */
1006 lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
1007 DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();
/* The no-op policy is always available and is the initial default. */
1009 dsched_register(&dsched_noop_policy);
/* Run dsched_init()/dsched_uninit() just before the disk subsystem comes up. */
1019 SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
1020 SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);
/* sysctl handler: export the dsched_stats structure as an opaque blob. */
1026 sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
1028 return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
/*
 * sysctl handler: emit a space-separated list of all registered policy
 * names, followed by a NUL terminator.  Walks the policy list under
 * dsched_lock.
 */
1032 sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
1034 struct dsched_policy *pol = NULL;
1035 int error, first = 1;
1037 lockmgr(&dsched_lock, LK_EXCLUSIVE);
1039 while ((pol = dsched_policy_enumerate(pol))) {
/* Separator goes before every name except the first. */
1041 error = SYSCTL_OUT(req, " ", 1);
1047 error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
1053 lockmgr(&dsched_lock, LK_RELEASE);
/* Terminate the user-visible string. */
1055 error = SYSCTL_OUT(req, "", 1);
/*
 * Per-disk sysctl handler (dsched.policy.<disk>): reading returns the
 * disk's current policy name; writing a registered policy name switches
 * the disk to that policy via dsched_switch().
 */
1061 sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
1063 char buf[DSCHED_POLICY_NAME_LENGTH];
1064 struct dsched_disk_ctx *diskctx = arg1;
1065 struct dsched_policy *pol = NULL;
1068 if (diskctx == NULL) {
1072 lockmgr(&dsched_lock, LK_EXCLUSIVE);
1074 pol = diskctx->dp->d_sched_policy;
1075 memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);
1077 error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
/* Read-only access or error: nothing further to do. */
1078 if (error || req->newptr == NULL) {
1079 lockmgr(&dsched_lock, LK_RELEASE);
/* Written name must match a registered policy. */
1083 pol = dsched_find_policy(buf);
1085 lockmgr(&dsched_lock, LK_RELEASE);
1089 dsched_switch(diskctx->dp, pol);
1091 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * sysctl handler for dsched.policy.default: reading returns the name of
 * the current default policy; writing a registered policy name makes it
 * the default for newly attached disks.
 */
1097 sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
1099 char buf[DSCHED_POLICY_NAME_LENGTH];
1100 struct dsched_policy *pol = NULL;
1103 lockmgr(&dsched_lock, LK_EXCLUSIVE);
1105 pol = default_policy;
1106 memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);
1108 error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
/* Read-only access or error: nothing further to do. */
1109 if (error || req->newptr == NULL) {
1110 lockmgr(&dsched_lock, LK_RELEASE);
/* Written name must match a registered policy. */
1114 pol = dsched_find_policy(buf);
1116 lockmgr(&dsched_lock, LK_RELEASE);
1121 default_policy = pol;
1123 lockmgr(&dsched_lock, LK_RELEASE);
/* Top-level "dsched" sysctl tree and its static nodes/leaves. */
1128 SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
1129 "Disk Scheduler Framework (dsched) magic");
/* Per-disk policy leaves are added dynamically under dsched.policy. */
1130 SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
1131 "List of disks and their policies");
1132 SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
1133 0, "Enable dsched debugging");
1134 SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
1135 0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
1136 "dsched statistics");
1137 SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
1138 NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
1139 SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
1140 NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");
/*
 * Register a dsched.policy.<name> string leaf for a disk, creating the
 * disk context's sysctl context on first use.  The leaf is handled by
 * sysctl_dsched_policy() with the diskctx as arg1.
 */
1143 dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
/* Lazily initialize the per-disk sysctl context exactly once. */
1145 if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
1146 diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
1147 sysctl_ctx_init(&diskctx->sysctl_ctx);
1150 SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
1151 OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
1152 diskctx, 0, sysctl_dsched_policy, "A", "policy");