2 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
38 #include <sys/sysctl.h>
41 #include <sys/diskslice.h>
43 #include <sys/malloc.h>
44 #include <machine/md_var.h>
45 #include <sys/ctype.h>
46 #include <sys/syslog.h>
47 #include <sys/device.h>
48 #include <sys/msgport.h>
49 #include <sys/msgport2.h>
51 #include <sys/dsched.h>
52 #include <sys/fcntl.h>
53 #include <machine/varargs.h>
55 MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");
57 static dsched_prepare_t default_prepare;
58 static dsched_teardown_t default_teardown;
59 static dsched_cancel_t default_cancel;
60 static dsched_queue_t default_queue;
62 static d_open_t dsched_dev_open;
63 static d_close_t dsched_dev_close;
64 static d_ioctl_t dsched_dev_ioctl;
66 static int dsched_dev_list_disks(struct dsched_ioctl *data);
67 static int dsched_dev_list_disk(struct dsched_ioctl *data);
68 static int dsched_dev_list_policies(struct dsched_ioctl *data);
69 static int dsched_dev_handle_switch(char *disk, char *policy);
71 static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
73 static int dsched_inited = 0;
75 struct lock dsched_lock;
76 static int dsched_debug_enable = 0;
77 static cdev_t dsched_dev;
79 struct dsched_stats dsched_stats;
81 struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
82 DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
83 struct objcache_malloc_args dsched_thread_io_malloc_args = {
84 DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
85 struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
86 DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };
88 static struct objcache *dsched_diskctx_cache;
89 static struct objcache *dsched_tdctx_cache;
90 static struct objcache *dsched_tdio_cache;
92 TAILQ_HEAD(, dsched_thread_ctx) dsched_tdctx_list =
93 TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);
95 struct lock dsched_tdctx_lock;
97 static struct dsched_policy_head dsched_policy_list =
98 TAILQ_HEAD_INITIALIZER(dsched_policy_list);
100 static struct dsched_policy dsched_default_policy = {
103 .prepare = default_prepare,
104 .teardown = default_teardown,
105 .cancel_all = default_cancel,
106 .bio_queue = default_queue
109 static struct dev_ops dsched_dev_ops = {
111 .d_open = dsched_dev_open,
112 .d_close = dsched_dev_close,
113 .d_ioctl = dsched_dev_ioctl
117 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
/*
 * Debug printf gated on the dsched.debug sysctl/tunable: output is produced
 * only when `level` <= dsched_debug_enable.  (Body partially elided in this
 * view; presumably forwards fmt/varargs to kprintf — TODO confirm.)
 */
121 dsched_debug(int level, char *fmt, ...)
126 if (level <= dsched_debug_enable)
134 * Called on disk_create()
135 * tries to read which policy to use from loader.conf, if there's
136 * none specified, the default policy is used.
/*
 * disk_create() hook: choose the scheduling policy for a new disk.
 * Probes loader tunables from most to least specific and falls back to the
 * built-in default policy, then registers the per-disk sysctl node.
 */
139 dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
141 char tunable_key[SPECNAMELEN + 48];
142 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
143 struct dsched_policy *policy = NULL;
145 /* Also look for serno stuff? */
146 /* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
147 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* 1st choice: per-unit tunable "dsched.policy.<head><unit>" */
149 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
151 if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
152 sizeof(sched_policy)) != 0) {
153 policy = dsched_find_policy(sched_policy);
/* 2nd choice: per-driver tunable "dsched.policy.<head>" */
156 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
158 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
159 sizeof(sched_policy)) != 0)) {
160 policy = dsched_find_policy(sched_policy);
/* 3rd choice: global tunable "dsched.policy.default" */
163 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
164 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
165 sizeof(sched_policy)) != 0)) {
166 policy = dsched_find_policy(sched_policy);
/* Nothing found (or name unknown): fall back to the noop default policy. */
170 dsched_debug(0, "No policy for %s%d specified, "
171 "or policy not found\n", head_name, unit);
172 dsched_set_policy(dp, &dsched_default_policy);
174 dsched_set_policy(dp, policy);
/* Expose "<head><unit>" under the dsched.policy sysctl tree. */
177 ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
178 dsched_sysctl_add_disk(
179 (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
182 lockmgr(&dsched_lock, LK_RELEASE);
186 * Called from disk_setdiskinfo (or rather _setdiskinfo). This will check if
187 * there's any policy associated with the serial number of the device.
/*
 * disk_setdiskinfo() hook: once the serial number is known, look for a
 * serno-keyed tunable ("dsched.policy.<serialno>") and switch to that
 * policy if one is configured.  No-op when d_serialno is NULL.
 */
190 dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
192 char tunable_key[SPECNAMELEN + 48];
193 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
194 struct dsched_policy *policy = NULL;
196 if (info->d_serialno == NULL)
199 lockmgr(&dsched_lock, LK_EXCLUSIVE);
201 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
204 if((TUNABLE_STR_FETCH(tunable_key, sched_policy,
205 sizeof(sched_policy)) != 0)) {
206 policy = dsched_find_policy(sched_policy);
/* Switch to the serno-specific policy and (re)publish the sysctl node. */
210 dsched_switch(dp, policy);
213 dsched_sysctl_add_disk(
214 (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
217 lockmgr(&dsched_lock, LK_RELEASE);
221 * Called on disk_destroy()
222 * shuts down the scheduler core and cancels all remaining bios
/*
 * disk_destroy() hook: detach the active policy from a dying disk.
 * The disk is pointed at the default policy first so no new bios reach the
 * old policy while cancel_all()/teardown() drain and shut it down; finally
 * the per-disk sysctl context and the policy reference are released.
 */
225 dsched_disk_destroy_callback(struct disk *dp)
227 struct dsched_policy *old_policy;
228 struct dsched_disk_ctx *diskctx;
230 lockmgr(&dsched_lock, LK_EXCLUSIVE);
232 diskctx = dsched_get_disk_priv(dp);
234 old_policy = dp->d_sched_policy;
235 dp->d_sched_policy = &dsched_default_policy;
236 old_policy->cancel_all(dsched_get_disk_priv(dp));
237 old_policy->teardown(dsched_get_disk_priv(dp));
/* Only free the sysctl ctx if dsched_sysctl_add_disk() ever initialized it. */
239 if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
240 sysctl_ctx_free(&diskctx->sysctl_ctx);
243 atomic_subtract_int(&old_policy->ref_count, 1);
244 KKASSERT(old_policy->ref_count >= 0);
246 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Main bio entry point from the disk layer.  Resolves the per-thread I/O
 * context (tdio) matching this disk and hands the bio to the active
 * policy's bio_queue().  Bios without a thread ctx (or rejected by the
 * policy) are dispatched directly via dsched_strategy_raw().
 */
251 dsched_queue(struct disk *dp, struct bio *bio)
253 struct dsched_thread_ctx *tdctx;
254 struct dsched_thread_io *tdio;
255 struct dsched_disk_ctx *diskctx;
257 int found = 0, error = 0;
259 tdctx = dsched_get_buf_priv(bio->bio_buf);
261 /* We don't handle this case, let dsched dispatch */
262 atomic_add_int(&dsched_stats.no_tdctx, 1);
263 dsched_strategy_raw(dp, bio);
/* Find the tdio attached to this particular disk. */
267 DSCHED_THREAD_CTX_LOCK(tdctx);
269 KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
270 TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
271 if (tdio->dp == dp) {
272 dsched_thread_io_ref(tdio);
278 DSCHED_THREAD_CTX_UNLOCK(tdctx);
/* The buf no longer needs the thread ctx; drop the new_buf reference. */
279 dsched_clr_buf_priv(bio->bio_buf);
280 dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */
282 KKASSERT(found == 1);
283 diskctx = dsched_get_disk_priv(dp);
284 dsched_disk_ctx_ref(diskctx);
/* Non-zero return from the policy means "not queued" — dispatch raw. */
285 error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);
288 dsched_strategy_raw(dp, bio);
290 dsched_disk_ctx_unref(diskctx);
291 dsched_thread_io_unref(tdio);
296 * Called from each module_init or module_attach of each policy
297 * registers the policy in the local policy list.
/*
 * Register a policy (called from each policy module's init/attach).
 * Duplicate names are rejected with a log message; otherwise the policy is
 * appended to the global list with an initial reference.
 */
300 dsched_register(struct dsched_policy *d_policy)
302 struct dsched_policy *policy;
305 lockmgr(&dsched_lock, LK_EXCLUSIVE);
307 policy = dsched_find_policy(d_policy->name);
310 TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
311 atomic_add_int(&d_policy->ref_count, 1);
313 dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
318 lockmgr(&dsched_lock, LK_RELEASE);
323 * Called from each module_detach of each policy
324 * unregisters the policy
/*
 * Unregister a policy (called from each policy module's detach).
 * Fails (returns early with the lock dropped) while any disk still holds a
 * reference beyond the registration one; otherwise removes the policy and
 * drops the final reference.
 */
327 dsched_unregister(struct dsched_policy *d_policy)
329 struct dsched_policy *policy;
331 lockmgr(&dsched_lock, LK_EXCLUSIVE);
332 policy = dsched_find_policy(d_policy->name);
/* ref_count > 1 means some disk still uses this policy: refuse removal. */
335 if (policy->ref_count > 1) {
336 lockmgr(&dsched_lock, LK_RELEASE);
339 TAILQ_REMOVE(&dsched_policy_list, policy, link);
340 atomic_subtract_int(&policy->ref_count, 1);
341 KKASSERT(policy->ref_count == 0);
343 lockmgr(&dsched_lock, LK_RELEASE);
349 * switches the policy by first removing the old one and then
350 * enabling the new one.
/*
 * Switch a disk to a different policy: tear down the old one (routing bios
 * through the default policy in the interim) and then activate the new one
 * via dsched_set_policy().  No-op when the policy is unchanged.
 */
353 dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
355 struct dsched_policy *old_policy;
357 /* If we are asked to set the same policy, do nothing */
358 if (dp->d_sched_policy == new_policy)
361 /* lock everything down, diskwise */
362 lockmgr(&dsched_lock, LK_EXCLUSIVE);
363 old_policy = dp->d_sched_policy;
365 atomic_subtract_int(&old_policy->ref_count, 1);
366 KKASSERT(old_policy->ref_count >= 0);
/* Park the disk on the default policy while the old one is torn down. */
368 dp->d_sched_policy = &dsched_default_policy;
369 old_policy->teardown(dsched_get_disk_priv(dp));
372 /* Bring everything back to life */
373 dsched_set_policy(dp, new_policy);
374 lockmgr(&dsched_lock, LK_RELEASE);
380 * Loads a given policy and attaches it to the specified disk.
381 * Also initializes the core for the policy
/*
 * Attach `new_policy` to `dp`: allocate the per-disk/per-thread contexts
 * (policy_new), call the policy's prepare() hook, and take a reference.
 * Acquires dsched_lock itself unless the caller already holds it.
 *
 * NOTE(review): `!(lockstatus(...)) == LK_EXCLUSIVE` is an operator
 * precedence bug — `!` is applied before `==`, so the expression compares
 * 0/1 against LK_EXCLUSIVE instead of testing lock ownership.  Intended:
 * `lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE`.  Confirm against
 * upstream before changing, since the elided lines track a `locked` flag.
 */
384 dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
388 /* Check if it is locked already. if not, we acquire the devfs lock */
389 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
390 lockmgr(&dsched_lock, LK_EXCLUSIVE);
394 policy_new(dp, new_policy);
395 new_policy->prepare(dsched_get_disk_priv(dp));
396 dp->d_sched_policy = new_policy;
397 atomic_add_int(&new_policy->ref_count, 1);
398 kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
401 /* If we acquired the lock, we also get rid of it */
403 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Look up a registered policy by name; returns NULL when not found.
 * Takes dsched_lock itself if the caller does not already hold it.
 *
 * NOTE(review): same precedence bug as in dsched_set_policy() —
 * `!(lockstatus(...)) == LK_EXCLUSIVE` should be
 * `lockstatus(...) != LK_EXCLUSIVE`.
 */
406 struct dsched_policy*
407 dsched_find_policy(char *search)
409 struct dsched_policy *policy;
410 struct dsched_policy *policy_found = NULL;
413 /* Check if it is locked already. if not, we acquire the devfs lock */
414 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
415 lockmgr(&dsched_lock, LK_EXCLUSIVE);
419 TAILQ_FOREACH(policy, &dsched_policy_list, link) {
420 if (!strcmp(policy->name, search)) {
421 policy_found = policy;
426 /* If we acquired the lock, we also get rid of it */
428 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Find a disk by its cdev name (e.g. "da0") by walking the global disk
 * enumeration; returns the match (via the elided tail) or NULL.
 */
434 dsched_find_disk(char *search)
436 struct disk *dp_found = NULL;
437 struct disk *dp = NULL;
439 while((dp = disk_enumerate(dp))) {
440 if (!strcmp(dp->d_cdev->si_name, search)) {
/*
 * Iterator: return the next disk after `dp` whose active policy is
 * `policy` (NULL `dp` starts a fresh enumeration).
 */
450 dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
452 while ((dp = disk_enumerate(dp))) {
453 if (dp->d_sched_policy == policy)
/*
 * Iterator over the registered-policy list: NULL starts at the head,
 * otherwise returns the successor of `pol`.
 */
460 struct dsched_policy *
461 dsched_policy_enumerate(struct dsched_policy *pol)
464 return (TAILQ_FIRST(&dsched_policy_list));
466 return (TAILQ_NEXT(pol, link));
/*
 * Abort a queued bio: fail the underlying buf with ENXIO and a full
 * residual so the originator sees no progress.
 */
470 dsched_cancel_bio(struct bio *bp)
472 bp->bio_buf->b_error = ENXIO;
473 bp->bio_buf->b_flags |= B_ERROR;
474 bp->bio_buf->b_resid = bp->bio_buf->b_bcount;
/*
 * Pass a bio straight to the raw device, bypassing any scheduling.
 * Defensively clears a stale bio_track (with a debug message) since the
 * bio is handed directly to dev_dstrategy().
 */
480 dsched_strategy_raw(struct disk *dp, struct bio *bp)
483 * Ideally, this stuff shouldn't be needed... but just in case, we leave it in
486 KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
487 if(bp->bio_track != NULL) {
488 dsched_debug(LOG_INFO,
489 "dsched_strategy_raw sees non-NULL bio_track!! "
491 bp->bio_track = NULL;
493 dev_dstrategy(dp->d_rawdev, bp);
/*
 * Synchronous dispatch: clone the bio's buf into a temporary (nbp/nbio),
 * mark it BIO_SYNC with biodone_sync as completion, issue it to the raw
 * device and biowait() for it, then copy resid/error back to the original
 * buf.  (Allocation/initialization of nbp/nbio is elided in this view.)
 */
497 dsched_strategy_sync(struct disk *dp, struct bio *bio)
499 struct buf *bp, *nbp;
/* Mirror the relevant fields of the original buf into the clone. */
507 nbp->b_cmd = bp->b_cmd;
508 nbp->b_bufsize = bp->b_bufsize;
509 nbp->b_runningbufspace = bp->b_runningbufspace;
510 nbp->b_bcount = bp->b_bcount;
511 nbp->b_resid = bp->b_resid;
512 nbp->b_data = bp->b_data;
513 nbp->b_kvabase = bp->b_kvabase;
514 nbp->b_kvasize = bp->b_kvasize;
515 nbp->b_dirtyend = bp->b_dirtyend;
/* Completion via biodone_sync + BIO_SYNC lets us biowait() below. */
517 nbio->bio_done = biodone_sync;
518 nbio->bio_flags |= BIO_SYNC;
519 nbio->bio_track = NULL;
521 nbio->bio_caller_info1.ptr = dp;
522 nbio->bio_offset = bio->bio_offset;
524 dev_dstrategy(dp->d_rawdev, nbio);
525 biowait(nbio, "dschedsync");
/* Propagate the result back to the caller's buf. */
526 bp->b_resid = nbp->b_resid;
527 bp->b_error = nbp->b_error;
/*
 * Asynchronous dispatch: push a new bio layer carrying the caller's
 * completion routine (`done`) and private data, stamp the submit time in
 * bio_caller_info3.tv, and issue it to the raw device.
 */
533 dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
537 nbio = push_bio(bio);
538 nbio->bio_done = done;
539 nbio->bio_offset = bio->bio_offset;
541 dsched_set_bio_dp(nbio, dp);
542 dsched_set_bio_priv(nbio, priv);
544 getmicrotime(&nbio->bio_caller_info3.tv);
545 dev_dstrategy(dp->d_rawdev, nbio);
/*
 * Take a reference on a disk context; the old count must not be negative
 * (negative counts mark objects in destruction — see the unref path).
 */
549 dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
553 refcount = atomic_fetchadd_int(&diskctx->refcount, 1);
555 KKASSERT(refcount >= 0);
/* Take a reference on a per-thread/per-disk I/O context (tdio). */
559 dsched_thread_io_ref(struct dsched_thread_io *tdio)
563 refcount = atomic_fetchadd_int(&tdio->refcount, 1);
565 KKASSERT(refcount >= 0);
/* Take a reference on a per-thread context (tdctx). */
569 dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
573 refcount = atomic_fetchadd_int(&tdctx->refcount, 1);
575 KKASSERT(refcount >= 0);
/*
 * Drop a disk-context reference; on the last one, destroy the context:
 * the count is biased by -0x400 to mark "in destruction" (hence the
 * assertion that counts are either sane or clearly biased), linked tdios
 * are detached and released, the policy's destroy_diskctx hook runs, and
 * the object returns to its objcache.
 */
579 dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
581 struct dsched_thread_io *tdio, *tdio2;
584 refcount = atomic_fetchadd_int(&diskctx->refcount, -1);
587 KKASSERT(refcount >= 0 || refcount <= -0x400);
590 atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
592 kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
/* Unlink every tdio still attached to this disk and drop its disk-list ref. */
595 lockmgr(&diskctx->lock, LK_EXCLUSIVE);
596 TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
597 TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
598 tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
599 dsched_thread_io_unref(tdio);
601 lockmgr(&diskctx->lock, LK_RELEASE);
602 if (diskctx->dp->d_sched_policy->destroy_diskctx)
603 diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
604 objcache_put(dsched_diskctx_cache, diskctx);
605 atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
/*
 * Drop a tdio reference; on the last one, destroy it: mark in-destruction
 * (-0x400 bias), unlink from the disk context and thread context lists it
 * is still on, run the policy's destroy_tdio hook, free to the objcache,
 * and finally drop the diskctx reference the tdio held since allocation.
 */
610 dsched_thread_io_unref(struct dsched_thread_io *tdio)
612 struct dsched_thread_ctx *tdctx;
613 struct dsched_disk_ctx *diskctx;
616 refcount = atomic_fetchadd_int(&tdio->refcount, -1);
618 KKASSERT(refcount >= 0 || refcount <= -0x400);
621 atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
623 kprintf("tdio (%p) destruction started, trace:\n", tdio);
626 diskctx = tdio->diskctx;
627 KKASSERT(diskctx != NULL);
/* A tdio must have drained its queue before it can be destroyed. */
628 KKASSERT(tdio->qlength == 0);
630 if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
631 lockmgr(&diskctx->lock, LK_EXCLUSIVE);
633 TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
634 tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
636 lockmgr(&diskctx->lock, LK_RELEASE);
639 if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
641 KKASSERT(tdctx != NULL);
643 lockmgr(&tdctx->lock, LK_EXCLUSIVE);
645 TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
646 tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
648 lockmgr(&tdctx->lock, LK_RELEASE);
650 if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
651 tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
652 objcache_put(dsched_tdio_cache, tdio);
653 atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
/* Release the diskctx ref taken in dsched_thread_io_alloc(). */
655 dsched_disk_ctx_unref(diskctx);
/*
 * Drop a tdctx reference; on the last one, destroy it: mark in-destruction,
 * release every tdio it still owns, remove it from the global tdctx list
 * (under the global lock), and free it back to the objcache.
 */
661 dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
663 struct dsched_thread_io *tdio, *tdio2;
666 refcount = atomic_fetchadd_int(&tdctx->refcount, -1);
668 KKASSERT(refcount >= 0 || refcount <= -0x400);
671 atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
673 kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
676 DSCHED_GLOBAL_THREAD_CTX_LOCK();
678 TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
679 TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
680 tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
681 dsched_thread_io_unref(tdio);
683 TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);
685 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
687 objcache_put(dsched_tdctx_cache, tdctx);
688 atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
/*
 * Allocate and initialize a tdio binding thread context `tdctx` to disk
 * `dp` under policy `pol`.  Takes a diskctx reference for the tdio's
 * lifetime, links the tdio onto both the disk's and (if given) the
 * thread's tdio lists, and bumps the allocation statistics.
 */
693 struct dsched_thread_io *
694 dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
695 struct dsched_policy *pol)
697 struct dsched_thread_io *tdio;
/* Hold the diskctx alive for as long as this tdio exists. */
699 dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
701 tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
702 bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);
704 /* XXX: maybe we do need another ref for the disk list for tdio */
705 dsched_thread_io_ref(tdio);
707 DSCHED_THREAD_IO_LOCKINIT(tdio);
710 tdio->diskctx = dsched_get_disk_priv(dp);
711 TAILQ_INIT(&tdio->queue);
716 TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
717 tdio->flags |= DSCHED_LINKED_DISK_CTX;
723 /* Put the tdio in the tdctx list */
724 DSCHED_THREAD_CTX_LOCK(tdctx);
725 TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
726 DSCHED_THREAD_CTX_UNLOCK(tdctx);
727 tdio->flags |= DSCHED_LINKED_THREAD_CTX;
730 atomic_add_int(&dsched_stats.tdio_allocations, 1);
/*
 * Allocate and zero a per-disk context, take the initial reference,
 * initialize its lock and tdio list, and invoke the policy's optional
 * new_diskctx hook.
 */
735 struct dsched_disk_ctx *
736 dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
738 struct dsched_disk_ctx *diskctx;
740 diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
741 bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
742 dsched_disk_ctx_ref(diskctx);
744 DSCHED_DISK_CTX_LOCKINIT(diskctx);
745 TAILQ_INIT(&diskctx->tdio_list);
747 atomic_add_int(&dsched_stats.diskctx_allocations, 1);
748 if (pol->new_diskctx)
749 pol->new_diskctx(diskctx);
/*
 * Allocate a per-thread context for proc `p` (may be NULL for kernel
 * threads).  Pre-allocates a tdio for every existing disk under that
 * disk's current policy, then links the tdctx into the global list.
 */
754 struct dsched_thread_ctx *
755 dsched_thread_ctx_alloc(struct proc *p)
757 struct dsched_thread_ctx *tdctx;
758 struct dsched_thread_io *tdio;
759 struct disk *dp = NULL;
761 tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
762 bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
763 dsched_thread_ctx_ref(tdctx);
765 kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
767 DSCHED_THREAD_CTX_LOCKINIT(tdctx);
768 TAILQ_INIT(&tdctx->tdio_list);
/* One tdio per known disk, each using that disk's active policy. */
772 while ((dp = disk_enumerate(dp))) {
773 tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
776 DSCHED_GLOBAL_THREAD_CTX_LOCK();
777 TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
778 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
780 atomic_add_int(&dsched_stats.tdctx_allocations, 1);
781 /* XXX: no callback here */
/*
 * Wire a disk into policy `pol`: allocate its diskctx (plus an extra ref
 * stored as the disk's private data) and create a tdio for every existing
 * thread context so running threads can immediately queue to this disk.
 */
786 policy_new(struct disk *dp, struct dsched_policy *pol) {
787 struct dsched_thread_ctx *tdctx;
788 struct dsched_disk_ctx *diskctx;
789 struct dsched_thread_io *tdio;
791 diskctx = dsched_disk_ctx_alloc(dp, pol);
792 dsched_disk_ctx_ref(diskctx);
793 dsched_set_disk_priv(dp, diskctx);
795 DSCHED_GLOBAL_THREAD_CTX_LOCK();
796 TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
797 tdio = dsched_thread_io_alloc(dp, tdctx, pol);
799 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
/*
 * Undo policy_new(): drop both diskctx references (the one taken for the
 * disk's private pointer and the allocation one) and clear the disk's
 * private data.
 */
804 policy_destroy(struct disk *dp) {
805 struct dsched_disk_ctx *diskctx;
807 diskctx = dsched_get_disk_priv(dp);
808 KKASSERT(diskctx != NULL);
810 dsched_disk_ctx_unref(diskctx); /* from prepare */
811 dsched_disk_ctx_unref(diskctx); /* from alloc */
813 dsched_set_disk_priv(dp, NULL);
/*
 * Hook called when a buf is created: attach the current thread's (or
 * proc's) tdctx to the buf, taking a reference that dsched_queue()/
 * dsched_exit_buf() later releases.  No-op before dsched is initialized.
 */
817 dsched_new_buf(struct buf *bp)
819 struct dsched_thread_ctx *tdctx = NULL;
821 if (dsched_inited == 0)
824 if (curproc != NULL) {
825 tdctx = dsched_get_proc_priv(curproc);
827 /* This is a kernel thread, so no proc info is available */
828 tdctx = dsched_get_thread_priv(curthread);
833 * XXX: hack. we don't want this assert because we aren't catching all
834 * threads. mi_startup() is still getting away without an tdctx.
837 /* by now we should have an tdctx. if not, something bad is going on */
838 KKASSERT(tdctx != NULL);
842 dsched_thread_ctx_ref(tdctx);
844 dsched_set_buf_priv(bp, tdctx);
/*
 * Hook called when a buf is released: detach and unref the tdctx that
 * dsched_new_buf() attached (the elided lines presumably NULL-check the
 * priv pointer first — TODO confirm).
 */
848 dsched_exit_buf(struct buf *bp)
850 struct dsched_thread_ctx *tdctx;
852 tdctx = dsched_get_buf_priv(bp);
854 dsched_clr_buf_priv(bp);
855 dsched_thread_ctx_unref(tdctx);
/*
 * Hook for process creation: allocate a tdctx for the new proc, keep an
 * extra reference for the proc's private pointer, and count it in the
 * stats.  No-op before dsched is initialized.
 */
860 dsched_new_proc(struct proc *p)
862 struct dsched_thread_ctx *tdctx;
864 if (dsched_inited == 0)
869 tdctx = dsched_thread_ctx_alloc(p);
871 dsched_thread_ctx_ref(tdctx);
873 dsched_set_proc_priv(p, tdctx);
874 atomic_add_int(&dsched_stats.nprocs, 1);
/*
 * Hook for (kernel) thread creation: like dsched_new_proc() but the tdctx
 * is allocated with no proc and stored in the thread's private pointer.
 */
879 dsched_new_thread(struct thread *td)
881 struct dsched_thread_ctx *tdctx;
883 if (dsched_inited == 0)
886 KKASSERT(td != NULL);
888 tdctx = dsched_thread_ctx_alloc(NULL);
890 dsched_thread_ctx_ref(tdctx);
892 dsched_set_thread_priv(td, tdctx);
893 atomic_add_int(&dsched_stats.nthreads, 1);
/*
 * Hook for process exit: poison the tdctx (0xDEAD marker aids debugging of
 * use-after-free), clear the proc's private pointer, and drop both the
 * allocation reference and the proc-pointer reference.
 */
897 dsched_exit_proc(struct proc *p)
899 struct dsched_thread_ctx *tdctx;
901 if (dsched_inited == 0)
906 tdctx = dsched_get_proc_priv(p);
907 KKASSERT(tdctx != NULL);
909 tdctx->dead = 0xDEAD;
910 dsched_set_proc_priv(p, 0);
912 dsched_thread_ctx_unref(tdctx); /* one for alloc, */
913 dsched_thread_ctx_unref(tdctx); /* one for ref */
914 atomic_subtract_int(&dsched_stats.nprocs, 1);
/*
 * Hook for thread exit: mirror of dsched_exit_proc() for the thread-local
 * tdctx — poison, detach, and drop both references.
 */
919 dsched_exit_thread(struct thread *td)
921 struct dsched_thread_ctx *tdctx;
923 if (dsched_inited == 0)
926 KKASSERT(td != NULL);
928 tdctx = dsched_get_thread_priv(td);
929 KKASSERT(tdctx != NULL);
931 tdctx->dead = 0xDEAD;
932 dsched_set_thread_priv(td, 0);
934 dsched_thread_ctx_unref(tdctx); /* one for alloc, */
935 dsched_thread_ctx_unref(tdctx); /* one for ref */
936 atomic_subtract_int(&dsched_stats.nthreads, 1);
939 /* DEFAULT NOOP POLICY */
/* Noop-policy prepare hook: nothing to set up (body elided in this view). */
942 default_prepare(struct dsched_disk_ctx *diskctx)
/* Noop-policy teardown hook: nothing to tear down (body elided in this view). */
948 default_teardown(struct dsched_disk_ctx *diskctx)
/* Noop-policy cancel hook: no queue to cancel (body elided in this view). */
954 default_cancel(struct dsched_disk_ctx *diskctx)
/*
 * Noop-policy queue hook: never queues; dispatches the bio immediately
 * (raw, or async via default_completed on the alternate path).
 */
960 default_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
963 dsched_strategy_raw(diskctx->dp, bio);
965 dsched_strategy_async(diskctx->dp, bio, default_completed, NULL);
972 * dsched device stuff
/*
 * ioctl backend: fill `data` with the name and policy of the
 * data->num_elem'th disk (the empty-body for loop advances the enumerator).
 * NOTE(review): strncpy does not guarantee NUL-termination when the source
 * fills the buffer; verify the consumers tolerate that or switch to a
 * terminating copy.
 */
976 dsched_dev_list_disks(struct dsched_ioctl *data)
978 struct disk *dp = NULL;
981 for (i = 0; (i <= data->num_elem) && (dp = disk_enumerate(dp)); i++);
986 strncpy(data->dev_name, dp->d_cdev->si_name, sizeof(data->dev_name));
988 if (dp->d_sched_policy) {
989 strncpy(data->pol_name, dp->d_sched_policy->name,
990 sizeof(data->pol_name));
992 strncpy(data->pol_name, "N/A (error)", 12);
/*
 * ioctl backend: look up the disk named in data->dev_name and copy its
 * active policy name into data->pol_name.  (Return-value handling for the
 * not-found case is elided in this view.)
 */
999 dsched_dev_list_disk(struct dsched_ioctl *data)
1001 struct disk *dp = NULL;
1004 while ((dp = disk_enumerate(dp))) {
1005 if (!strncmp(dp->d_cdev->si_name, data->dev_name,
1006 sizeof(data->dev_name))) {
1007 KKASSERT(dp->d_sched_policy != NULL);
1010 strncpy(data->pol_name, dp->d_sched_policy->name,
1011 sizeof(data->pol_name));
/*
 * ioctl backend: return the name of the data->num_elem'th registered
 * policy (empty-body for loop advances the enumerator).
 */
1022 dsched_dev_list_policies(struct dsched_ioctl *data)
1024 struct dsched_policy *pol = NULL;
1027 for (i = 0; (i <= data->num_elem) && (pol = dsched_policy_enumerate(pol)); i++);
1032 strncpy(data->pol_name, pol->name, sizeof(data->pol_name));
/*
 * ioctl backend: resolve a disk name and policy name and switch the disk
 * to that policy; error (elided constant) when either lookup fails.
 */
1037 dsched_dev_handle_switch(char *disk, char *policy)
1040 struct dsched_policy *pol;
1042 dp = dsched_find_disk(disk);
1043 pol = dsched_find_policy(policy);
1045 if ((dp == NULL) || (pol == NULL))
1048 return (dsched_switch(dp, pol));
/*
 * /dev/dsched open handler: requires read-write access and rejects
 * non-blocking opens.
 */
1052 dsched_dev_open(struct dev_open_args *ap)
1055 * Only allow read-write access.
1057 if (((ap->a_oflags & FWRITE) == 0) || ((ap->a_oflags & FREAD) == 0))
1061 * We don't allow nonblocking access.
1063 if ((ap->a_oflags & O_NONBLOCK) != 0) {
1064 kprintf("dsched_dev: can't do nonblocking access\n");
/* /dev/dsched close handler: no per-open state to release (body elided). */
1072 dsched_dev_close(struct dev_close_args *ap)
/*
 * /dev/dsched ioctl dispatcher: maps the DSCHED_* commands onto the
 * list/switch backends above and translates their failures into errno
 * values (ENOENT for a failed switch, EINVAL for failed listings, ENOTTY
 * for unknown commands).
 */
1078 dsched_dev_ioctl(struct dev_ioctl_args *ap)
1081 struct dsched_ioctl *data;
1084 data = (struct dsched_ioctl *)ap->a_data;
1087 case DSCHED_SET_DEVICE_POLICY:
1088 if (dsched_dev_handle_switch(data->dev_name, data->pol_name))
1089 error = ENOENT; /* No such file or directory */
1092 case DSCHED_LIST_DISK:
1093 if (dsched_dev_list_disk(data) != 0) {
1094 error = EINVAL; /* Invalid argument */
1098 case DSCHED_LIST_DISKS:
1099 if (dsched_dev_list_disks(data) != 0) {
1100 error = EINVAL; /* Invalid argument */
1104 case DSCHED_LIST_POLICIES:
1105 if (dsched_dev_list_policies(data) != 0) {
1106 error = EINVAL; /* Invalid argument */
1112 error = ENOTTY; /* Inappropriate ioctl for device */
/*
 * Framework initialization (function header elided in this view; the
 * SYSINIT below registers it as dsched_init): create the three objcaches
 * backed by the malloc args defined at the top of the file, zero the
 * statistics, set up the global locks, and register the built-in default
 * policy.
 */
1132 dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
1134 objcache_malloc_alloc,
1135 objcache_malloc_free,
1136 &dsched_thread_io_malloc_args );
1138 dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
1140 objcache_malloc_alloc,
1141 objcache_malloc_free,
1142 &dsched_thread_ctx_malloc_args );
1144 dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
1146 objcache_malloc_alloc,
1147 objcache_malloc_free,
1148 &dsched_disk_ctx_malloc_args );
1150 bzero(&dsched_stats, sizeof(struct dsched_stats));
1152 lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
1153 DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();
1155 dsched_register(&dsched_default_policy);
/* Create the /dev/dsched control device (arguments partially elided). */
1166 dsched_dev_init(void)
1168 dsched_dev = make_dev(&dsched_dev_ops,
/* Destroy the /dev/dsched control device on unload. */
1177 dsched_dev_uninit(void)
1179 destroy_dev(dsched_dev);
/*
 * Boot-time wiring: the core framework initializes just before
 * SI_SUB_CREATE_INIT (so disk creation callbacks find it ready); the
 * control device is created later with the drivers.
 */
1182 SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
1183 SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);
1184 SYSINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_init, NULL);
1185 SYSUNINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_uninit, NULL);
/* sysctl handler: export the raw dsched_stats struct as opaque data. */
1191 sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
1193 return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
/*
 * sysctl handler: emit the names of all registered policies as a single
 * space-separated, NUL-terminated string.
 */
1197 sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
1199 struct dsched_policy *pol = NULL;
1200 int error, first = 1;
1202 lockmgr(&dsched_lock, LK_EXCLUSIVE);
1204 while ((pol = dsched_policy_enumerate(pol))) {
/* Separator before every name except the first. */
1206 error = SYSCTL_OUT(req, " ", 1);
1212 error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
1218 lockmgr(&dsched_lock, LK_RELEASE);
/* Trailing NUL terminates the string for userland. */
1220 error = SYSCTL_OUT(req, "", 1);
/*
 * Per-disk sysctl handler (dsched.policy.<disk>): read returns the disk's
 * current policy name; write looks up the supplied name and switches the
 * disk to it.  arg1 carries the diskctx installed by
 * dsched_sysctl_add_disk().
 */
1226 sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
1228 char buf[DSCHED_POLICY_NAME_LENGTH];
1229 struct dsched_disk_ctx *diskctx = arg1;
1230 struct dsched_policy *pol = NULL;
1233 if (diskctx == NULL) {
1237 lockmgr(&dsched_lock, LK_EXCLUSIVE);
1239 pol = diskctx->dp->d_sched_policy;
1240 memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);
1242 error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
/* Read-only request (or error): nothing to switch. */
1243 if (error || req->newptr == NULL) {
1244 lockmgr(&dsched_lock, LK_RELEASE);
1248 pol = dsched_find_policy(buf);
1250 lockmgr(&dsched_lock, LK_RELEASE);
1254 dsched_switch(diskctx->dp, pol);
1256 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Static sysctl tree: dsched (root), dsched.policy (per-disk handlers are
 * attached dynamically), dsched.debug, dsched.stats, dsched.policies.
 */
1261 SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
1262 "Disk Scheduler Framework (dsched) magic");
1263 SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
1264 "List of disks and their policies");
1265 SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
1266 0, "Enable dsched debugging");
1267 SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
1268 0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
1269 "dsched statistics");
1270 SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
1271 NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
/*
 * Register the dynamic dsched.policy.<name> sysctl for a disk, lazily
 * initializing the diskctx's sysctl context exactly once (flag-guarded so
 * repeated calls — e.g. from the update callback — don't re-init it).
 */
1274 dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
1276 if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
1277 diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
1278 sysctl_ctx_init(&diskctx->sysctl_ctx);
1281 SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
1282 OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
1283 diskctx, 0, sysctl_dsched_policy, "A", "policy");