/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/md_var.h>
#include <machine/varargs.h>
56 MALLOC_DEFINE(M_DSCHED, "dsched", "Disk Scheduler Framework allocations");
58 static dsched_prepare_t default_prepare;
59 static dsched_teardown_t default_teardown;
60 static dsched_flush_t default_flush;
61 static dsched_cancel_t default_cancel;
62 static dsched_queue_t default_queue;
64 static biodone_t default_completed;
67 dsched_new_buf_t *default_new_buf;
68 dsched_new_proc_t *default_new_proc;
69 dsched_new_thread_t *default_new_thread;
70 dsched_exit_proc_t *default_exit_proc;
71 dsched_exit_thread_t *default_exit_thread;
73 static d_open_t dsched_dev_open;
74 static d_close_t dsched_dev_close;
75 static d_ioctl_t dsched_dev_ioctl;
77 static int dsched_dev_list_disks(struct dsched_ioctl *data);
78 static int dsched_dev_list_disk(struct dsched_ioctl *data);
79 static int dsched_dev_list_policies(struct dsched_ioctl *data);
80 static int dsched_dev_handle_switch(char *disk, char *policy);
83 struct lock dsched_lock;
84 static int dsched_debug_enable = 0;
85 static int dsched_test1 = 0;
86 static cdev_t dsched_dev;
88 static struct dsched_policy_head dsched_policy_list =
89 TAILQ_HEAD_INITIALIZER(dsched_policy_list);
91 static struct dsched_ops dsched_default_ops = {
95 .prepare = default_prepare,
96 .teardown = default_teardown,
97 .flush = default_flush,
98 .cancel_all = default_cancel,
99 .bio_queue = default_queue,
103 static struct dev_ops dsched_dev_ops = {
105 .d_open = dsched_dev_open,
106 .d_close = dsched_dev_close,
107 .d_ioctl = dsched_dev_ioctl
111 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
115 dsched_debug(int level, char *fmt, ...)
120 if (level <= dsched_debug_enable)
128 * Called on disk_create()
129 * tries to read which policy to use from loader.conf, if there's
130 * none specified, the default policy is used.
133 dsched_create(struct disk *dp, const char *head_name, int unit)
135 char tunable_key[SPECNAMELEN + 11];
136 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
137 struct dsched_policy *policy = NULL;
139 /* Also look for serno stuff? */
140 /* kprintf("dsched_create() for disk %s%d\n", head_name, unit); */
141 lockmgr(&dsched_lock, LK_EXCLUSIVE);
143 ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_%s%d",
145 if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
146 sizeof(sched_policy)) != 0) {
147 policy = dsched_find_policy(sched_policy);
150 ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_%s*",
152 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
153 sizeof(sched_policy)) != 0)) {
154 policy = dsched_find_policy(sched_policy);
157 ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_*");
158 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
159 sizeof(sched_policy)) != 0)) {
160 policy = dsched_find_policy(sched_policy);
164 dsched_debug(0, "No policy for %s%d specified, "
165 "or policy not found\n", head_name, unit);
166 dsched_set_policy(dp, &dsched_default_ops);
168 dsched_set_policy(dp, policy->d_ops);
171 lockmgr(&dsched_lock, LK_RELEASE);
175 * Called on disk_destroy()
176 * shuts down the scheduler core and cancels all remaining bios
179 dsched_destroy(struct disk *dp)
181 struct dsched_ops *old_ops;
183 lockmgr(&dsched_lock, LK_EXCLUSIVE);
185 old_ops = dp->d_sched_ops;
186 dp->d_sched_ops = &dsched_default_ops;
187 old_ops->cancel_all(dp);
188 old_ops->teardown(dp);
189 atomic_subtract_int(&old_ops->head.ref_count, 1);
190 KKASSERT(old_ops->head.ref_count >= 0);
192 lockmgr(&dsched_lock, LK_RELEASE);
197 dsched_queue(struct disk *dp, struct bio *bio)
200 error = dp->d_sched_ops->bio_queue(dp, bio);
203 if (bio->bio_buf->b_cmd == BUF_CMD_FLUSH) {
204 dp->d_sched_ops->flush(dp, bio);
206 dsched_strategy_raw(dp, bio);
212 * Called from each module_init or module_attach of each policy
213 * registers the policy in the local policy list.
216 dsched_register(struct dsched_ops *d_ops)
218 struct dsched_policy *policy;
221 lockmgr(&dsched_lock, LK_EXCLUSIVE);
223 policy = dsched_find_policy(d_ops->head.name);
226 if ((d_ops->new_buf != NULL) || (d_ops->new_proc != NULL) ||
227 (d_ops->new_thread != NULL)) {
229 * Policy ops has hooks for proc/thread/buf creation,
230 * so check if there are already hooks for those present
231 * and if so, stop right now.
233 if ((default_new_buf != NULL) || (default_new_proc != NULL) ||
234 (default_new_thread != NULL) || (default_exit_proc != NULL) ||
235 (default_exit_thread != NULL)) {
236 dsched_debug(LOG_ERR, "A policy with "
237 "proc/thread/buf hooks is already in use!");
242 /* If everything is fine, just register the hooks */
243 default_new_buf = d_ops->new_buf;
244 default_new_proc = d_ops->new_proc;
245 default_new_thread = d_ops->new_thread;
246 default_exit_proc = d_ops->exit_proc;
247 default_exit_thread = d_ops->exit_thread;
250 policy = kmalloc(sizeof(struct dsched_policy), M_DSCHED, M_WAITOK);
251 policy->d_ops = d_ops;
252 TAILQ_INSERT_TAIL(&dsched_policy_list, policy, link);
253 atomic_add_int(&policy->d_ops->head.ref_count, 1);
255 dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
261 lockmgr(&dsched_lock, LK_RELEASE);
266 * Called from each module_detach of each policy
267 * unregisters the policy
270 dsched_unregister(struct dsched_ops *d_ops)
272 struct dsched_policy *policy;
274 lockmgr(&dsched_lock, LK_EXCLUSIVE);
275 policy = dsched_find_policy(d_ops->head.name);
278 if (policy->d_ops->head.ref_count > 1)
280 TAILQ_REMOVE(&dsched_policy_list, policy, link);
281 atomic_subtract_int(&policy->d_ops->head.ref_count, 1);
282 KKASSERT(policy->d_ops->head.ref_count >= 0);
283 kfree(policy, M_DSCHED);
285 lockmgr(&dsched_lock, LK_RELEASE);
291 * switches the policy by first removing the old one and then
292 * enabling the new one.
295 dsched_switch(struct disk *dp, struct dsched_ops *new_ops)
297 struct dsched_ops *old_ops;
299 /* If we are asked to set the same policy, do nothing */
300 if (dp->d_sched_ops == new_ops)
303 /* lock everything down, diskwise */
304 lockmgr(&dsched_lock, LK_EXCLUSIVE);
305 old_ops = dp->d_sched_ops;
307 atomic_subtract_int(&dp->d_sched_ops->head.ref_count, 1);
308 KKASSERT(dp->d_sched_ops->head.ref_count >= 0);
310 dp->d_sched_ops = &dsched_default_ops;
311 old_ops->teardown(dp);
313 /* Bring everything back to life */
314 dsched_set_policy(dp, new_ops);
315 lockmgr(&dsched_lock, LK_RELEASE);
321 * Loads a given policy and attaches it to the specified disk.
322 * Also initializes the core for the policy
325 dsched_set_policy(struct disk *dp, struct dsched_ops *new_ops)
329 /* Check if it is locked already. if not, we acquire the devfs lock */
330 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
331 lockmgr(&dsched_lock, LK_EXCLUSIVE);
335 new_ops->prepare(dp);
336 dp->d_sched_ops = new_ops;
337 atomic_add_int(&new_ops->head.ref_count, 1);
338 kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
341 /* If we acquired the lock, we also get rid of it */
343 lockmgr(&dsched_lock, LK_RELEASE);
346 struct dsched_policy*
347 dsched_find_policy(char *search)
349 struct dsched_policy *policy;
350 struct dsched_policy *policy_found = NULL;
353 /* Check if it is locked already. if not, we acquire the devfs lock */
354 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
355 lockmgr(&dsched_lock, LK_EXCLUSIVE);
359 TAILQ_FOREACH(policy, &dsched_policy_list, link) {
360 if (!strcmp(policy->d_ops->head.name, search)) {
361 policy_found = policy;
366 /* If we acquired the lock, we also get rid of it */
368 lockmgr(&dsched_lock, LK_RELEASE);
374 dsched_find_disk(char *search)
376 struct disk *dp_found = NULL;
377 struct disk *dp = NULL;
379 while((dp = disk_enumerate(dp))) {
380 if (!strcmp(dp->d_cdev->si_name, search)) {
390 dsched_disk_enumerate(struct disk *dp, struct dsched_ops *ops)
392 while ((dp = disk_enumerate(dp))) {
393 if (dp->d_sched_ops == ops)
400 struct dsched_policy *
401 dsched_policy_enumerate(struct dsched_policy *pol)
404 return (TAILQ_FIRST(&dsched_policy_list));
406 return (TAILQ_NEXT(pol, link));
410 dsched_cancel_bio(struct bio *bp)
412 bp->bio_buf->b_error = ENXIO;
413 bp->bio_buf->b_flags |= B_ERROR;
414 bp->bio_buf->b_resid = bp->bio_buf->b_bcount;
420 dsched_strategy_raw(struct disk *dp, struct bio *bp)
423 * Ideally, this stuff shouldn't be needed... but just in case, we leave it in
426 KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
427 if(bp->bio_track != NULL) {
428 dsched_debug(LOG_INFO,
429 "dsched_strategy_raw sees non-NULL bio_track!! "
430 "bio: %x\n", (uint32_t)bp);
431 bp->bio_track = NULL;
433 dev_dstrategy(dp->d_rawdev, bp);
437 dsched_strategy_sync(struct disk *dp, struct bio *bio)
439 struct buf *bp, *nbp;
447 nbp->b_cmd = bp->b_cmd;
448 nbp->b_bufsize = bp->b_bufsize;
449 nbp->b_runningbufspace = bp->b_runningbufspace;
450 nbp->b_bcount = bp->b_bcount;
451 nbp->b_resid = bp->b_resid;
452 nbp->b_data = bp->b_data;
453 nbp->b_kvabase = bp->b_kvabase;
454 nbp->b_kvasize = bp->b_kvasize;
455 nbp->b_dirtyend = bp->b_dirtyend;
457 nbio->bio_done = biodone_sync;
458 nbio->bio_flags |= BIO_SYNC;
459 nbio->bio_track = NULL;
461 nbio->bio_caller_info1.ptr = dp;
462 nbio->bio_offset = bio->bio_offset;
464 dev_dstrategy(dp->d_rawdev, nbio);
465 biowait(nbio, "dschedsync");
466 bp->b_resid = nbp->b_resid;
467 bp->b_error = nbp->b_error;
472 dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
476 nbio = push_bio(bio);
477 nbio->bio_done = done;
478 nbio->bio_offset = bio->bio_offset;
480 dsched_set_bio_dp(nbio, dp);
481 dsched_set_bio_priv(nbio, priv);
483 getmicrotime(&nbio->bio_caller_info3.tv);
484 dev_dstrategy(dp->d_rawdev, nbio);
488 dsched_new_buf(struct buf *bp)
490 if (default_new_buf != NULL)
496 dsched_new_proc(struct proc *p)
498 if (default_new_proc != NULL)
504 dsched_new_thread(struct thread *td)
506 if (default_new_thread != NULL)
507 default_new_thread(td);
511 dsched_exit_proc(struct proc *p)
513 if (default_exit_proc != NULL)
514 default_exit_proc(p);
519 dsched_exit_thread(struct thread *td)
521 if (default_exit_thread != NULL)
522 default_exit_thread(td);
/* Default policy: nothing to prepare. */
static int
default_prepare(struct disk *dp)
{
	return 0;
}
/* Default policy: nothing to tear down. */
static int
default_teardown(struct disk *dp)
{
	return 0;
}
/* Default policy: nothing queued internally, so nothing to flush. */
static int
default_flush(struct disk *dp, struct bio *bio)
{
	return 0;
}
/* Default policy: nothing queued internally, so nothing to cancel. */
static int
default_cancel(struct disk *dp)
{
	return 0;
}
/*
 * Default policy: pass every bio straight through to the raw device.
 * The async variant is kept disabled as a reference.
 */
static int
default_queue(struct disk *dp, struct bio *bio)
{
	dsched_strategy_raw(dp, bio);
#if 0
	dsched_strategy_async(dp, bio, default_completed, NULL);
#endif
	return 0;
}
/*
 * Completion callback for default_queue()'s (disabled) async path:
 * pop our pushed bio and complete the original.
 */
static void
default_completed(struct bio *bp)
{
	struct bio *obio;

	obio = pop_bio(bp);
	biodone(obio);
}
571 * dsched device stuff
575 dsched_dev_list_disks(struct dsched_ioctl *data)
577 struct disk *dp = NULL;
580 for (i = 0; (i <= data->num_elem) && (dp = disk_enumerate(dp)); i++);
585 strncpy(data->dev_name, dp->d_cdev->si_name, sizeof(data->dev_name));
587 if (dp->d_sched_ops) {
588 strncpy(data->pol_name, dp->d_sched_ops->head.name,
589 sizeof(data->pol_name));
591 strncpy(data->pol_name, "N/A (error)", 12);
598 dsched_dev_list_disk(struct dsched_ioctl *data)
600 struct disk *dp = NULL;
603 while ((dp = disk_enumerate(dp))) {
604 if (!strncmp(dp->d_cdev->si_name, data->dev_name,
605 sizeof(data->dev_name))) {
606 KKASSERT(dp->d_sched_ops != NULL);
609 strncpy(data->pol_name, dp->d_sched_ops->head.name,
610 sizeof(data->pol_name));
621 dsched_dev_list_policies(struct dsched_ioctl *data)
623 struct dsched_policy *pol = NULL;
626 for (i = 0; (i <= data->num_elem) && (pol = dsched_policy_enumerate(pol)); i++);
631 strncpy(data->pol_name, pol->d_ops->head.name, sizeof(data->pol_name));
636 dsched_dev_handle_switch(char *disk, char *policy)
639 struct dsched_policy *pol;
641 dp = dsched_find_disk(disk);
642 pol = dsched_find_policy(policy);
644 if ((dp == NULL) || (pol == NULL))
647 return (dsched_switch(dp, pol->d_ops));
651 dsched_dev_open(struct dev_open_args *ap)
654 * Only allow read-write access.
656 if (((ap->a_oflags & FWRITE) == 0) || ((ap->a_oflags & FREAD) == 0))
660 * We don't allow nonblocking access.
662 if ((ap->a_oflags & O_NONBLOCK) != 0) {
663 kprintf("dsched_dev: can't do nonblocking access\n");
/* close(2) on /dev/dsched: no per-open state, nothing to do. */
static int
dsched_dev_close(struct dev_close_args *ap)
{
	return 0;
}
677 dsched_dev_ioctl(struct dev_ioctl_args *ap)
680 struct dsched_ioctl *data;
683 data = (struct dsched_ioctl *)ap->a_data;
686 case DSCHED_SET_DEVICE_POLICY:
687 if (dsched_dev_handle_switch(data->dev_name, data->pol_name))
688 error = ENOENT; /* No such file or directory */
691 case DSCHED_LIST_DISK:
692 if (dsched_dev_list_disk(data) != 0) {
693 error = EINVAL; /* Invalid argument */
697 case DSCHED_LIST_DISKS:
698 if (dsched_dev_list_disks(data) != 0) {
699 error = EINVAL; /* Invalid argument */
703 case DSCHED_LIST_POLICIES:
704 if (dsched_dev_list_policies(data) != 0) {
705 error = EINVAL; /* Invalid argument */
711 error = ENOTTY; /* Inappropriate ioctl for device */
726 lockinit(&dsched_lock, "dsched lock", 0, 0);
727 dsched_register(&dsched_default_ops);
736 dsched_dev_init(void)
738 dsched_dev = make_dev(&dsched_dev_ops,
747 dsched_dev_uninit(void)
749 destroy_dev(dsched_dev);
752 SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_FIRST, dsched_init, NULL);
753 SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_ANY, dsched_uninit, NULL);
754 SYSINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_init, NULL);
755 SYSUNINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_uninit, NULL);
760 SYSCTL_INT(_kern, OID_AUTO, dsched_debug, CTLFLAG_RW, &dsched_debug_enable,
761 0, "Enable dsched debugging");
762 SYSCTL_INT(_kern, OID_AUTO, dsched_test1, CTLFLAG_RW, &dsched_test1,
763 0, "Switch dsched test1 method");