2 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
38 #include <sys/sysctl.h>
41 #include <sys/diskslice.h>
43 #include <sys/malloc.h>
44 #include <sys/sysctl.h>
45 #include <machine/md_var.h>
46 #include <sys/ctype.h>
47 #include <sys/syslog.h>
48 #include <sys/device.h>
49 #include <sys/msgport.h>
50 #include <sys/msgport2.h>
52 #include <sys/dsched.h>
53 #include <sys/fcntl.h>
54 #include <machine/varargs.h>
56 MALLOC_DEFINE(M_DSCHED, "dsched", "Disk Scheduler Framework allocations");
/*
 * Forward declarations for the built-in "noop" scheduler methods that
 * populate dsched_default_ops below.
 */
58 static dsched_prepare_t default_prepare;
59 static dsched_teardown_t default_teardown;
60 static dsched_flush_t default_flush;
61 static dsched_cancel_t default_cancel;
62 static dsched_queue_t default_queue;
64 static biodone_t default_completed;
/*
 * Global hook pointers.  dsched_register() points these at a policy's
 * buf/proc/thread creation/exit hooks; at most one hook-providing policy
 * may be registered at a time.  NULL means "no hook installed".
 */
67 dsched_new_buf_t *default_new_buf;
68 dsched_new_proc_t *default_new_proc;
69 dsched_new_thread_t *default_new_thread;
70 dsched_exit_buf_t *default_exit_buf;
71 dsched_exit_proc_t *default_exit_proc;
72 dsched_exit_thread_t *default_exit_thread;
/* Entry points for the dsched control character device. */
74 static d_open_t dsched_dev_open;
75 static d_close_t dsched_dev_close;
76 static d_ioctl_t dsched_dev_ioctl;
/* ioctl backends: enumerate disks/policies and switch a disk's policy. */
78 static int dsched_dev_list_disks(struct dsched_ioctl *data);
79 static int dsched_dev_list_disk(struct dsched_ioctl *data);
80 static int dsched_dev_list_policies(struct dsched_ioctl *data);
81 static int dsched_dev_handle_switch(char *disk, char *policy);
/* Protects the policy list and serializes per-disk policy changes. */
84 struct lock dsched_lock;
85 static int dsched_debug_enable = 0;
86 static int dsched_test1 = 0;
87 static cdev_t dsched_dev;
/* List of all registered scheduling policies. */
89 static struct dsched_policy_head dsched_policy_list =
90 TAILQ_HEAD_INITIALIZER(dsched_policy_list);
/*
 * Fallback no-op ops table, attached to any disk that has no explicit
 * scheduling policy configured.
 */
92 static struct dsched_ops dsched_default_ops = {
96 .prepare = default_prepare,
97 .teardown = default_teardown,
98 .flush = default_flush,
99 .cancel_all = default_cancel,
100 .bio_queue = default_queue,
/* Character-device switch for the dsched control device. */
104 static struct dev_ops dsched_dev_ops = {
106 .d_open = dsched_dev_open,
107 .d_close = dsched_dev_close,
108 .d_ioctl = dsched_dev_ioctl
112 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
/* Emits output only when `level` <= the kern.dsched_debug sysctl value. */
116 dsched_debug(int level, char *fmt, ...)
121 if (level <= dsched_debug_enable)
129 * Called on disk_create()
130 * tries to read which policy to use from loader.conf, if there's
131 * none specified, the default policy is used.
134 dsched_create(struct disk *dp, const char *head_name, int unit)
136 char tunable_key[SPECNAMELEN + 11];
137 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
138 struct dsched_policy *policy = NULL;
140 /* Also look for serno stuff? */
141 /* kprintf("dsched_create() for disk %s%d\n", head_name, unit); */
142 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* 1st: exact per-unit tunable, "scheduler_<name><unit>". */
144 ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_%s%d",
146 if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
147 sizeof(sched_policy)) != 0) {
148 policy = dsched_find_policy(sched_policy);
/* 2nd: per-driver wildcard tunable, "scheduler_<name>*". */
151 ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_%s*",
153 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
154 sizeof(sched_policy)) != 0)) {
155 policy = dsched_find_policy(sched_policy);
/* 3rd: global wildcard tunable "scheduler_*". */
158 ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_*");
159 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
160 sizeof(sched_policy)) != 0)) {
161 policy = dsched_find_policy(sched_policy);
/* No tunable matched (or named an unknown policy): use the default. */
165 dsched_debug(0, "No policy for %s%d specified, "
166 "or policy not found\n", head_name, unit);
167 dsched_set_policy(dp, &dsched_default_ops);
169 dsched_set_policy(dp, policy->d_ops);
172 lockmgr(&dsched_lock, LK_RELEASE);
176 * Called on disk_destroy()
177 * shuts down the scheduler core and cancels all remaining bios
180 dsched_destroy(struct disk *dp)
182 struct dsched_ops *old_ops;
184 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/*
 * Swap in the default ops first so incoming bios always see a valid
 * policy while the old one cancels outstanding bios and tears down.
 */
186 old_ops = dp->d_sched_ops;
187 dp->d_sched_ops = &dsched_default_ops;
188 old_ops->cancel_all(dp);
189 old_ops->teardown(dp);
/* Drop the reference dsched_set_policy() took on the old policy. */
190 atomic_subtract_int(&old_ops->head.ref_count, 1);
191 KKASSERT(old_ops->head.ref_count >= 0);
193 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Main bio entry point: hand the bio to the disk's current policy.
 * NOTE(review): the error-handling lines between these statements are
 * not visible in this chunk; presumably a non-zero bio_queue() result
 * falls through to the flush/raw path below -- confirm in the full file.
 */
198 dsched_queue(struct disk *dp, struct bio *bio)
201 error = dp->d_sched_ops->bio_queue(dp, bio);
204 if (bio->bio_buf->b_cmd == BUF_CMD_FLUSH) {
205 dp->d_sched_ops->flush(dp, bio);
207 dsched_strategy_raw(dp, bio);
213 * Called from each module_init or module_attach of each policy
214 * registers the policy in the local policy list.
217 dsched_register(struct dsched_ops *d_ops)
219 struct dsched_policy *policy;
222 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* Refuse duplicate registration under the same name (error path below). */
224 policy = dsched_find_policy(d_ops->head.name);
227 if ((d_ops->new_buf != NULL) || (d_ops->new_proc != NULL) ||
228 (d_ops->new_thread != NULL)) {
230 * Policy ops has hooks for proc/thread/buf creation,
231 * so check if there are already hooks for those present
232 * and if so, stop right now.
234 if ((default_new_buf != NULL) || (default_new_proc != NULL) ||
235 (default_new_thread != NULL) || (default_exit_proc != NULL) ||
236 (default_exit_thread != NULL)) {
237 dsched_debug(LOG_ERR, "A policy with "
238 "proc/thread/buf hooks is already in use!");
243 /* If everything is fine, just register the hooks */
244 default_new_buf = d_ops->new_buf;
245 default_new_proc = d_ops->new_proc;
246 default_new_thread = d_ops->new_thread;
247 default_exit_buf = d_ops->exit_buf;
248 default_exit_proc = d_ops->exit_proc;
249 default_exit_thread = d_ops->exit_thread;
/* Add the policy to the global list and take a list reference on it. */
252 policy = kmalloc(sizeof(struct dsched_policy), M_DSCHED, M_WAITOK);
253 policy->d_ops = d_ops;
254 TAILQ_INSERT_TAIL(&dsched_policy_list, policy, link);
255 atomic_add_int(&policy->d_ops->head.ref_count, 1);
257 dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
263 lockmgr(&dsched_lock, LK_RELEASE);
268 * Called from each module_detach of each policy
269 * unregisters the policy
272 dsched_unregister(struct dsched_ops *d_ops)
274 struct dsched_policy *policy;
276 lockmgr(&dsched_lock, LK_EXCLUSIVE);
277 policy = dsched_find_policy(d_ops->head.name);
/* ref_count > 1 means some disk still uses this policy: refuse. */
280 if (policy->d_ops->head.ref_count > 1)
282 TAILQ_REMOVE(&dsched_policy_list, policy, link);
/* Drop the list reference taken by dsched_register(). */
283 atomic_subtract_int(&policy->d_ops->head.ref_count, 1);
284 KKASSERT(policy->d_ops->head.ref_count >= 0);
285 kfree(policy, M_DSCHED);
287 lockmgr(&dsched_lock, LK_RELEASE);
293 * switches the policy by first removing the old one and then
294 * enabling the new one.
297 dsched_switch(struct disk *dp, struct dsched_ops *new_ops)
299 struct dsched_ops *old_ops;
301 /* If we are asked to set the same policy, do nothing */
302 if (dp->d_sched_ops == new_ops)
305 /* lock everything down, diskwise */
306 lockmgr(&dsched_lock, LK_EXCLUSIVE);
307 old_ops = dp->d_sched_ops;
/* Drop the disk's reference on the old policy before detaching it. */
309 atomic_subtract_int(&dp->d_sched_ops->head.ref_count, 1);
310 KKASSERT(dp->d_sched_ops->head.ref_count >= 0);
/* Park the disk on the default ops while the old policy tears down. */
312 dp->d_sched_ops = &dsched_default_ops;
313 old_ops->teardown(dp);
315 /* Bring everything back to life */
316 dsched_set_policy(dp, new_ops);
317 lockmgr(&dsched_lock, LK_RELEASE);
323 * Loads a given policy and attaches it to the specified disk.
324 * Also initializes the core for the policy
327 dsched_set_policy(struct disk *dp, struct dsched_ops *new_ops)
331 /* Check if it is locked already. if not, we acquire the devfs lock */
332 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
333 lockmgr(&dsched_lock, LK_EXCLUSIVE);
337 new_ops->prepare(dp);
338 dp->d_sched_ops = new_ops;
339 atomic_add_int(&new_ops->head.ref_count, 1);
340 kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
343 /* If we acquired the lock, we also get rid of it */
345 lockmgr(&dsched_lock, LK_RELEASE);
348 struct dsched_policy*
349 dsched_find_policy(char *search)
351 struct dsched_policy *policy;
352 struct dsched_policy *policy_found = NULL;
355 /* Check if it is locked already. if not, we acquire the devfs lock */
356 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
357 lockmgr(&dsched_lock, LK_EXCLUSIVE);
361 TAILQ_FOREACH(policy, &dsched_policy_list, link) {
362 if (!strcmp(policy->d_ops->head.name, search)) {
363 policy_found = policy;
368 /* If we acquired the lock, we also get rid of it */
370 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Looks up a disk by its device name (d_cdev->si_name); returns NULL if
 * no matching disk exists.
 */
376 dsched_find_disk(char *search)
378 struct disk *dp_found = NULL;
379 struct disk *dp = NULL;
381 while((dp = disk_enumerate(dp))) {
382 if (!strcmp(dp->d_cdev->si_name, search)) {
/*
 * Iterator: returns the next disk after `dp` whose scheduler ops match
 * `ops`; pass dp == NULL to start the enumeration.
 */
392 dsched_disk_enumerate(struct disk *dp, struct dsched_ops *ops)
394 while ((dp = disk_enumerate(dp))) {
395 if (dp->d_sched_ops == ops)
/*
 * Iterator over the registered policy list: returns the first entry
 * when called with pol == NULL, otherwise the successor of `pol`.
 */
402 struct dsched_policy *
403 dsched_policy_enumerate(struct dsched_policy *pol)
406 return (TAILQ_FIRST(&dsched_policy_list));
408 return (TAILQ_NEXT(pol, link));
/*
 * Fails a bio with ENXIO (used for bios still queued when a policy is
 * cancelled/torn down).
 */
412 dsched_cancel_bio(struct bio *bp)
414 bp->bio_buf->b_error = ENXIO;
415 bp->bio_buf->b_flags |= B_ERROR;
/* Nothing was transferred. */
416 bp->bio_buf->b_resid = bp->bio_buf->b_bcount;
422 dsched_strategy_raw(struct disk *dp, struct bio *bp)
425 * Ideally, this stuff shouldn't be needed... but just in case, we leave it in
428 KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
429 if(bp->bio_track != NULL) {
430 dsched_debug(LOG_INFO,
431 "dsched_strategy_raw sees non-NULL bio_track!! "
432 "bio: %x\n", (uint32_t)bp);
433 bp->bio_track = NULL;
435 dev_dstrategy(dp->d_rawdev, bp);
/*
 * Issues a bio synchronously: mirrors the caller's buf onto a private
 * buf/bio pair, sends it to the raw device, biowait()s for completion,
 * then copies the result (resid/error) back into the caller's buf.
 */
439 dsched_strategy_sync(struct disk *dp, struct bio *bio)
441 struct buf *bp, *nbp;
/* Mirror the relevant fields of the original buf onto the clone. */
449 nbp->b_cmd = bp->b_cmd;
450 nbp->b_bufsize = bp->b_bufsize;
451 nbp->b_runningbufspace = bp->b_runningbufspace;
452 nbp->b_bcount = bp->b_bcount;
453 nbp->b_resid = bp->b_resid;
454 nbp->b_data = bp->b_data;
455 nbp->b_kvabase = bp->b_kvabase;
456 nbp->b_kvasize = bp->b_kvasize;
457 nbp->b_dirtyend = bp->b_dirtyend;
/* Mark the cloned bio synchronous so biowait() below can sleep on it. */
459 nbio->bio_done = biodone_sync;
460 nbio->bio_flags |= BIO_SYNC;
461 nbio->bio_track = NULL;
463 nbio->bio_caller_info1.ptr = dp;
464 nbio->bio_offset = bio->bio_offset;
466 dev_dstrategy(dp->d_rawdev, nbio);
467 biowait(nbio, "dschedsync");
/* Propagate completion status back to the original buf. */
468 bp->b_resid = nbp->b_resid;
469 bp->b_error = nbp->b_error;
/*
 * Issues a bio asynchronously with a caller-supplied completion callback
 * (`done`) and private data (`priv`) stashed on the pushed bio.
 */
474 dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
478 nbio = push_bio(bio);
479 nbio->bio_done = done;
480 nbio->bio_offset = bio->bio_offset;
482 dsched_set_bio_dp(nbio, dp);
483 dsched_set_bio_priv(nbio, priv);
/* Timestamp submission -- presumably for policies measuring service time. */
485 getmicrotime(&nbio->bio_caller_info3.tv);
486 dev_dstrategy(dp->d_rawdev, nbio);
/*
 * Thin wrappers around the global policy hooks: each forwards to the
 * corresponding default_* hook pointer if a policy installed one, and
 * is a no-op otherwise.
 */
490 dsched_new_buf(struct buf *bp)
492 if (default_new_buf != NULL)
497 dsched_exit_buf(struct buf *bp)
499 if (default_exit_buf != NULL)
500 default_exit_buf(bp);
504 dsched_new_proc(struct proc *p)
506 if (default_new_proc != NULL)
512 dsched_new_thread(struct thread *td)
514 if (default_new_thread != NULL)
515 default_new_thread(td);
519 dsched_exit_proc(struct proc *p)
521 if (default_exit_proc != NULL)
522 default_exit_proc(p);
527 dsched_exit_thread(struct thread *td)
529 if (default_exit_thread != NULL)
530 default_exit_thread(td);
/*
 * Built-in "noop" policy methods.  prepare/teardown/flush/cancel are
 * effectively no-ops; default_queue pushes the bio down to the device.
 * NOTE(review): the bodies are largely elided in this chunk -- the
 * raw vs. async dispatch condition in default_queue is not visible;
 * confirm against the full file (likely the dsched_test1 knob).
 */
534 default_prepare(struct disk *dp)
540 default_teardown(struct disk *dp)
546 default_flush(struct disk *dp, struct bio *bio)
552 default_cancel(struct disk *dp)
558 default_queue(struct disk *dp, struct bio *bio)
560 dsched_strategy_raw(dp, bio);
562 dsched_strategy_async(dp, bio, default_completed, NULL);
569 default_completed(struct bio *bp)
579 * dsched device stuff
/*
 * ioctl backend: copies out the name and current policy name of the
 * data->num_elem'th disk.
 */
583 dsched_dev_list_disks(struct dsched_ioctl *data)
585 struct disk *dp = NULL;
/* Advance the enumeration to the requested index (empty loop body). */
588 for (i = 0; (i <= data->num_elem) && (dp = disk_enumerate(dp)); i++);
593 strncpy(data->dev_name, dp->d_cdev->si_name, sizeof(data->dev_name));
595 if (dp->d_sched_ops) {
596 strncpy(data->pol_name, dp->d_sched_ops->head.name,
597 sizeof(data->pol_name));
599 strncpy(data->pol_name, "N/A (error)", 12);
/*
 * ioctl backend: looks up a disk by data->dev_name and copies out the
 * name of its current policy.
 */
606 dsched_dev_list_disk(struct dsched_ioctl *data)
608 struct disk *dp = NULL;
611 while ((dp = disk_enumerate(dp))) {
612 if (!strncmp(dp->d_cdev->si_name, data->dev_name,
613 sizeof(data->dev_name))) {
/* Every disk gets a policy at disk_create() time, so this must hold. */
614 KKASSERT(dp->d_sched_ops != NULL);
617 strncpy(data->pol_name, dp->d_sched_ops->head.name,
618 sizeof(data->pol_name));
/*
 * ioctl backend: copies out the name of the data->num_elem'th registered
 * policy.
 */
629 dsched_dev_list_policies(struct dsched_ioctl *data)
631 struct dsched_policy *pol = NULL;
/* Advance the enumeration to the requested index (empty loop body). */
634 for (i = 0; (i <= data->num_elem) && (pol = dsched_policy_enumerate(pol)); i++);
639 strncpy(data->pol_name, pol->d_ops->head.name, sizeof(data->pol_name));
/*
 * ioctl backend: switches `disk` to `policy`, both given by name.
 * Fails if either the disk or the policy does not exist, otherwise
 * returns the result of dsched_switch().
 */
644 dsched_dev_handle_switch(char *disk, char *policy)
647 struct dsched_policy *pol;
649 dp = dsched_find_disk(disk);
650 pol = dsched_find_policy(policy);
652 if ((dp == NULL) || (pol == NULL))
655 return (dsched_switch(dp, pol->d_ops));
/*
 * Open handler for the dsched control device: requires read-write,
 * blocking access.
 */
659 dsched_dev_open(struct dev_open_args *ap)
662 * Only allow read-write access.
664 if (((ap->a_oflags & FWRITE) == 0) || ((ap->a_oflags & FREAD) == 0))
668 * We don't allow nonblocking access.
670 if ((ap->a_oflags & O_NONBLOCK) != 0) {
671 kprintf("dsched_dev: can't do nonblocking access\n");
/* Close handler for the dsched control device. */
679 dsched_dev_close(struct dev_close_args *ap)
/*
 * ioctl dispatcher for the dsched control device: policy switching and
 * disk/policy enumeration on behalf of userland tools.
 */
685 dsched_dev_ioctl(struct dev_ioctl_args *ap)
688 struct dsched_ioctl *data;
691 data = (struct dsched_ioctl *)ap->a_data;
/* Attach a named policy to a named disk. */
694 case DSCHED_SET_DEVICE_POLICY:
695 if (dsched_dev_handle_switch(data->dev_name, data->pol_name))
696 error = ENOENT; /* No such file or directory */
/* Report the current policy of one named disk. */
699 case DSCHED_LIST_DISK:
700 if (dsched_dev_list_disk(data) != 0) {
701 error = EINVAL; /* Invalid argument */
/* Enumerate disks by index (data->num_elem). */
705 case DSCHED_LIST_DISKS:
706 if (dsched_dev_list_disks(data) != 0) {
707 error = EINVAL; /* Invalid argument */
/* Enumerate registered policies by index. */
711 case DSCHED_LIST_POLICIES:
712 if (dsched_dev_list_policies(data) != 0) {
713 error = EINVAL; /* Invalid argument */
719 error = ENOTTY; /* Inappropriate ioctl for device */
/*
 * Framework initialization (body of dsched_init; the function header is
 * not visible in this chunk): sets up the global lock and registers the
 * built-in default policy.
 */
734 lockinit(&dsched_lock, "dsched lock", 0, 0);
735 dsched_register(&dsched_default_ops);
/* Creates the dsched control device node. */
744 dsched_dev_init(void)
746 dsched_dev = make_dev(&dsched_dev_ops,
/* Destroys the dsched control device node. */
755 dsched_dev_uninit(void)
757 destroy_dev(dsched_dev);
/*
 * Boot/shutdown hooks: register the framework early (before disks are
 * created) and the control device with the other drivers.
 */
760 SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_FIRST, dsched_init, NULL);
761 SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_ANY, dsched_uninit, NULL);
762 SYSINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_init, NULL);
763 SYSUNINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_uninit, NULL);
/* Runtime knobs: kern.dsched_debug (verbosity) and kern.dsched_test1. */
768 SYSCTL_INT(_kern, OID_AUTO, dsched_debug, CTLFLAG_RW, &dsched_debug_enable,
769 0, "Enable dsched debugging");
770 SYSCTL_INT(_kern, OID_AUTO, dsched_test1, CTLFLAG_RW, &dsched_test1,
771 0, "Switch dsched test1 method");