2 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
38 #include <sys/sysctl.h>
41 #include <sys/diskslice.h>
43 #include <sys/malloc.h>
44 #include <sys/sysctl.h>
45 #include <machine/md_var.h>
46 #include <sys/ctype.h>
47 #include <sys/syslog.h>
48 #include <sys/device.h>
49 #include <sys/msgport.h>
50 #include <sys/msgport2.h>
52 #include <sys/dsched.h>
53 #include <sys/fcntl.h>
54 #include <machine/varargs.h>
/*
 * Entry points of the built-in "default" (pass-through) scheduling policy.
 * The dsched_*_t names are function typedefs — presumably from
 * <sys/dsched.h>; not visible in this extract, TODO confirm.
 */
56 static dsched_prepare_t default_prepare;
57 static dsched_teardown_t default_teardown;
58 static dsched_flush_t default_flush;
59 static dsched_cancel_t default_cancel;
60 static dsched_queue_t default_queue;
/*
 * Currently-installed proc/thread/buf lifecycle hooks.  Only one policy
 * at a time may own these; dsched_register() refuses a second policy
 * that supplies hooks while any of these are non-NULL.
 */
62 static dsched_new_buf_t *default_new_buf;
63 static dsched_new_proc_t *default_new_proc;
64 static dsched_new_thread_t *default_new_thread;
65 static dsched_exit_buf_t *default_exit_buf;
66 static dsched_exit_proc_t *default_exit_proc;
67 static dsched_exit_thread_t *default_exit_thread;
/* cdev entry points for the /dev dsched control device. */
69 static d_open_t dsched_dev_open;
70 static d_close_t dsched_dev_close;
71 static d_ioctl_t dsched_dev_ioctl;
/* ioctl helpers; each fills in or consumes a struct dsched_ioctl. */
73 static int dsched_dev_list_disks(struct dsched_ioctl *data);
74 static int dsched_dev_list_disk(struct dsched_ioctl *data);
75 static int dsched_dev_list_policies(struct dsched_ioctl *data);
76 static int dsched_dev_handle_switch(char *disk, char *policy);
/*
 * Global lock serializing policy registration, policy switching and the
 * policy list below.  Taken LK_EXCLUSIVE by all mutators in this file.
 */
79 struct lock dsched_lock;
/* Debug verbosity threshold consumed by dsched_debug(); sysctl/tunable. */
80 static int dsched_debug_enable = 0;
81 static int dsched_test1 = 0;
/* The control device node created by dsched_dev_init(). */
82 static cdev_t dsched_dev;
/* TAILQ of all registered scheduling policies, guarded by dsched_lock. */
84 static struct dsched_policy_head dsched_policy_list =
85 TAILQ_HEAD_INITIALIZER(dsched_policy_list);
/*
 * The fallback policy: plain pass-through scheduling via the default_*
 * stubs.  Installed on a disk whenever no (valid) policy is configured
 * and as an interim policy during teardown/switch.
 * NOTE(review): some initializer lines (e.g. the .name field and closing
 * brace) appear to be missing from this extract.
 */
87 static struct dsched_policy dsched_default_policy = {
90 .prepare = default_prepare,
91 .teardown = default_teardown,
92 .flush = default_flush,
93 .cancel_all = default_cancel,
94 .bio_queue = default_queue,
/* cdev switch for the dsched control device. */
98 static struct dev_ops dsched_dev_ops = {
100 .d_open = dsched_dev_open,
101 .d_close = dsched_dev_close,
102 .d_ioctl = dsched_dev_ioctl
106 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
110 dsched_debug(int level, char *fmt, ...)
/*
 * Emit only when the message's level is at or below the configured
 * kern.dsched_debug threshold.  NOTE(review): the varargs/kvprintf body
 * and return are not visible in this extract.
 */
115 if (level <= dsched_debug_enable)
123 * Called on disk_create()
124 * tries to read which policy to use from loader.conf, if there's
125 * none specified, the default policy is used.
128 dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
130 char tunable_key[SPECNAMELEN + 48];
131 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
132 struct dsched_policy *policy = NULL;
134 /* Also look for serno stuff? */
135 /* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
136 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* Most specific first: per-unit tunable, e.g. kern.dsched.policy.da0 */
138 ksnprintf(tunable_key, sizeof(tunable_key), "kern.dsched.policy.%s%d",
140 if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
141 sizeof(sched_policy)) != 0) {
142 policy = dsched_find_policy(sched_policy);
/* Then the per-driver tunable, e.g. kern.dsched.policy.da */
145 ksnprintf(tunable_key, sizeof(tunable_key), "kern.dsched.policy.%s",
147 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
148 sizeof(sched_policy)) != 0)) {
149 policy = dsched_find_policy(sched_policy);
/* Finally the global default tunable. */
152 ksnprintf(tunable_key, sizeof(tunable_key), "kern.dsched.policy.default");
153 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
154 sizeof(sched_policy)) != 0)) {
155 policy = dsched_find_policy(sched_policy);
/* No tunable matched (or named an unregistered policy): use the default. */
159 dsched_debug(0, "No policy for %s%d specified, "
160 "or policy not found\n", head_name, unit);
161 dsched_set_policy(dp, &dsched_default_policy);
163 dsched_set_policy(dp, policy);
166 lockmgr(&dsched_lock, LK_RELEASE);
170 * Called on disk_destroy()
171 * shuts down the scheduler core and cancels all remaining bios
174 dsched_disk_destroy_callback(struct disk *dp)
176 struct dsched_policy *old_policy;
178 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/*
 * Swap in the default policy first so any concurrent queuing hits the
 * pass-through path, then cancel outstanding bios and tear down the
 * old policy's per-disk state.
 */
180 old_policy = dp->d_sched_policy;
181 dp->d_sched_policy = &dsched_default_policy;
182 old_policy->cancel_all(dp);
183 old_policy->teardown(dp);
/* Drop the reference taken when the policy was attached to this disk. */
184 atomic_subtract_int(&old_policy->ref_count, 1);
185 KKASSERT(old_policy->ref_count >= 0);
187 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Hand a bio for disk dp to the disk's current policy.  If the policy's
 * bio_queue() declines (non-zero — TODO confirm convention), flushes are
 * routed to the policy's flush hook and everything else goes straight to
 * the raw device.  NOTE(review): declarations/braces missing from extract.
 */
192 dsched_queue(struct disk *dp, struct bio *bio)
195 error = dp->d_sched_policy->bio_queue(dp, bio);
198 if (bio->bio_buf->b_cmd == BUF_CMD_FLUSH) {
199 dp->d_sched_policy->flush(dp, bio);
201 dsched_strategy_raw(dp, bio);
207 * Called from each module_init or module_attach of each policy
208 * registers the policy in the local policy list.
211 dsched_register(struct dsched_policy *d_policy)
213 struct dsched_policy *policy;
216 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* Reject duplicate names; the lookup is under dsched_lock. */
218 policy = dsched_find_policy(d_policy->name);
221 if ((d_policy->new_buf != NULL) || (d_policy->new_proc != NULL) ||
222 (d_policy->new_thread != NULL)) {
224 * Policy policy has hooks for proc/thread/buf creation,
225 * so check if there are already hooks for those present
226 * and if so, stop right now.
228 if ((default_new_buf != NULL) || (default_new_proc != NULL) ||
229 (default_new_thread != NULL) || (default_exit_proc != NULL) ||
230 (default_exit_thread != NULL)) {
231 dsched_debug(LOG_ERR, "A policy with "
232 "proc/thread/buf hooks is already in use!");
237 /* If everything is fine, just register the hooks */
238 default_new_buf = d_policy->new_buf;
239 default_new_proc = d_policy->new_proc;
240 default_new_thread = d_policy->new_thread;
241 default_exit_buf = d_policy->exit_buf;
242 default_exit_proc = d_policy->exit_proc;
243 default_exit_thread = d_policy->exit_thread;
/* Append to the global list and take the list's reference. */
246 TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
247 atomic_add_int(&d_policy->ref_count, 1);
249 dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
255 lockmgr(&dsched_lock, LK_RELEASE);
260 * Called from each module_detach of each policy
261 * unregisters the policy
264 dsched_unregister(struct dsched_policy *d_policy)
266 struct dsched_policy *policy;
268 lockmgr(&dsched_lock, LK_EXCLUSIVE);
269 policy = dsched_find_policy(d_policy->name);
/*
 * ref_count > 1 means some disk still uses the policy; refuse the
 * unregister (the early-out/return lines are not visible here).
 */
272 if (policy->ref_count > 1)
274 TAILQ_REMOVE(&dsched_policy_list, policy, link);
275 atomic_subtract_int(&policy->ref_count, 1);
276 KKASSERT(policy->ref_count >= 0);
278 lockmgr(&dsched_lock, LK_RELEASE);
284 * switches the policy by first removing the old one and then
285 * enabling the new one.
288 dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
290 struct dsched_policy *old_policy;
292 /* If we are asked to set the same policy, do nothing */
293 if (dp->d_sched_policy == new_policy)
296 /* lock everything down, diskwise */
297 lockmgr(&dsched_lock, LK_EXCLUSIVE);
298 old_policy = dp->d_sched_policy;
/* Drop the disk's reference on the outgoing policy. */
300 atomic_subtract_int(&dp->d_sched_policy->ref_count, 1);
301 KKASSERT(dp->d_sched_policy->ref_count >= 0);
/* Park the disk on the default policy while the old one tears down. */
303 dp->d_sched_policy = &dsched_default_policy;
304 old_policy->teardown(dp);
306 /* Bring everything back to life */
307 dsched_set_policy(dp, new_policy);
308 lockmgr(&dsched_lock, LK_RELEASE);
314 * Loads a given policy and attaches it to the specified disk.
315 * Also initializes the core for the policy
318 dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
322 /* Check if it is locked already. if not, we acquire the devfs lock */
/*
 * NOTE(review): '!' binds tighter than '==', so this compares
 * (!lockstatus(...)) against LK_EXCLUSIVE — almost certainly meant
 * "lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE".  Verify and fix.
 */
323 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
324 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* Initialize per-disk policy state, install it, and take a reference. */
328 new_policy->prepare(dp);
329 dp->d_sched_policy = new_policy;
330 atomic_add_int(&new_policy->ref_count, 1);
331 kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
334 /* If we acquired the lock, we also get rid of it */
336 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Look up a registered policy by name; returns NULL when not found.
 * Takes dsched_lock itself unless the caller already holds it exclusive.
 */
339 struct dsched_policy*
340 dsched_find_policy(char *search)
342 struct dsched_policy *policy;
343 struct dsched_policy *policy_found = NULL;
346 /* Check if it is locked already. if not, we acquire the devfs lock */
/*
 * NOTE(review): same precedence problem as in dsched_set_policy():
 * '!' applies before '=='; intended "lockstatus(...) != LK_EXCLUSIVE".
 */
347 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
348 lockmgr(&dsched_lock, LK_EXCLUSIVE);
/* Linear scan of the registered-policy list by exact name match. */
352 TAILQ_FOREACH(policy, &dsched_policy_list, link) {
353 if (!strcmp(policy->name, search)) {
354 policy_found = policy;
359 /* If we acquired the lock, we also get rid of it */
361 lockmgr(&dsched_lock, LK_RELEASE);
/*
 * Find a disk by its cdev name (e.g. "da0") via disk_enumerate();
 * returns NULL when no disk matches.  NOTE(review): match/return lines
 * are missing from this extract.
 */
367 dsched_find_disk(char *search)
369 struct disk *dp_found = NULL;
370 struct disk *dp = NULL;
372 while((dp = disk_enumerate(dp))) {
373 if (!strcmp(dp->d_cdev->si_name, search)) {
/*
 * Iterator: starting after dp, return the next disk whose current policy
 * is 'policy' (return lines not visible in this extract).
 */
383 dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
385 while ((dp = disk_enumerate(dp))) {
386 if (dp->d_sched_policy == policy)
/*
 * Iterator over the registered-policy list: NULL starts at the head,
 * otherwise returns the entry after 'pol'.  Caller is responsible for
 * holding dsched_lock across the walk — TODO confirm.
 */
393 struct dsched_policy *
394 dsched_policy_enumerate(struct dsched_policy *pol)
397 return (TAILQ_FIRST(&dsched_policy_list));
399 return (TAILQ_NEXT(pol, link));
/*
 * Fail a bio with ENXIO: mark the buffer errored and report nothing
 * transferred (resid == full bcount).  Completion (biodone) is not
 * visible in this extract.
 */
403 dsched_cancel_bio(struct bio *bp)
405 bp->bio_buf->b_error = ENXIO;
406 bp->bio_buf->b_flags |= B_ERROR;
407 bp->bio_buf->b_resid = bp->bio_buf->b_bcount;
/*
 * Pass the bio straight through to the disk's raw device, bypassing any
 * scheduling.  Defensively clears a stale bio_track before dispatch.
 */
413 dsched_strategy_raw(struct disk *dp, struct bio *bp)
416 * Ideally, this stuff shouldn't be needed... but just in case, we leave it in
419 KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
420 if(bp->bio_track != NULL) {
421 dsched_debug(LOG_INFO,
422 "dsched_strategy_raw sees non-NULL bio_track!! "
424 bp->bio_track = NULL;
426 dev_dstrategy(dp->d_rawdev, bp);
/*
 * Issue a bio synchronously: clone the caller's buf/bio into nbp/nbio
 * (allocation lines not visible in this extract), dispatch to the raw
 * device, biowait() for completion, then copy resid/error back into the
 * caller's buf.
 */
430 dsched_strategy_sync(struct disk *dp, struct bio *bio)
432 struct buf *bp, *nbp;
/* Mirror the relevant fields of the original buf onto the clone. */
440 nbp->b_cmd = bp->b_cmd;
441 nbp->b_bufsize = bp->b_bufsize;
442 nbp->b_runningbufspace = bp->b_runningbufspace;
443 nbp->b_bcount = bp->b_bcount;
444 nbp->b_resid = bp->b_resid;
445 nbp->b_data = bp->b_data;
446 nbp->b_kvabase = bp->b_kvabase;
447 nbp->b_kvasize = bp->b_kvasize;
448 nbp->b_dirtyend = bp->b_dirtyend;
/* Mark the clone bio synchronous so biowait() can rendezvous on it. */
450 nbio->bio_done = biodone_sync;
451 nbio->bio_flags |= BIO_SYNC;
452 nbio->bio_track = NULL;
454 nbio->bio_caller_info1.ptr = dp;
455 nbio->bio_offset = bio->bio_offset;
457 dev_dstrategy(dp->d_rawdev, nbio);
458 biowait(nbio, "dschedsync");
/* Propagate the result back to the caller's buffer. */
459 bp->b_resid = nbp->b_resid;
460 bp->b_error = nbp->b_error;
/*
 * Issue a bio asynchronously: push a new bio layer, install the caller's
 * completion callback and private data, timestamp it, and dispatch to
 * the raw device.
 */
465 dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
469 nbio = push_bio(bio);
470 nbio->bio_done = done;
471 nbio->bio_offset = bio->bio_offset;
473 dsched_set_bio_dp(nbio, dp);
474 dsched_set_bio_priv(nbio, priv);
/* Record submission time (used for latency accounting — TODO confirm). */
476 getmicrotime(&nbio->bio_caller_info3.tv);
477 dev_dstrategy(dp->d_rawdev, nbio);
/*
 * Thin dispatchers for the proc/thread/buf lifecycle hooks: each calls
 * the currently-registered handler (installed by dsched_register()) if
 * one exists, otherwise does nothing.
 */
481 dsched_new_buf(struct buf *bp)
483 if (default_new_buf != NULL)
488 dsched_exit_buf(struct buf *bp)
490 if (default_exit_buf != NULL)
491 default_exit_buf(bp);
495 dsched_new_proc(struct proc *p)
497 if (default_new_proc != NULL)
503 dsched_new_thread(struct thread *td)
505 if (default_new_thread != NULL)
506 default_new_thread(td);
510 dsched_exit_proc(struct proc *p)
512 if (default_exit_proc != NULL)
513 default_exit_proc(p);
518 dsched_exit_thread(struct thread *td)
520 if (default_exit_thread != NULL)
521 default_exit_thread(td);
/*
 * The default (pass-through) policy implementation.  prepare/teardown/
 * flush/cancel are no-ops (bodies not visible in this extract);
 * default_queue forwards every bio directly to the raw device.
 */
525 default_prepare(struct disk *dp)
531 default_teardown(struct disk *dp)
537 default_flush(struct disk *dp, struct bio *bio)
543 default_cancel(struct disk *dp)
549 default_queue(struct disk *dp, struct bio *bio)
551 dsched_strategy_raw(dp, bio);
/* Alternate path (dsched_test1 — TODO confirm): async with a callback. */
553 dsched_strategy_async(dp, bio, default_completed, NULL);
559 * dsched device stuff
/*
 * ioctl backend: report the num_elem'th disk's name and policy into
 * 'data'.  The empty-body for loop below intentionally just advances the
 * enumeration num_elem+1 times.
 */
563 dsched_dev_list_disks(struct dsched_ioctl *data)
565 struct disk *dp = NULL;
568 for (i = 0; (i <= data->num_elem) && (dp = disk_enumerate(dp)); i++);
573 strncpy(data->dev_name, dp->d_cdev->si_name, sizeof(data->dev_name));
575 if (dp->d_sched_policy) {
576 strncpy(data->pol_name, dp->d_sched_policy->name,
577 sizeof(data->pol_name));
/*
 * NOTE(review): hard-coded 12 happens to equal sizeof("N/A (error)")
 * including the NUL, but sizeof(data->pol_name)-bounded copy would be
 * safer; also strncpy does not guarantee termination in the branch above.
 */
579 strncpy(data->pol_name, "N/A (error)", 12);
/*
 * ioctl backend: look up the disk named in data->dev_name and write its
 * current policy name into data->pol_name.  Error/return handling is
 * not visible in this extract.
 */
586 dsched_dev_list_disk(struct dsched_ioctl *data)
588 struct disk *dp = NULL;
591 while ((dp = disk_enumerate(dp))) {
592 if (!strncmp(dp->d_cdev->si_name, data->dev_name,
593 sizeof(data->dev_name))) {
/* Every created disk gets a policy in dsched_disk_create_callback(). */
594 KKASSERT(dp->d_sched_policy != NULL);
597 strncpy(data->pol_name, dp->d_sched_policy->name,
598 sizeof(data->pol_name));
/*
 * ioctl backend: report the num_elem'th registered policy's name.
 * As in dsched_dev_list_disks(), the empty for loop only advances the
 * enumeration.
 */
609 dsched_dev_list_policies(struct dsched_ioctl *data)
611 struct dsched_policy *pol = NULL;
614 for (i = 0; (i <= data->num_elem) && (pol = dsched_policy_enumerate(pol)); i++);
619 strncpy(data->pol_name, pol->name, sizeof(data->pol_name));
/*
 * ioctl backend: resolve disk and policy by name and switch the disk to
 * the policy.  Non-zero (error return lines not visible) when either
 * lookup fails.
 */
624 dsched_dev_handle_switch(char *disk, char *policy)
627 struct dsched_policy *pol;
629 dp = dsched_find_disk(disk);
630 pol = dsched_find_policy(policy);
632 if ((dp == NULL) || (pol == NULL))
635 return (dsched_switch(dp, pol));
/*
 * Control-device open: require read+write access and reject O_NONBLOCK.
 * Error returns are not visible in this extract.
 */
639 dsched_dev_open(struct dev_open_args *ap)
642 * Only allow read-write access.
644 if (((ap->a_oflags & FWRITE) == 0) || ((ap->a_oflags & FREAD) == 0))
648 * We don't allow nonblocking access.
650 if ((ap->a_oflags & O_NONBLOCK) != 0) {
651 kprintf("dsched_dev: can't do nonblocking access\n");
/* Control-device close: no per-open state to release (body not visible). */
659 dsched_dev_close(struct dev_close_args *ap)
/*
 * Control-device ioctl dispatcher: maps each DSCHED_* command to its
 * backend helper and translates helper failure into an errno.
 */
665 dsched_dev_ioctl(struct dev_ioctl_args *ap)
668 struct dsched_ioctl *data;
671 data = (struct dsched_ioctl *)ap->a_data;
674 case DSCHED_SET_DEVICE_POLICY:
675 if (dsched_dev_handle_switch(data->dev_name, data->pol_name))
676 error = ENOENT; /* No such file or directory */
679 case DSCHED_LIST_DISK:
680 if (dsched_dev_list_disk(data) != 0) {
681 error = EINVAL; /* Invalid argument */
685 case DSCHED_LIST_DISKS:
686 if (dsched_dev_list_disks(data) != 0) {
687 error = EINVAL; /* Invalid argument */
691 case DSCHED_LIST_POLICIES:
692 if (dsched_dev_list_policies(data) != 0) {
693 error = EINVAL; /* Invalid argument */
/* Unknown command. */
699 error = ENOTTY; /* Inappropriate ioctl for device */
/*
 * Subsystem init (enclosing function signature not visible in this
 * extract — presumably dsched_init per the SYSINIT below): create the
 * global lock and register the built-in default policy.
 */
714 lockinit(&dsched_lock, "dsched lock", 0, 0);
715 dsched_register(&dsched_default_policy);
/* Create the dsched control device node (make_dev arguments truncated). */
724 dsched_dev_init(void)
726 dsched_dev = make_dev(&dsched_dev_ops,
/* Tear the control device back down on unload. */
735 dsched_dev_uninit(void)
737 destroy_dev(dsched_dev);
/*
 * Boot/unload hooks: core init runs very early (before disk creation,
 * SI_SUB_CREATE_INIT-2); the control device registers with the drivers.
 */
740 SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_FIRST, dsched_init, NULL);
741 SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_ANY, dsched_uninit, NULL);
742 SYSINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_init, NULL);
743 SYSUNINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_uninit, NULL);
/* Runtime knobs mirrored by the TUNABLEs consumed elsewhere in the file. */
748 SYSCTL_INT(_kern, OID_AUTO, dsched_debug, CTLFLAG_RW, &dsched_debug_enable,
749 0, "Enable dsched debugging");
750 SYSCTL_INT(_kern, OID_AUTO, dsched_test1, CTLFLAG_RW, &dsched_test1,
751 0, "Switch dsched test1 method");