From: Alex Hornung Date: Thu, 25 Feb 2010 08:53:01 +0000 (+0000) Subject: dsched - import the I/O scheduler framework dsched X-Git-Tag: v2.7.1~30 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/b80a9543de1d55c64ee78c872036a316178ee866 dsched - import the I/O scheduler framework dsched * Import my branch of the I/O scheduler framework. This commit only introduces the core dsched framework, without binding it into the system and without any policy except for the default one. --- diff --git a/sys/kern/subr_dsched.c b/sys/kern/subr_dsched.c new file mode 100644 index 0000000000..ea59974648 --- /dev/null +++ b/sys/kern/subr_dsched.c @@ -0,0 +1,763 @@ +/* + * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Alex Hornung + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MALLOC_DEFINE(M_DSCHED, "dsched", "Disk Scheduler Framework allocations"); + +static dsched_prepare_t default_prepare; +static dsched_teardown_t default_teardown; +static dsched_flush_t default_flush; +static dsched_cancel_t default_cancel; +static dsched_queue_t default_queue; +#if 0 +static biodone_t default_completed; +#endif + +dsched_new_buf_t *default_new_buf; +dsched_new_proc_t *default_new_proc; +dsched_new_thread_t *default_new_thread; +dsched_exit_proc_t *default_exit_proc; +dsched_exit_thread_t *default_exit_thread; + +static d_open_t dsched_dev_open; +static d_close_t dsched_dev_close; +static d_ioctl_t dsched_dev_ioctl; + +static int dsched_dev_list_disks(struct dsched_ioctl *data); +static int dsched_dev_list_disk(struct dsched_ioctl *data); +static int dsched_dev_list_policies(struct dsched_ioctl *data); +static int dsched_dev_handle_switch(char *disk, char *policy); + + +struct lock dsched_lock; +static int dsched_debug_enable = 0; +static int dsched_test1 = 0; +static cdev_t dsched_dev; + +static struct dsched_policy_head dsched_policy_list = + TAILQ_HEAD_INITIALIZER(dsched_policy_list); + +static struct dsched_ops dsched_default_ops = { + .head = { + .name = "default" + }, + .prepare = 
default_prepare, + .teardown = default_teardown, + .flush = default_flush, + .cancel_all = default_cancel, + .bio_queue = default_queue, +}; + + +static struct dev_ops dsched_dev_ops = { + { "dsched", 0, 0 }, + .d_open = dsched_dev_open, + .d_close = dsched_dev_close, + .d_ioctl = dsched_dev_ioctl +}; + +/* + * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function + * using kvprintf + */ +int +dsched_debug(int level, char *fmt, ...) +{ + __va_list ap; + + __va_start(ap, fmt); + if (level <= dsched_debug_enable) + kvprintf(fmt, ap); + __va_end(ap); + + return 0; +} + +/* + * Called on disk_create() + * tries to read which policy to use from loader.conf, if there's + * none specified, the default policy is used. + */ +void +dsched_create(struct disk *dp, const char *head_name, int unit) +{ + char tunable_key[SPECNAMELEN + 11]; + char sched_policy[DSCHED_POLICY_NAME_LENGTH]; + struct dsched_policy *policy = NULL; + + /* Also look for serno stuff? */ + /* kprintf("dsched_create() for disk %s%d\n", head_name, unit); */ + lockmgr(&dsched_lock, LK_EXCLUSIVE); + + ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_%s%d", + head_name, unit); + if (TUNABLE_STR_FETCH(tunable_key, sched_policy, + sizeof(sched_policy)) != 0) { + policy = dsched_find_policy(sched_policy); + } + + ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_%s*", + head_name); + if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy, + sizeof(sched_policy)) != 0)) { + policy = dsched_find_policy(sched_policy); + } + + ksnprintf(tunable_key, sizeof(tunable_key), "scheduler_*"); + if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy, + sizeof(sched_policy)) != 0)) { + policy = dsched_find_policy(sched_policy); + } + + if (!policy) { + dsched_debug(0, "No policy for %s%d specified, " + "or policy not found\n", head_name, unit); + dsched_set_policy(dp, &dsched_default_ops); + } else { + dsched_set_policy(dp, policy->d_ops); + } + + lockmgr(&dsched_lock, LK_RELEASE); +} + 
+/* + * Called on disk_destroy() + * shuts down the scheduler core and cancels all remaining bios + */ +void +dsched_destroy(struct disk *dp) +{ + struct dsched_ops *old_ops; + + lockmgr(&dsched_lock, LK_EXCLUSIVE); + + old_ops = dp->d_sched_ops; + dp->d_sched_ops = &dsched_default_ops; + old_ops->cancel_all(dp); + old_ops->teardown(dp); + atomic_subtract_int(&old_ops->head.ref_count, 1); + KKASSERT(old_ops->head.ref_count >= 0); + + lockmgr(&dsched_lock, LK_RELEASE); +} + + +void +dsched_queue(struct disk *dp, struct bio *bio) +{ + int error = 0; + error = dp->d_sched_ops->bio_queue(dp, bio); + + if (error) { + if (bio->bio_buf->b_cmd == BUF_CMD_FLUSH) { + dp->d_sched_ops->flush(dp, bio); + } + dsched_strategy_raw(dp, bio); + } +} + + +/* + * Called from each module_init or module_attach of each policy + * registers the policy in the local policy list. + */ +int +dsched_register(struct dsched_ops *d_ops) +{ + struct dsched_policy *policy; + int error = 0; + + lockmgr(&dsched_lock, LK_EXCLUSIVE); + + policy = dsched_find_policy(d_ops->head.name); + + if (!policy) { + if ((d_ops->new_buf != NULL) || (d_ops->new_proc != NULL) || + (d_ops->new_thread != NULL)) { + /* + * Policy ops has hooks for proc/thread/buf creation, + * so check if there are already hooks for those present + * and if so, stop right now. 
+ */ + if ((default_new_buf != NULL) || (default_new_proc != NULL) || + (default_new_thread != NULL) || (default_exit_proc != NULL) || + (default_exit_thread != NULL)) { + dsched_debug(LOG_ERR, "A policy with " + "proc/thread/buf hooks is already in use!"); + error = 1; + goto done; + } + + /* If everything is fine, just register the hooks */ + default_new_buf = d_ops->new_buf; + default_new_proc = d_ops->new_proc; + default_new_thread = d_ops->new_thread; + default_exit_proc = d_ops->exit_proc; + default_exit_thread = d_ops->exit_thread; + } + + policy = kmalloc(sizeof(struct dsched_policy), M_DSCHED, M_WAITOK); + policy->d_ops = d_ops; + TAILQ_INSERT_TAIL(&dsched_policy_list, policy, link); + atomic_add_int(&policy->d_ops->head.ref_count, 1); + } else { + dsched_debug(LOG_ERR, "Policy with name %s already registered!\n", + d_ops->head.name); + error = 1; + } + +done: + lockmgr(&dsched_lock, LK_RELEASE); + return error; +} + +/* + * Called from each module_detach of each policy + * unregisters the policy + */ +int +dsched_unregister(struct dsched_ops *d_ops) +{ + struct dsched_policy *policy; + + lockmgr(&dsched_lock, LK_EXCLUSIVE); + policy = dsched_find_policy(d_ops->head.name); + + if (policy) { + if (policy->d_ops->head.ref_count > 1) + return 1; + TAILQ_REMOVE(&dsched_policy_list, policy, link); + atomic_subtract_int(&policy->d_ops->head.ref_count, 1); + KKASSERT(policy->d_ops->head.ref_count >= 0); + kfree(policy, M_DSCHED); + } + lockmgr(&dsched_lock, LK_RELEASE); + return 0; +} + + +/* + * switches the policy by first removing the old one and then + * enabling the new one. 
+ */ +int +dsched_switch(struct disk *dp, struct dsched_ops *new_ops) +{ + struct dsched_ops *old_ops; + + /* If we are asked to set the same policy, do nothing */ + if (dp->d_sched_ops == new_ops) + return 0; + + /* lock everything down, diskwise */ + lockmgr(&dsched_lock, LK_EXCLUSIVE); + old_ops = dp->d_sched_ops; + + atomic_subtract_int(&dp->d_sched_ops->head.ref_count, 1); + KKASSERT(dp->d_sched_ops->head.ref_count >= 0); + + dp->d_sched_ops = &dsched_default_ops; + old_ops->teardown(dp); + + /* Bring everything back to life */ + dsched_set_policy(dp, new_ops); + lockmgr(&dsched_lock, LK_RELEASE); + return 0; +} + + +/* + * Loads a given policy and attaches it to the specified disk. + * Also initializes the core for the policy + */ +void +dsched_set_policy(struct disk *dp, struct dsched_ops *new_ops) +{ + int locked = 0; + + /* Check if it is locked already. if not, we acquire the devfs lock */ + if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) { + lockmgr(&dsched_lock, LK_EXCLUSIVE); + locked = 1; + } + + new_ops->prepare(dp); + dp->d_sched_ops = new_ops; + atomic_add_int(&new_ops->head.ref_count, 1); + kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name, + new_ops->head.name); + + /* If we acquired the lock, we also get rid of it */ + if (locked) + lockmgr(&dsched_lock, LK_RELEASE); +} + +struct dsched_policy* +dsched_find_policy(char *search) +{ + struct dsched_policy *policy; + struct dsched_policy *policy_found = NULL; + int locked = 0; + + /* Check if it is locked already. 
if not, we acquire the devfs lock */ + if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) { + lockmgr(&dsched_lock, LK_EXCLUSIVE); + locked = 1; + } + + TAILQ_FOREACH(policy, &dsched_policy_list, link) { + if (!strcmp(policy->d_ops->head.name, search)) { + policy_found = policy; + break; + } + } + + /* If we acquired the lock, we also get rid of it */ + if (locked) + lockmgr(&dsched_lock, LK_RELEASE); + + return policy_found; +} + +struct disk* +dsched_find_disk(char *search) +{ + struct disk *dp_found = NULL; + struct disk *dp = NULL; + + while((dp = disk_enumerate(dp))) { + if (!strcmp(dp->d_cdev->si_name, search)) { + dp_found = dp; + break; + } + } + + return dp_found; +} + +struct disk* +dsched_disk_enumerate(struct disk *dp, struct dsched_ops *ops) +{ + while ((dp = disk_enumerate(dp))) { + if (dp->d_sched_ops == ops) + return dp; + } + + return NULL; +} + +struct dsched_policy * +dsched_policy_enumerate(struct dsched_policy *pol) +{ + if (!pol) + return (TAILQ_FIRST(&dsched_policy_list)); + else + return (TAILQ_NEXT(pol, link)); +} + +void +dsched_cancel_bio(struct bio *bp) +{ + bp->bio_buf->b_error = ENXIO; + bp->bio_buf->b_flags |= B_ERROR; + bp->bio_buf->b_resid = bp->bio_buf->b_bcount; + + biodone(bp); +} + +void +dsched_strategy_raw(struct disk *dp, struct bio *bp) +{ + /* + * Ideally, this stuff shouldn't be needed... but just in case, we leave it in + * to avoid panics + */ + KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!")); + if(bp->bio_track != NULL) { + dsched_debug(LOG_INFO, + "dsched_strategy_raw sees non-NULL bio_track!! 
" + "bio: %x\n", (uint32_t)bp); + bp->bio_track = NULL; + } + dev_dstrategy(dp->d_rawdev, bp); +} + +void +dsched_strategy_sync(struct disk *dp, struct bio *bio) +{ + struct buf *bp, *nbp; + struct bio *nbio; + + bp = bio->bio_buf; + + nbp = getpbuf(NULL); + nbio = &nbp->b_bio1; + + nbp->b_cmd = bp->b_cmd; + nbp->b_bufsize = bp->b_bufsize; + nbp->b_runningbufspace = bp->b_runningbufspace; + nbp->b_bcount = bp->b_bcount; + nbp->b_resid = bp->b_resid; + nbp->b_data = bp->b_data; + nbp->b_kvabase = bp->b_kvabase; + nbp->b_kvasize = bp->b_kvasize; + nbp->b_dirtyend = bp->b_dirtyend; + + nbio->bio_done = biodone_sync; + nbio->bio_flags |= BIO_SYNC; + nbio->bio_track = NULL; + + nbio->bio_caller_info1.ptr = dp; + nbio->bio_offset = bio->bio_offset; + + dev_dstrategy(dp->d_rawdev, nbio); + biowait(nbio, "dschedsync"); + bp->b_resid = nbp->b_resid; + bp->b_error = nbp->b_error; + biodone(bio); +} + +void +dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv) +{ + struct bio *nbio; + + nbio = push_bio(bio); + nbio->bio_done = done; + nbio->bio_offset = bio->bio_offset; + + dsched_set_bio_dp(nbio, dp); + dsched_set_bio_priv(nbio, priv); + + getmicrotime(&nbio->bio_caller_info3.tv); + dev_dstrategy(dp->d_rawdev, nbio); +} + +void +dsched_new_buf(struct buf *bp) +{ + if (default_new_buf != NULL) + default_new_buf(bp); +} + + +void +dsched_new_proc(struct proc *p) +{ + if (default_new_proc != NULL) + default_new_proc(p); +} + + +void +dsched_new_thread(struct thread *td) +{ + if (default_new_thread != NULL) + default_new_thread(td); +} + +void +dsched_exit_proc(struct proc *p) +{ + if (default_exit_proc != NULL) + default_exit_proc(p); +} + + +void +dsched_exit_thread(struct thread *td) +{ + if (default_exit_thread != NULL) + default_exit_thread(td); +} + +int +default_prepare(struct disk *dp) +{ + return 0; +} + +void +default_teardown(struct disk *dp) +{ + +} + +void +default_flush(struct disk *dp, struct bio *bio) +{ + +} + +void 
+default_cancel(struct disk *dp) +{ + +} + +int +default_queue(struct disk *dp, struct bio *bio) +{ + dsched_strategy_raw(dp, bio); +#if 0 + dsched_strategy_async(dp, bio, default_completed, NULL); +#endif + return 0; +} + +#if 0 +void +default_completed(struct bio *bp) +{ + struct bio *obio; + + obio = pop_bio(bp); + biodone(obio); +} +#endif + +/* + * dsched device stuff + */ + +static int +dsched_dev_list_disks(struct dsched_ioctl *data) +{ + struct disk *dp = NULL; + uint32_t i; + + for (i = 0; (i <= data->num_elem) && (dp = disk_enumerate(dp)); i++); + + if (dp == NULL) + return -1; + + strncpy(data->dev_name, dp->d_cdev->si_name, sizeof(data->dev_name)); + + if (dp->d_sched_ops) { + strncpy(data->pol_name, dp->d_sched_ops->head.name, + sizeof(data->pol_name)); + } else { + strncpy(data->pol_name, "N/A (error)", 12); + } + + return 0; +} + +static int +dsched_dev_list_disk(struct dsched_ioctl *data) +{ + struct disk *dp = NULL; + int found = 0; + + while ((dp = disk_enumerate(dp))) { + if (!strncmp(dp->d_cdev->si_name, data->dev_name, + sizeof(data->dev_name))) { + KKASSERT(dp->d_sched_ops != NULL); + + found = 1; + strncpy(data->pol_name, dp->d_sched_ops->head.name, + sizeof(data->pol_name)); + break; + } + } + if (!found) + return -1; + + return 0; +} + +static int +dsched_dev_list_policies(struct dsched_ioctl *data) +{ + struct dsched_policy *pol = NULL; + uint32_t i; + + for (i = 0; (i <= data->num_elem) && (pol = dsched_policy_enumerate(pol)); i++); + + if (pol == NULL) + return -1; + + strncpy(data->pol_name, pol->d_ops->head.name, sizeof(data->pol_name)); + return 0; +} + +static int +dsched_dev_handle_switch(char *disk, char *policy) +{ + struct disk *dp; + struct dsched_policy *pol; + + dp = dsched_find_disk(disk); + pol = dsched_find_policy(policy); + + if ((dp == NULL) || (pol == NULL)) + return -1; + + return (dsched_switch(dp, pol->d_ops)); +} + +static int +dsched_dev_open(struct dev_open_args *ap) +{ + /* + * Only allow read-write access. 
+ */ + if (((ap->a_oflags & FWRITE) == 0) || ((ap->a_oflags & FREAD) == 0)) + return(EPERM); + + /* + * We don't allow nonblocking access. + */ + if ((ap->a_oflags & O_NONBLOCK) != 0) { + kprintf("dsched_dev: can't do nonblocking access\n"); + return(ENODEV); + } + + return 0; +} + +static int +dsched_dev_close(struct dev_close_args *ap) +{ + return 0; +} + +static int +dsched_dev_ioctl(struct dev_ioctl_args *ap) +{ + int error; + struct dsched_ioctl *data; + + error = 0; + data = (struct dsched_ioctl *)ap->a_data; + + switch(ap->a_cmd) { + case DSCHED_SET_DEVICE_POLICY: + if (dsched_dev_handle_switch(data->dev_name, data->pol_name)) + error = ENOENT; /* No such file or directory */ + break; + + case DSCHED_LIST_DISK: + if (dsched_dev_list_disk(data) != 0) { + error = EINVAL; /* Invalid argument */ + } + break; + + case DSCHED_LIST_DISKS: + if (dsched_dev_list_disks(data) != 0) { + error = EINVAL; /* Invalid argument */ + } + break; + + case DSCHED_LIST_POLICIES: + if (dsched_dev_list_policies(data) != 0) { + error = EINVAL; /* Invalid argument */ + } + break; + + + default: + error = ENOTTY; /* Inappropriate ioctl for device */ + break; + } + + return(error); +} + +/* + * SYSINIT stuff + */ + + +static void +dsched_init(void) +{ + lockinit(&dsched_lock, "dsched lock", 0, 0); + dsched_register(&dsched_default_ops); +} + +static void +dsched_uninit(void) +{ +} + +static void +dsched_dev_init(void) +{ + dsched_dev = make_dev(&dsched_dev_ops, + 0, + UID_ROOT, + GID_WHEEL, + 0600, + "dsched"); +} + +static void +dsched_dev_uninit(void) +{ + destroy_dev(dsched_dev); +} + +SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_FIRST, dsched_init, NULL); +SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-2, SI_ORDER_ANY, dsched_uninit, NULL); +SYSINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_init, NULL); +SYSUNINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_uninit, NULL); + +/* + * SYSCTL stuff + */ 
+SYSCTL_INT(_kern, OID_AUTO, dsched_debug, CTLFLAG_RW, &dsched_debug_enable,
+		0, "Enable dsched debugging");
+SYSCTL_INT(_kern, OID_AUTO, dsched_test1, CTLFLAG_RW, &dsched_test1,
+		0, "Switch dsched test1 method");
diff --git a/sys/sys/dsched.h b/sys/sys/dsched.h
new file mode 100644
index 0000000000..56c55c7253
--- /dev/null
+++ b/sys/sys/dsched.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2009 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Alex Hornung
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef	_SYS_DSCHED_H_
+#define	_SYS_DSCHED_H_
+
+#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
+
+#ifndef _SYS_QUEUE_H_
+#include
+#endif
+#ifndef _SYS_BIO_H_
+#include
+#endif
+#ifndef _SYS_BIOTRACK_H_
+#include
+#endif
+#ifndef _SYS_LOCK_H_
+#include
+#endif
+#ifndef _SYS_CONF_H_
+#include
+#endif
+#ifndef _SYS_MSGPORT_H_
+#include
+#endif
+
+#define	DSCHED_POLICY_NAME_LENGTH	64	/* size of dsched_ops.head.name */
+
+#define	dsched_set_disk_priv(dp, x)	((dp)->d_dsched_priv1 = (x))
+#define	dsched_get_disk_priv(dp)	((dp)?((dp)->d_dsched_priv1):NULL)	/* NULL-safe */
+#define	dsched_set_proc_priv(pp, x)	((pp)->p_dsched_priv1 = (x))
+#define	dsched_get_proc_priv(pp)	((pp)?((pp)->p_dsched_priv1):NULL)	/* NULL-safe */
+
+#define	dsched_set_thread_priv(td, x)	((td)->td_dsched_priv1 = (x))
+#define	dsched_get_thread_priv(td)	((td)?((td)->td_dsched_priv1):NULL)	/* NULL-safe */
+
+#define	dsched_set_buf_priv(bp, x)	((bp)->b_iosched = (x))
+#define	dsched_get_buf_priv(bp)		((bp)?((bp)->b_iosched):NULL)	/* NULL-safe */
+#define	dsched_clr_buf_priv(bp)		((bp)->b_iosched = NULL)
+#define	dsched_is_clear_buf_priv(bp)	((bp)->b_iosched == NULL)
+
+
+#define	dsched_set_bio_dp(bio, x)	((bio)->bio_caller_info1.ptr = (x))	/* disk of the bio */
+#define	dsched_get_bio_dp(bio)		((bio)?((bio)->bio_caller_info1.ptr):NULL)
+#define	dsched_set_bio_priv(bio, x)	((bio)->bio_caller_info2.ptr = (x))	/* policy private data */
+#define	dsched_get_bio_priv(bio)	((bio)?((bio)->bio_caller_info2.ptr):NULL)
+#define	dsched_set_bio_stime(bio, x)	((bio)->bio_caller_info3.lvalue = (x))
+#define	dsched_get_bio_stime(bio)	((bio)?((bio)->bio_caller_info3.lvalue):0)	/* 0 for a NULL bio */
+
+
+typedef int	dsched_prepare_t(struct disk *dp);
+typedef void	dsched_teardown_t(struct disk *dp);
+typedef void	dsched_flush_t(struct disk *dp, struct bio *bio);
+typedef void	dsched_cancel_t(struct disk *dp);
+typedef int	dsched_queue_t(struct disk *dp, struct bio *bio);	/* non-zero: dsched_queue() issues the bio raw */
+typedef	void	dsched_new_buf_t(struct buf *bp);
+typedef	void	dsched_new_proc_t(struct proc *p);
+typedef	void	dsched_new_thread_t(struct thread *td);
+typedef	void	dsched_exit_proc_t(struct proc *p);
+typedef	void	dsched_exit_thread_t(struct thread *td);
+
+struct dsched_ops {	/* operations vector a policy passes to dsched_register() */
+	struct {
+		char		name[DSCHED_POLICY_NAME_LENGTH];	/* key used by dsched_find_policy() */
+		uint64_t	uniq_id;
+		int		ref_count;	/* raised by dsched_register() and dsched_set_policy() */
+	} head;
+
+	dsched_prepare_t	*prepare;	/* called by dsched_set_policy() when attaching to a disk */
+	dsched_teardown_t	*teardown;	/* called by dsched_destroy() and dsched_switch() */
+	dsched_flush_t		*flush;
+	dsched_cancel_t		*cancel_all;	/* called by dsched_destroy() before teardown */
+	dsched_queue_t		*bio_queue;	/* called by dsched_queue() for every bio */
+
+	dsched_new_buf_t	*new_buf;	/* the five hooks below are optional and may be NULL */
+	dsched_new_proc_t	*new_proc;
+	dsched_new_thread_t	*new_thread;
+	dsched_exit_proc_t	*exit_proc;
+	dsched_exit_thread_t	*exit_thread;
+};
+
+struct dsched_policy {	/* entry of the registered-policy list */
+	TAILQ_ENTRY(dsched_policy) link;
+
+	struct dsched_ops	*d_ops;
+};
+
+struct dsched_object
+{
+	struct disk	*dp;
+	struct bio	*bio;
+	int		pid;
+	struct thread	*thread;
+	struct proc	*proc;
+};
+
+TAILQ_HEAD(dschedq, dsched_object);
+TAILQ_HEAD(dsched_policy_head, dsched_policy);
+
+void	dsched_create(struct disk *dp, const char *head_name, int unit);
+void	dsched_destroy(struct disk *dp);
+void	dsched_queue(struct disk *dp, struct bio *bio);
+int	dsched_register(struct dsched_ops *d_ops);	/* 0 on success, 1 on failure */
+int	dsched_unregister(struct dsched_ops *d_ops);	/* 1 while the policy is still referenced */
+int	dsched_switch(struct disk *dp, struct dsched_ops *new_ops);
+void	dsched_set_policy(struct disk *dp, struct dsched_ops *new_ops);
+struct dsched_policy *dsched_find_policy(char *search);	/* NULL if not registered */
+struct disk	*dsched_find_disk(char *search);	/* NULL if no such disk */
+struct dsched_policy	*dsched_policy_enumerate(struct dsched_policy *pol);	/* pass NULL to start */
+struct disk	*dsched_disk_enumerate(struct disk *dp, struct dsched_ops *ops);
+void	dsched_cancel_bio(struct bio *bp);
+void	dsched_strategy_raw(struct disk *dp, struct bio *bp);
+void	dsched_strategy_sync(struct disk *dp, struct bio *bp);
+void	dsched_strategy_async(struct disk *dp, struct bio *bp, biodone_t *done, void *priv);
+int	dsched_debug(int level, char *fmt, ...);	/* always returns 0 */
+dsched_new_buf_t	dsched_new_buf;
+dsched_new_proc_t	dsched_new_proc;
+dsched_new_thread_t	dsched_new_thread;
+
+dsched_exit_proc_t	dsched_exit_proc;
+dsched_exit_thread_t	dsched_exit_thread;
+
+#endif /* _KERNEL || _KERNEL_STRUCTURES */
+
+
+#define	DSCHED_NAME_LENGTH		64	/* size of the name fields in struct dsched_ioctl */
+#define	DSCHED_SET_DEVICE_POLICY	_IOWR('d', 1, struct dsched_ioctl)	/* in: dev_name, pol_name */
+#define	DSCHED_LIST_DISKS		_IOWR('d', 2, struct dsched_ioctl)	/* in: num_elem; out: dev_name, pol_name */
+#define	DSCHED_LIST_DISK		_IOWR('d', 3, struct dsched_ioctl)	/* in: dev_name; out: pol_name */
+#define	DSCHED_LIST_POLICIES		_IOWR('d', 4, struct dsched_ioctl)	/* in: num_elem; out: pol_name */
+
+struct	dsched_ioctl {
+	uint16_t	num_elem;	/* zero-based index for the LIST_DISKS / LIST_POLICIES requests */
+	char		dev_name[DSCHED_NAME_LENGTH];
+	char		pol_name[DSCHED_NAME_LENGTH];
+};
+
+#endif /* _SYS_DSCHED_H_ */
diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile
index ea1a3508d4..5ec44ed94a 100644
--- a/usr.sbin/Makefile
+++ b/usr.sbin/Makefile
@@ -34,6 +34,7 @@ SUBDIR= 802_11 \
 	devinfo \
 	dev_mkdb \
 	dntpd \
+	dschedctl \
 	edquota \
 	faithd \
 	fdcontrol \
diff --git a/usr.sbin/dschedctl/Makefile b/usr.sbin/dschedctl/Makefile
new file mode 100644
index 0000000000..939e8d12b3
--- /dev/null
+++ b/usr.sbin/dschedctl/Makefile
@@ -0,0 +1,4 @@
+PROG=	dschedctl
+NOMAN=
+
+.include
diff --git a/usr.sbin/dschedctl/dschedctl.c b/usr.sbin/dschedctl/dschedctl.c
new file mode 100644
index 0000000000..d5e3e21bff
--- /dev/null
+++ b/usr.sbin/dschedctl/dschedctl.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2010 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Alex Hornung
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Descriptor of /dev/dsched, opened once in main() */
+static int dev_fd;
+
+
+/*
+ * Prints the command line synopsis to stderr and exits with status 1.
+ */
+static void
+usage(void)
+{
+	fprintf(stderr,
+	    "Usage: dschedctl <commands>\n"
+	    "Valid commands are:\n"
+	    " -l [-d <disk>]\n"
+	    "\t Lists all disks and their policies, or just for <disk>\n"
+	    " -p\n"
+	    "\t Lists all available I/O scheduling policies\n"
+	    " -s <policy> -d <disk>\n"
+	    "\t Switches the policy of the disk specified with -d to <policy>\n"
+	    "\n"
+	    "Valid options and its arguments are:\n"
+	    " -d <disk>\n"
+	    "\t Specifies the disk to be used (for -l and -s)\n"
+	    );
+
+	exit(1);
+}
+
+
+/*
+ * Issues cmd on the dsched device.  Does not return on failure (err()
+ * terminates the program), so the return value is always 0.
+ */
+static int
+dsched_ioctl(unsigned long cmd, struct dsched_ioctl *pdioc)
+{
+	if (ioctl(dev_fd, cmd, pdioc) == -1)
+		err(1, "ioctl");
+
+	return 0;
+}
+
+
+/*
+ * Copies a disk or policy name into a DSCHED_NAME_LENGTH sized field of
+ * struct dsched_ioctl.  The result is always NUL-terminated; a name that
+ * does not fit is rejected instead of being passed on truncated.
+ */
+static void
+copy_name(char *dst, const char *src, const char *what)
+{
+	if (snprintf(dst, DSCHED_NAME_LENGTH, "%s", src) >= DSCHED_NAME_LENGTH)
+		errx(1, "%s name too long: %s", what, src);
+}
+
+
+/*
+ * dschedctl: lists disks and I/O scheduling policies (-l, -p) or switches
+ * the policy of a disk (-s <policy> -d <disk>) through /dev/dsched.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct dsched_ioctl	dioc;
+	char	*disk_name = NULL;
+	char	*policy = NULL;
+	int	dflag = 0, lflag = 0, pflag = 0, sflag = 0;
+	int	ch, error;
+
+	/* Never hand uninitialized stack contents to the kernel */
+	memset(&dioc, 0, sizeof(dioc));
+
+	while ((ch = getopt(argc, argv, "d:hlps:")) != -1) {
+		switch (ch) {
+		case 'd':
+			dflag = 1;
+			disk_name = optarg;
+			break;
+		case 'l':
+			lflag = 1;
+			break;
+		case 'p':
+			pflag = 1;
+			break;
+		case 's':
+			sflag = 1;
+			policy = optarg;
+			break;
+		case 'h':
+		case '?':
+		default:
+			usage();
+			/* NOT REACHED */
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Check arguments:
+	 * - need to use at least one mode
+	 * - can not use -s without -d
+	 */
+	if (!(lflag || pflag || sflag) ||
+	    (sflag && (!dflag))) {
+		usage();
+		/* NOT REACHED */
+	}
+
+	dev_fd = open("/dev/dsched", O_RDWR);
+	if (dev_fd == -1)
+		err(1, "open(/dev/dsched)");
+
+	if (lflag) {
+		if (dflag) {
+			copy_name(dioc.dev_name, disk_name, "disk");
+			error = dsched_ioctl(DSCHED_LIST_DISK, &dioc);
+			if (!error) {
+				printf("%s\t=>\t%s\n", disk_name, dioc.pol_name);
+			}
+		} else {
+			/* Walk the disks by index until the ioctl fails */
+			dioc.num_elem = 0;
+			while(ioctl(dev_fd, DSCHED_LIST_DISKS, &dioc) != -1) {
+				++dioc.num_elem;
+				printf("%s\t=>\t%s\n", dioc.dev_name, dioc.pol_name);
+			}
+		}
+	}
+
+	if (pflag) {
+		/* Same scheme for the list of policies */
+		dioc.num_elem = 0;
+		while(ioctl(dev_fd, DSCHED_LIST_POLICIES, &dioc) != -1) {
+			++dioc.num_elem;
+			printf("\t>\t%s\n", dioc.pol_name);
+		}
+	}
+
+	if (sflag) {
+		copy_name(dioc.dev_name, disk_name, "disk");
+		copy_name(dioc.pol_name, policy, "policy");
+		error = dsched_ioctl(DSCHED_SET_DEVICE_POLICY, &dioc);
+		if (!error) {
+			printf("Switched scheduler policy of %s successfully to %s\n",
+			    disk_name, policy);
+		}
+	}
+
+	close(dev_fd);
+
+	return 0;
+}