dsched - Add request polling wrapper
[dragonfly.git] / sys / kern / kern_dsched.c
CommitLineData
b80a9543
AH
1/*
2 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/proc.h>
38#include <sys/sysctl.h>
39#include <sys/buf.h>
40#include <sys/conf.h>
41#include <sys/diskslice.h>
42#include <sys/disk.h>
43#include <sys/malloc.h>
b80a9543
AH
44#include <machine/md_var.h>
45#include <sys/ctype.h>
46#include <sys/syslog.h>
47#include <sys/device.h>
48#include <sys/msgport.h>
49#include <sys/msgport2.h>
50#include <sys/buf2.h>
51#include <sys/dsched.h>
52#include <sys/fcntl.h>
53#include <machine/varargs.h>
54
c7a0a046
AH
55TAILQ_HEAD(tdio_list_head, dsched_thread_io);
56
e02e815e
AH
57MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");
58
9495e99b
AH
59static dsched_prepare_t noop_prepare;
60static dsched_teardown_t noop_teardown;
61static dsched_cancel_t noop_cancel;
62static dsched_queue_t noop_queue;
b80a9543 63
bc3c9325 64static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
265b0d4a
MD
65static void dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx);
66static void dsched_thread_io_destroy(struct dsched_thread_io *tdio);
67static void dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx);
bc3c9325 68
e02e815e 69static int dsched_inited = 0;
9495e99b 70static int default_set = 0;
b80a9543
AH
71
72struct lock dsched_lock;
73static int dsched_debug_enable = 0;
b80a9543 74
e02e815e
AH
75struct dsched_stats dsched_stats;
76
77struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
78 DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
79struct objcache_malloc_args dsched_thread_io_malloc_args = {
80 DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
81struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
82 DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };
83
84static struct objcache *dsched_diskctx_cache;
85static struct objcache *dsched_tdctx_cache;
86static struct objcache *dsched_tdio_cache;
87
88TAILQ_HEAD(, dsched_thread_ctx) dsched_tdctx_list =
89 TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);
90
91struct lock dsched_tdctx_lock;
92
b80a9543
AH
93static struct dsched_policy_head dsched_policy_list =
94 TAILQ_HEAD_INITIALIZER(dsched_policy_list);
95
9495e99b 96static struct dsched_policy dsched_noop_policy = {
0160356d
AH
97 .name = "noop",
98
9495e99b
AH
99 .prepare = noop_prepare,
100 .teardown = noop_teardown,
101 .cancel_all = noop_cancel,
102 .bio_queue = noop_queue
b80a9543
AH
103};
104
9495e99b 105static struct dsched_policy *default_policy = &dsched_noop_policy;
b80a9543
AH
106
107/*
108 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
109 * using kvprintf
110 */
111int
112dsched_debug(int level, char *fmt, ...)
113{
114 __va_list ap;
115
116 __va_start(ap, fmt);
117 if (level <= dsched_debug_enable)
118 kvprintf(fmt, ap);
119 __va_end(ap);
120
121 return 0;
122}
123
124/*
125 * Called on disk_create()
126 * tries to read which policy to use from loader.conf, if there's
127 * none specified, the default policy is used.
128 */
129void
0160356d 130dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
b80a9543 131{
08db538f 132 char tunable_key[SPECNAMELEN + 48];
b80a9543 133 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
8fd0ee3c 134 char *ptr;
b80a9543
AH
135 struct dsched_policy *policy = NULL;
136
137 /* Also look for serno stuff? */
0160356d 138 /* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
b80a9543
AH
139 lockmgr(&dsched_lock, LK_EXCLUSIVE);
140
279e9fd5 141 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
b80a9543
AH
142 head_name, unit);
143 if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
144 sizeof(sched_policy)) != 0) {
145 policy = dsched_find_policy(sched_policy);
146 }
147
279e9fd5 148 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
b80a9543 149 head_name);
8fd0ee3c
AH
150 for (ptr = tunable_key; *ptr; ptr++) {
151 if (*ptr == '/')
152 *ptr = '-';
153 }
b80a9543
AH
154 if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
155 sizeof(sched_policy)) != 0)) {
156 policy = dsched_find_policy(sched_policy);
157 }
158
279e9fd5 159 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
9495e99b 160 if (!policy && !default_set && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
b80a9543
AH
161 sizeof(sched_policy)) != 0)) {
162 policy = dsched_find_policy(sched_policy);
163 }
164
165 if (!policy) {
027623d4
MD
166 if (!default_set && bootverbose) {
167 dsched_debug(0,
168 "No policy for %s%d specified, "
169 "or policy not found\n",
170 head_name, unit);
9495e99b
AH
171 }
172 dsched_set_policy(dp, default_policy);
b80a9543 173 } else {
0160356d 174 dsched_set_policy(dp, policy);
b80a9543
AH
175 }
176
8fd0ee3c
AH
177 if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
178 ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
179 else
180 ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
181 for (ptr = tunable_key; *ptr; ptr++) {
182 if (*ptr == '/')
183 *ptr = '-';
184 }
bc3c9325
AH
185 dsched_sysctl_add_disk(
186 (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
187 tunable_key);
188
b80a9543
AH
189 lockmgr(&dsched_lock, LK_RELEASE);
190}
191
192/*
279e9fd5
AH
193 * Called from disk_setdiskinfo (or rather _setdiskinfo). This will check if
194 * there's any policy associated with the serial number of the device.
195 */
196void
197dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
198{
199 char tunable_key[SPECNAMELEN + 48];
200 char sched_policy[DSCHED_POLICY_NAME_LENGTH];
201 struct dsched_policy *policy = NULL;
202
203 if (info->d_serialno == NULL)
204 return;
205
206 lockmgr(&dsched_lock, LK_EXCLUSIVE);
207
208 ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
209 info->d_serialno);
210
211 if((TUNABLE_STR_FETCH(tunable_key, sched_policy,
212 sizeof(sched_policy)) != 0)) {
213 policy = dsched_find_policy(sched_policy);
214 }
215
216 if (policy) {
217 dsched_switch(dp, policy);
218 }
219
bc3c9325
AH
220 dsched_sysctl_add_disk(
221 (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
222 info->d_serialno);
223
279e9fd5
AH
224 lockmgr(&dsched_lock, LK_RELEASE);
225}
226
227/*
b80a9543
AH
228 * Called on disk_destroy()
229 * shuts down the scheduler core and cancels all remaining bios
230 */
231void
0160356d 232dsched_disk_destroy_callback(struct disk *dp)
b80a9543 233{
0160356d 234 struct dsched_policy *old_policy;
bc3c9325 235 struct dsched_disk_ctx *diskctx;
b80a9543
AH
236
237 lockmgr(&dsched_lock, LK_EXCLUSIVE);
238
bc3c9325
AH
239 diskctx = dsched_get_disk_priv(dp);
240
0160356d 241 old_policy = dp->d_sched_policy;
9495e99b 242 dp->d_sched_policy = &dsched_noop_policy;
e02e815e
AH
243 old_policy->cancel_all(dsched_get_disk_priv(dp));
244 old_policy->teardown(dsched_get_disk_priv(dp));
bc3c9325
AH
245
246 if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
247 sysctl_ctx_free(&diskctx->sysctl_ctx);
248
e02e815e 249 policy_destroy(dp);
0160356d
AH
250 atomic_subtract_int(&old_policy->ref_count, 1);
251 KKASSERT(old_policy->ref_count >= 0);
b80a9543
AH
252
253 lockmgr(&dsched_lock, LK_RELEASE);
254}
255
256
257void
258dsched_queue(struct disk *dp, struct bio *bio)
259{
e02e815e
AH
260 struct dsched_thread_ctx *tdctx;
261 struct dsched_thread_io *tdio;
262 struct dsched_disk_ctx *diskctx;
b80a9543 263
e02e815e
AH
264 int found = 0, error = 0;
265
266 tdctx = dsched_get_buf_priv(bio->bio_buf);
267 if (tdctx == NULL) {
268 /* We don't handle this case, let dsched dispatch */
269 atomic_add_int(&dsched_stats.no_tdctx, 1);
270 dsched_strategy_raw(dp, bio);
271 return;
272 }
273
274 DSCHED_THREAD_CTX_LOCK(tdctx);
275
276 KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
c7a0a046
AH
277 /*
278 * XXX:
279 * iterate in reverse to make sure we find the most up-to-date
280 * tdio for a given disk. After a switch it may take some time
281 * for everything to clean up.
282 */
283 TAILQ_FOREACH_REVERSE(tdio, &tdctx->tdio_list, tdio_list_head, link) {
e02e815e
AH
284 if (tdio->dp == dp) {
285 dsched_thread_io_ref(tdio);
286 found = 1;
287 break;
b80a9543 288 }
e02e815e
AH
289 }
290
291 DSCHED_THREAD_CTX_UNLOCK(tdctx);
292 dsched_clr_buf_priv(bio->bio_buf);
293 dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */
294
295 KKASSERT(found == 1);
296 diskctx = dsched_get_disk_priv(dp);
297 dsched_disk_ctx_ref(diskctx);
c7a0a046
AH
298
299 if (dp->d_sched_policy != &dsched_noop_policy)
300 KKASSERT(tdio->debug_policy == dp->d_sched_policy);
301
302 KKASSERT(tdio->debug_inited == 0xF00F1234);
303
e02e815e
AH
304 error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);
305
306 if (error) {
b80a9543
AH
307 dsched_strategy_raw(dp, bio);
308 }
e02e815e
AH
309 dsched_disk_ctx_unref(diskctx);
310 dsched_thread_io_unref(tdio);
b80a9543
AH
311}
312
313
314/*
315 * Called from each module_init or module_attach of each policy
316 * registers the policy in the local policy list.
317 */
318int
0160356d 319dsched_register(struct dsched_policy *d_policy)
b80a9543
AH
320{
321 struct dsched_policy *policy;
322 int error = 0;
323
324 lockmgr(&dsched_lock, LK_EXCLUSIVE);
325
0160356d 326 policy = dsched_find_policy(d_policy->name);
b80a9543
AH
327
328 if (!policy) {
0160356d
AH
329 TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
330 atomic_add_int(&d_policy->ref_count, 1);
b80a9543
AH
331 } else {
332 dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
0160356d 333 d_policy->name);
e02e815e 334 error = EEXIST;
b80a9543
AH
335 }
336
b80a9543
AH
337 lockmgr(&dsched_lock, LK_RELEASE);
338 return error;
339}
340
341/*
342 * Called from each module_detach of each policy
343 * unregisters the policy
344 */
345int
0160356d 346dsched_unregister(struct dsched_policy *d_policy)
b80a9543
AH
347{
348 struct dsched_policy *policy;
349
350 lockmgr(&dsched_lock, LK_EXCLUSIVE);
0160356d 351 policy = dsched_find_policy(d_policy->name);
b80a9543
AH
352
353 if (policy) {
e02e815e
AH
354 if (policy->ref_count > 1) {
355 lockmgr(&dsched_lock, LK_RELEASE);
356 return EBUSY;
357 }
b80a9543 358 TAILQ_REMOVE(&dsched_policy_list, policy, link);
0160356d 359 atomic_subtract_int(&policy->ref_count, 1);
e02e815e 360 KKASSERT(policy->ref_count == 0);
b80a9543
AH
361 }
362 lockmgr(&dsched_lock, LK_RELEASE);
c7a0a046 363
b80a9543
AH
364 return 0;
365}
366
367
368/*
369 * switches the policy by first removing the old one and then
370 * enabling the new one.
371 */
372int
0160356d 373dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
b80a9543 374{
0160356d 375 struct dsched_policy *old_policy;
b80a9543
AH
376
377 /* If we are asked to set the same policy, do nothing */
0160356d 378 if (dp->d_sched_policy == new_policy)
b80a9543
AH
379 return 0;
380
381 /* lock everything down, diskwise */
382 lockmgr(&dsched_lock, LK_EXCLUSIVE);
0160356d 383 old_policy = dp->d_sched_policy;
b80a9543 384
e02e815e
AH
385 atomic_subtract_int(&old_policy->ref_count, 1);
386 KKASSERT(old_policy->ref_count >= 0);
b80a9543 387
9495e99b 388 dp->d_sched_policy = &dsched_noop_policy;
e02e815e
AH
389 old_policy->teardown(dsched_get_disk_priv(dp));
390 policy_destroy(dp);
b80a9543
AH
391
392 /* Bring everything back to life */
0160356d 393 dsched_set_policy(dp, new_policy);
e02e815e 394 lockmgr(&dsched_lock, LK_RELEASE);
c7a0a046 395
b80a9543
AH
396 return 0;
397}
398
399
400/*
401 * Loads a given policy and attaches it to the specified disk.
402 * Also initializes the core for the policy
403 */
404void
0160356d 405dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
b80a9543
AH
406{
407 int locked = 0;
408
409 /* Check if it is locked already. if not, we acquire the devfs lock */
410 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
411 lockmgr(&dsched_lock, LK_EXCLUSIVE);
412 locked = 1;
413 }
414
c7a0a046
AH
415 DSCHED_GLOBAL_THREAD_CTX_LOCK();
416
e02e815e
AH
417 policy_new(dp, new_policy);
418 new_policy->prepare(dsched_get_disk_priv(dp));
0160356d 419 dp->d_sched_policy = new_policy;
c7a0a046
AH
420
421 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
422
0160356d 423 atomic_add_int(&new_policy->ref_count, 1);
b80a9543 424 kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
0160356d 425 new_policy->name);
b80a9543
AH
426
427 /* If we acquired the lock, we also get rid of it */
428 if (locked)
429 lockmgr(&dsched_lock, LK_RELEASE);
430}
431
432struct dsched_policy*
433dsched_find_policy(char *search)
434{
435 struct dsched_policy *policy;
436 struct dsched_policy *policy_found = NULL;
437 int locked = 0;
438
439 /* Check if it is locked already. if not, we acquire the devfs lock */
440 if (!(lockstatus(&dsched_lock, curthread)) == LK_EXCLUSIVE) {
441 lockmgr(&dsched_lock, LK_EXCLUSIVE);
442 locked = 1;
443 }
444
445 TAILQ_FOREACH(policy, &dsched_policy_list, link) {
0160356d 446 if (!strcmp(policy->name, search)) {
b80a9543
AH
447 policy_found = policy;
448 break;
449 }
450 }
451
452 /* If we acquired the lock, we also get rid of it */
453 if (locked)
454 lockmgr(&dsched_lock, LK_RELEASE);
455
456 return policy_found;
457}
458
459struct disk*
460dsched_find_disk(char *search)
461{
462 struct disk *dp_found = NULL;
463 struct disk *dp = NULL;
464
465 while((dp = disk_enumerate(dp))) {
466 if (!strcmp(dp->d_cdev->si_name, search)) {
467 dp_found = dp;
468 break;
469 }
470 }
471
472 return dp_found;
473}
474
475struct disk*
0160356d 476dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
b80a9543
AH
477{
478 while ((dp = disk_enumerate(dp))) {
0160356d 479 if (dp->d_sched_policy == policy)
b80a9543
AH
480 return dp;
481 }
482
483 return NULL;
484}
485
486struct dsched_policy *
487dsched_policy_enumerate(struct dsched_policy *pol)
488{
489 if (!pol)
490 return (TAILQ_FIRST(&dsched_policy_list));
491 else
492 return (TAILQ_NEXT(pol, link));
493}
494
495void
496dsched_cancel_bio(struct bio *bp)
497{
498 bp->bio_buf->b_error = ENXIO;
499 bp->bio_buf->b_flags |= B_ERROR;
500 bp->bio_buf->b_resid = bp->bio_buf->b_bcount;
501
502 biodone(bp);
503}
504
505void
506dsched_strategy_raw(struct disk *dp, struct bio *bp)
507{
508 /*
509 * Ideally, this stuff shouldn't be needed... but just in case, we leave it in
510 * to avoid panics
511 */
512 KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
513 if(bp->bio_track != NULL) {
514 dsched_debug(LOG_INFO,
515 "dsched_strategy_raw sees non-NULL bio_track!! "
8dad49a8 516 "bio: %p\n", bp);
b80a9543
AH
517 bp->bio_track = NULL;
518 }
519 dev_dstrategy(dp->d_rawdev, bp);
520}
521
522void
523dsched_strategy_sync(struct disk *dp, struct bio *bio)
524{
525 struct buf *bp, *nbp;
526 struct bio *nbio;
527
528 bp = bio->bio_buf;
529
530 nbp = getpbuf(NULL);
531 nbio = &nbp->b_bio1;
532
533 nbp->b_cmd = bp->b_cmd;
534 nbp->b_bufsize = bp->b_bufsize;
535 nbp->b_runningbufspace = bp->b_runningbufspace;
536 nbp->b_bcount = bp->b_bcount;
537 nbp->b_resid = bp->b_resid;
538 nbp->b_data = bp->b_data;
9a82e536
MD
539#if 0
540 /*
541 * Buffers undergoing device I/O do not need a kvabase/size.
542 */
b80a9543
AH
543 nbp->b_kvabase = bp->b_kvabase;
544 nbp->b_kvasize = bp->b_kvasize;
9a82e536 545#endif
b80a9543
AH
546 nbp->b_dirtyend = bp->b_dirtyend;
547
548 nbio->bio_done = biodone_sync;
549 nbio->bio_flags |= BIO_SYNC;
550 nbio->bio_track = NULL;
551
552 nbio->bio_caller_info1.ptr = dp;
553 nbio->bio_offset = bio->bio_offset;
554
555 dev_dstrategy(dp->d_rawdev, nbio);
556 biowait(nbio, "dschedsync");
557 bp->b_resid = nbp->b_resid;
558 bp->b_error = nbp->b_error;
559 biodone(bio);
9a82e536
MD
560#if 0
561 nbp->b_kvabase = NULL;
562 nbp->b_kvasize = 0;
563#endif
e02e815e 564 relpbuf(nbp, NULL);
b80a9543
AH
565}
566
567void
568dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
569{
570 struct bio *nbio;
571
572 nbio = push_bio(bio);
573 nbio->bio_done = done;
574 nbio->bio_offset = bio->bio_offset;
575
576 dsched_set_bio_dp(nbio, dp);
577 dsched_set_bio_priv(nbio, priv);
578
579 getmicrotime(&nbio->bio_caller_info3.tv);
580 dev_dstrategy(dp->d_rawdev, nbio);
581}
582
265b0d4a 583/*
09f2bfe9
BP
584 * A special bio done call back function
585 * used by policy having request polling implemented.
586 */
587static void
588request_polling_biodone(struct bio *bp)
589{
590 struct dsched_disk_ctx *diskctx = NULL;
591 struct disk *dp = NULL;
592 struct bio *obio;
593 struct dsched_policy *policy;
594
595 dp = dsched_get_bio_dp(bp);
596 policy = dp->d_sched_policy;
597 diskctx = dsched_get_disk_priv(dp);
598 KKASSERT(diskctx && policy);
599 dsched_disk_ctx_ref(diskctx);
600
601 /*
602 * XXX:
603 * the bio_done function should not be blocked !
604 */
605 if (diskctx->dp->d_sched_policy->bio_done)
606 diskctx->dp->d_sched_policy->bio_done(bp);
607
608 obio = pop_bio(bp);
609 biodone(obio);
610
611 atomic_subtract_int(&diskctx->current_tag_queue_depth, 1);
612
613 /* call the polling function,
614 * XXX:
615 * the polling function should not be blocked!
616 */
617 if (policy->polling_func)
618 policy->polling_func(diskctx);
619 else
620 dsched_debug(0, "dsched: the policy uses request polling without a polling function!\n");
621 dsched_disk_ctx_unref(diskctx);
622}
623
624/*
625 * A special dsched strategy used by policy having request polling
626 * (polling function) implemented.
627 *
628 * The strategy is the just like dsched_strategy_async(), but
629 * the biodone call back is set to a preset one.
630 *
631 * If the policy needs its own biodone callback, it should
632 * register it in the policy structure. (bio_done field)
633 *
634 * The current_tag_queue_depth is maintained by this function
635 * and the request_polling_biodone() function
636 */
637
638void
639dsched_strategy_request_polling(struct disk *dp, struct bio *bio, struct dsched_disk_ctx *diskctx)
640{
641 atomic_add_int(&diskctx->current_tag_queue_depth, 1);
642 dsched_strategy_async(dp, bio, request_polling_biodone, dsched_get_bio_priv(bio));
643}
644
/*
 * Ref and deref various structures.  The 1->0 transition of the reference
 * count actually transitions 1->0x80000000 and causes the object to be
 * destroyed.  It is possible for transitory references to occur on the
 * object while it is being destroyed.  Bit 31 is used to indicate that
 * destruction is in progress and to prevent nested destruction.
 */
b80a9543 652void
e02e815e
AH
653dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
654{
655 int refcount;
656
657 refcount = atomic_fetchadd_int(&diskctx->refcount, 1);
e02e815e
AH
658}
659
660void
661dsched_thread_io_ref(struct dsched_thread_io *tdio)
662{
663 int refcount;
664
665 refcount = atomic_fetchadd_int(&tdio->refcount, 1);
e02e815e
AH
666}
667
668void
669dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
670{
671 int refcount;
672
673 refcount = atomic_fetchadd_int(&tdctx->refcount, 1);
e02e815e
AH
674}
675
676void
677dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
678{
265b0d4a
MD
679 int refs;
680 int nrefs;
e02e815e 681
265b0d4a
MD
682 /*
683 * Handle 1->0 transitions for diskctx and nested destruction
684 * recursions. If the refs are already in destruction mode (bit 31
685 * set) on the 1->0 transition we don't try to destruct it again.
686 *
687 * 0x80000001->0x80000000 transitions are handled normally and
688 * thus avoid nested dstruction.
689 */
690 for (;;) {
691 refs = diskctx->refcount;
692 cpu_ccfence();
693 nrefs = refs - 1;
694
695 KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
696 if (nrefs) {
697 if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
698 break;
699 continue;
700 }
701 nrefs = 0x80000000;
702 if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs)) {
703 dsched_disk_ctx_destroy(diskctx);
704 break;
705 }
706 }
707}
e02e815e 708
265b0d4a
MD
709static
710void
711dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx)
712{
713 struct dsched_thread_io *tdio;
e02e815e 714
e02e815e 715#if 0
265b0d4a
MD
716 kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
717 print_backtrace(4);
e02e815e 718#endif
265b0d4a
MD
719 lockmgr(&diskctx->lock, LK_EXCLUSIVE);
720 while ((tdio = TAILQ_FIRST(&diskctx->tdio_list)) != NULL) {
721 KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
722 TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
723 atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
724 tdio->diskctx = NULL;
725 /* XXX tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);*/
726 dsched_thread_io_unref(tdio);
e02e815e 727 }
265b0d4a
MD
728 lockmgr(&diskctx->lock, LK_RELEASE);
729 if (diskctx->dp->d_sched_policy->destroy_diskctx)
730 diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
731 KKASSERT(diskctx->refcount == 0x80000000);
732 objcache_put(dsched_diskctx_cache, diskctx);
733 atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
e02e815e
AH
734}
735
736void
737dsched_thread_io_unref(struct dsched_thread_io *tdio)
738{
265b0d4a
MD
739 int refs;
740 int nrefs;
e02e815e 741
265b0d4a
MD
742 /*
743 * Handle 1->0 transitions for tdio and nested destruction
744 * recursions. If the refs are already in destruction mode (bit 31
745 * set) on the 1->0 transition we don't try to destruct it again.
746 *
747 * 0x80000001->0x80000000 transitions are handled normally and
748 * thus avoid nested dstruction.
749 */
750 for (;;) {
751 refs = tdio->refcount;
752 cpu_ccfence();
753 nrefs = refs - 1;
754
755 KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
756 if (nrefs) {
757 if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
758 break;
759 continue;
760 }
761 nrefs = 0x80000000;
762 if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
763 dsched_thread_io_destroy(tdio);
764 break;
765 }
766 }
767}
e02e815e 768
265b0d4a
MD
769static void
770dsched_thread_io_destroy(struct dsched_thread_io *tdio)
771{
772 struct dsched_thread_ctx *tdctx;
773 struct dsched_disk_ctx *diskctx;
e02e815e 774
e02e815e 775#if 0
265b0d4a
MD
776 kprintf("tdio (%p) destruction started, trace:\n", tdio);
777 print_backtrace(8);
e02e815e 778#endif
265b0d4a 779 KKASSERT(tdio->qlength == 0);
e02e815e 780
265b0d4a
MD
781 while ((diskctx = tdio->diskctx) != NULL) {
782 dsched_disk_ctx_ref(diskctx);
783 lockmgr(&diskctx->lock, LK_EXCLUSIVE);
784 if (diskctx != tdio->diskctx) {
e02e815e 785 lockmgr(&diskctx->lock, LK_RELEASE);
265b0d4a
MD
786 dsched_disk_ctx_unref(diskctx);
787 continue;
e02e815e 788 }
265b0d4a
MD
789 KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
790 if (diskctx->dp->d_sched_policy->destroy_tdio)
791 diskctx->dp->d_sched_policy->destroy_tdio(tdio);
792 TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
793 atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
794 tdio->diskctx = NULL;
795 lockmgr(&diskctx->lock, LK_RELEASE);
796 dsched_disk_ctx_unref(diskctx);
797 }
798 while ((tdctx = tdio->tdctx) != NULL) {
799 dsched_thread_ctx_ref(tdctx);
800 lockmgr(&tdctx->lock, LK_EXCLUSIVE);
801 if (tdctx != tdio->tdctx) {
e02e815e 802 lockmgr(&tdctx->lock, LK_RELEASE);
265b0d4a
MD
803 dsched_thread_ctx_unref(tdctx);
804 continue;
e02e815e 805 }
265b0d4a
MD
806 KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
807 TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
808 atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
809 tdio->tdctx = NULL;
810 lockmgr(&tdctx->lock, LK_RELEASE);
811 dsched_thread_ctx_unref(tdctx);
812 }
813 KKASSERT(tdio->refcount == 0x80000000);
814 objcache_put(dsched_tdio_cache, tdio);
815 atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
e02e815e 816#if 0
265b0d4a 817 dsched_disk_ctx_unref(diskctx);
e02e815e 818#endif
e02e815e
AH
819}
820
821void
822dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
823{
265b0d4a
MD
824 int refs;
825 int nrefs;
e02e815e 826
265b0d4a
MD
827 /*
828 * Handle 1->0 transitions for tdctx and nested destruction
829 * recursions. If the refs are already in destruction mode (bit 31
830 * set) on the 1->0 transition we don't try to destruct it again.
831 *
832 * 0x80000001->0x80000000 transitions are handled normally and
833 * thus avoid nested dstruction.
834 */
835 for (;;) {
836 refs = tdctx->refcount;
837 cpu_ccfence();
838 nrefs = refs - 1;
839
840 KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
841 if (nrefs) {
842 if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs))
843 break;
844 continue;
845 }
846 nrefs = 0x80000000;
847 if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs)) {
848 dsched_thread_ctx_destroy(tdctx);
849 break;
850 }
851 }
852}
e02e815e 853
265b0d4a
MD
854static void
855dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx)
856{
857 struct dsched_thread_io *tdio;
e02e815e 858
e02e815e 859#if 0
265b0d4a
MD
860 kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
861 print_backtrace(8);
e02e815e 862#endif
265b0d4a
MD
863 DSCHED_GLOBAL_THREAD_CTX_LOCK();
864
c7a0a046
AH
865 lockmgr(&tdctx->lock, LK_EXCLUSIVE);
866
265b0d4a
MD
867 while ((tdio = TAILQ_FIRST(&tdctx->tdio_list)) != NULL) {
868 KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
869 TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
870 atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
871 tdio->tdctx = NULL;
872 dsched_thread_io_unref(tdio);
873 }
874 KKASSERT(tdctx->refcount == 0x80000000);
875 TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);
e02e815e 876
c7a0a046
AH
877 lockmgr(&tdctx->lock, LK_RELEASE);
878
265b0d4a 879 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
e02e815e 880
265b0d4a
MD
881 objcache_put(dsched_tdctx_cache, tdctx);
882 atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
e02e815e
AH
883}
884
e02e815e
AH
885struct dsched_thread_io *
886dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
887 struct dsched_policy *pol)
888{
889 struct dsched_thread_io *tdio;
890#if 0
891 dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
892#endif
893 tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
894 bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);
895
896 /* XXX: maybe we do need another ref for the disk list for tdio */
897 dsched_thread_io_ref(tdio);
898
899 DSCHED_THREAD_IO_LOCKINIT(tdio);
900 tdio->dp = dp;
901
902 tdio->diskctx = dsched_get_disk_priv(dp);
903 TAILQ_INIT(&tdio->queue);
904
905 if (pol->new_tdio)
906 pol->new_tdio(tdio);
907
076dc4bb 908 lockmgr(&tdio->diskctx->lock, LK_EXCLUSIVE);
e02e815e 909 TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
265b0d4a 910 atomic_set_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
076dc4bb 911 lockmgr(&tdio->diskctx->lock, LK_RELEASE);
e02e815e
AH
912
913 if (tdctx) {
914 tdio->tdctx = tdctx;
915 tdio->p = tdctx->p;
916
917 /* Put the tdio in the tdctx list */
918 DSCHED_THREAD_CTX_LOCK(tdctx);
919 TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
920 DSCHED_THREAD_CTX_UNLOCK(tdctx);
265b0d4a 921 atomic_set_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
e02e815e
AH
922 }
923
c7a0a046
AH
924 tdio->debug_policy = pol;
925 tdio->debug_inited = 0xF00F1234;
926
e02e815e
AH
927 atomic_add_int(&dsched_stats.tdio_allocations, 1);
928 return tdio;
929}
930
931
932struct dsched_disk_ctx *
933dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
934{
935 struct dsched_disk_ctx *diskctx;
936
937 diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
938 bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
939 dsched_disk_ctx_ref(diskctx);
940 diskctx->dp = dp;
941 DSCHED_DISK_CTX_LOCKINIT(diskctx);
942 TAILQ_INIT(&diskctx->tdio_list);
09f2bfe9
BP
943 /*
944 * XXX: magic number 32: most device has a tag queue
945 * of depth 32.
946 * Better to retrive more precise value from the driver
947 */
948 diskctx->max_tag_queue_depth = 32;
949 diskctx->current_tag_queue_depth = 0;
e02e815e
AH
950
951 atomic_add_int(&dsched_stats.diskctx_allocations, 1);
952 if (pol->new_diskctx)
953 pol->new_diskctx(diskctx);
954 return diskctx;
955}
956
957
958struct dsched_thread_ctx *
959dsched_thread_ctx_alloc(struct proc *p)
960{
961 struct dsched_thread_ctx *tdctx;
962 struct dsched_thread_io *tdio;
963 struct disk *dp = NULL;
964
965 tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
966 bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
967 dsched_thread_ctx_ref(tdctx);
968#if 0
969 kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
970#endif
971 DSCHED_THREAD_CTX_LOCKINIT(tdctx);
972 TAILQ_INIT(&tdctx->tdio_list);
973 tdctx->p = p;
974
0b81692c 975 DSCHED_GLOBAL_THREAD_CTX_LOCK();
e02e815e
AH
976 while ((dp = disk_enumerate(dp))) {
977 tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
978 }
979
e02e815e
AH
980 TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
981 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
982
983 atomic_add_int(&dsched_stats.tdctx_allocations, 1);
984 /* XXX: no callback here */
985 return tdctx;
986}
987
988void
989policy_new(struct disk *dp, struct dsched_policy *pol) {
990 struct dsched_thread_ctx *tdctx;
991 struct dsched_disk_ctx *diskctx;
992 struct dsched_thread_io *tdio;
993
994 diskctx = dsched_disk_ctx_alloc(dp, pol);
995 dsched_disk_ctx_ref(diskctx);
996 dsched_set_disk_priv(dp, diskctx);
997
e02e815e
AH
998 TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
999 tdio = dsched_thread_io_alloc(dp, tdctx, pol);
1000 }
e02e815e
AH
1001}
1002
1003void
1004policy_destroy(struct disk *dp) {
1005 struct dsched_disk_ctx *diskctx;
1006
1007 diskctx = dsched_get_disk_priv(dp);
1008 KKASSERT(diskctx != NULL);
1009
1010 dsched_disk_ctx_unref(diskctx); /* from prepare */
1011 dsched_disk_ctx_unref(diskctx); /* from alloc */
1012
1013 dsched_set_disk_priv(dp, NULL);
1014}
1015
1016void
b80a9543
AH
1017dsched_new_buf(struct buf *bp)
1018{
e02e815e
AH
1019 struct dsched_thread_ctx *tdctx = NULL;
1020
1021 if (dsched_inited == 0)
1022 return;
1023
1024 if (curproc != NULL) {
1025 tdctx = dsched_get_proc_priv(curproc);
1026 } else {
1027 /* This is a kernel thread, so no proc info is available */
1028 tdctx = dsched_get_thread_priv(curthread);
1029 }
1030
1031#if 0
1032 /*
1033 * XXX: hack. we don't want this assert because we aren't catching all
1034 * threads. mi_startup() is still getting away without an tdctx.
1035 */
1036
1037 /* by now we should have an tdctx. if not, something bad is going on */
1038 KKASSERT(tdctx != NULL);
1039#endif
1040
1041 if (tdctx) {
1042 dsched_thread_ctx_ref(tdctx);
1043 }
1044 dsched_set_buf_priv(bp, tdctx);
b80a9543
AH
1045}
1046
aa166ad1
AH
1047void
1048dsched_exit_buf(struct buf *bp)
1049{
e02e815e
AH
1050 struct dsched_thread_ctx *tdctx;
1051
1052 tdctx = dsched_get_buf_priv(bp);
1053 if (tdctx != NULL) {
1054 dsched_clr_buf_priv(bp);
1055 dsched_thread_ctx_unref(tdctx);
1056 }
aa166ad1 1057}
b80a9543
AH
1058
1059void
1060dsched_new_proc(struct proc *p)
1061{
e02e815e
AH
1062 struct dsched_thread_ctx *tdctx;
1063
1064 if (dsched_inited == 0)
1065 return;
1066
1067 KKASSERT(p != NULL);
1068
1069 tdctx = dsched_thread_ctx_alloc(p);
1070 tdctx->p = p;
1071 dsched_thread_ctx_ref(tdctx);
1072
1073 dsched_set_proc_priv(p, tdctx);
1074 atomic_add_int(&dsched_stats.nprocs, 1);
b80a9543
AH
1075}
1076
1077
1078void
1079dsched_new_thread(struct thread *td)
1080{
e02e815e
AH
1081 struct dsched_thread_ctx *tdctx;
1082
1083 if (dsched_inited == 0)
1084 return;
1085
1086 KKASSERT(td != NULL);
1087
1088 tdctx = dsched_thread_ctx_alloc(NULL);
1089 tdctx->td = td;
1090 dsched_thread_ctx_ref(tdctx);
1091
1092 dsched_set_thread_priv(td, tdctx);
1093 atomic_add_int(&dsched_stats.nthreads, 1);
b80a9543
AH
1094}
1095
1096void
1097dsched_exit_proc(struct proc *p)
1098{
e02e815e
AH
1099 struct dsched_thread_ctx *tdctx;
1100
1101 if (dsched_inited == 0)
1102 return;
1103
1104 KKASSERT(p != NULL);
1105
1106 tdctx = dsched_get_proc_priv(p);
1107 KKASSERT(tdctx != NULL);
1108
1109 tdctx->dead = 0xDEAD;
b5d7061d 1110 dsched_set_proc_priv(p, NULL);
e02e815e
AH
1111
1112 dsched_thread_ctx_unref(tdctx); /* one for alloc, */
1113 dsched_thread_ctx_unref(tdctx); /* one for ref */
1114 atomic_subtract_int(&dsched_stats.nprocs, 1);
b80a9543
AH
1115}
1116
1117
1118void
1119dsched_exit_thread(struct thread *td)
1120{
e02e815e
AH
1121 struct dsched_thread_ctx *tdctx;
1122
1123 if (dsched_inited == 0)
1124 return;
1125
1126 KKASSERT(td != NULL);
1127
1128 tdctx = dsched_get_thread_priv(td);
1129 KKASSERT(tdctx != NULL);
1130
1131 tdctx->dead = 0xDEAD;
1132 dsched_set_thread_priv(td, 0);
1133
1134 dsched_thread_ctx_unref(tdctx); /* one for alloc, */
1135 dsched_thread_ctx_unref(tdctx); /* one for ref */
1136 atomic_subtract_int(&dsched_stats.nthreads, 1);
b80a9543
AH
1137}
1138
89dabacd
AH
1139struct dsched_thread_io *
1140dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
1141 struct dsched_policy *pol) {
1142 struct dsched_thread_ctx *tdctx;
1143 struct dsched_thread_io *tdio;
1144
0b81692c
MD
1145 DSCHED_GLOBAL_THREAD_CTX_LOCK();
1146
89dabacd
AH
1147 tdctx = dsched_get_thread_priv(curthread);
1148 KKASSERT(tdctx != NULL);
89dabacd 1149 tdio = dsched_thread_io_alloc(diskctx->dp, tdctx, pol);
0b81692c
MD
1150
1151 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
1152
89dabacd
AH
1153 return tdio;
1154}
1155
e02e815e
AH
1156/* DEFAULT NOOP POLICY */
1157
/*
 * noop policy: prepare hook.  There is nothing to set up, so this
 * always reports success (0).
 */
static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return (0);
}
1163
/*
 * noop policy: teardown hook.  Intentionally empty.
 */
static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{
	/* nothing to tear down */
}
1169
/*
 * noop policy: cancel hook.  Intentionally empty.
 */
static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{
	/* nothing to cancel */
}
1175
0160356d 1176static int
9495e99b 1177noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
e02e815e 1178 struct bio *bio)
b80a9543 1179{
e02e815e 1180 dsched_strategy_raw(diskctx->dp, bio);
b80a9543 1181#if 0
9495e99b 1182 dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
b80a9543
AH
1183#endif
1184 return 0;
1185}
1186
b80a9543
AH
1187/*
1188 * SYSINIT stuff
1189 */
b80a9543
AH
1190static void
1191dsched_init(void)
1192{
e02e815e
AH
1193 dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
1194 NULL, NULL, NULL,
1195 objcache_malloc_alloc,
1196 objcache_malloc_free,
1197 &dsched_thread_io_malloc_args );
1198
1199 dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
1200 NULL, NULL, NULL,
1201 objcache_malloc_alloc,
1202 objcache_malloc_free,
1203 &dsched_thread_ctx_malloc_args );
1204
1205 dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
1206 NULL, NULL, NULL,
1207 objcache_malloc_alloc,
1208 objcache_malloc_free,
1209 &dsched_disk_ctx_malloc_args );
1210
1211 bzero(&dsched_stats, sizeof(struct dsched_stats));
1212
279e9fd5 1213 lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
e02e815e
AH
1214 DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();
1215
9495e99b 1216 dsched_register(&dsched_noop_policy);
e02e815e
AH
1217
1218 dsched_inited = 1;
b80a9543
AH
1219}
1220
/*
 * SYSUNINIT counterpart of dsched_init().  Currently performs no cleanup.
 */
static void
dsched_uninit(void)
{
	/* nothing to do */
}
1225
e02e815e
AH
1226SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
1227SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);
b80a9543
AH
1228
1229/*
1230 * SYSCTL stuff
1231 */
e02e815e 1232static int
279e9fd5 1233sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
e02e815e
AH
1234{
1235 return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
1236}
1237
279e9fd5
AH
1238static int
1239sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
1240{
1241 struct dsched_policy *pol = NULL;
1242 int error, first = 1;
1243
1244 lockmgr(&dsched_lock, LK_EXCLUSIVE);
1245
1246 while ((pol = dsched_policy_enumerate(pol))) {
1247 if (!first) {
1248 error = SYSCTL_OUT(req, " ", 1);
1249 if (error)
1250 break;
1251 } else {
1252 first = 0;
1253 }
1254 error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
1255 if (error)
1256 break;
1257
1258 }
1259
1260 lockmgr(&dsched_lock, LK_RELEASE);
1261
1262 error = SYSCTL_OUT(req, "", 1);
9495e99b 1263
279e9fd5
AH
1264 return error;
1265}
1266
bc3c9325
AH
1267static int
1268sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
1269{
1270 char buf[DSCHED_POLICY_NAME_LENGTH];
1271 struct dsched_disk_ctx *diskctx = arg1;
1272 struct dsched_policy *pol = NULL;
1273 int error;
1274
1275 if (diskctx == NULL) {
1276 return 0;
1277 }
1278
1279 lockmgr(&dsched_lock, LK_EXCLUSIVE);
1280
1281 pol = diskctx->dp->d_sched_policy;
1282 memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);
1283
1284 error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
1285 if (error || req->newptr == NULL) {
1286 lockmgr(&dsched_lock, LK_RELEASE);
1287 return (error);
1288 }
1289
1290 pol = dsched_find_policy(buf);
1291 if (pol == NULL) {
1292 lockmgr(&dsched_lock, LK_RELEASE);
1293 return 0;
1294 }
1295
1296 dsched_switch(diskctx->dp, pol);
1297
1298 lockmgr(&dsched_lock, LK_RELEASE);
1299
1300 return error;
1301}
1302
9495e99b
AH
1303static int
1304sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
1305{
1306 char buf[DSCHED_POLICY_NAME_LENGTH];
1307 struct dsched_policy *pol = NULL;
1308 int error;
1309
1310 lockmgr(&dsched_lock, LK_EXCLUSIVE);
1311
1312 pol = default_policy;
1313 memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);
1314
1315 error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
1316 if (error || req->newptr == NULL) {
1317 lockmgr(&dsched_lock, LK_RELEASE);
1318 return (error);
1319 }
1320
1321 pol = dsched_find_policy(buf);
1322 if (pol == NULL) {
1323 lockmgr(&dsched_lock, LK_RELEASE);
1324 return 0;
1325 }
1326
1327 default_set = 1;
1328 default_policy = pol;
1329
1330 lockmgr(&dsched_lock, LK_RELEASE);
1331
1332 return error;
1333}
1334
e02e815e
AH
1335SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
1336 "Disk Scheduler Framework (dsched) magic");
bc3c9325
AH
1337SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
1338 "List of disks and their policies");
e02e815e
AH
1339SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
1340 0, "Enable dsched debugging");
1341SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
279e9fd5 1342 0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
e02e815e 1343 "dsched statistics");
279e9fd5
AH
1344SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
1345 NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
9495e99b
AH
1346SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
1347 NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");
279e9fd5 1348
bc3c9325
AH
1349static void
1350dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
1351{
1352 if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
1353 diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
1354 sysctl_ctx_init(&diskctx->sysctl_ctx);
1355 }
1356
1357 SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
1358 OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
1359 diskctx, 0, sysctl_dsched_policy, "A", "policy");
1360}