kernel/dsched: Add a version parameter to the DSCHED_POLICY_MODULE macro.
[dragonfly.git] / share / man / man9 / dsched.9
CommitLineData
89295e71
SW
1.\"
2.\" Copyright (c) 2010
3.\" The DragonFly Project. All rights reserved.
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\"
9.\" 1. Redistributions of source code must retain the above copyright
10.\" notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\" notice, this list of conditions and the following disclaimer in
13.\" the documentation and/or other materials provided with the
14.\" distribution.
15.\" 3. Neither the name of The DragonFly Project nor the names of its
16.\" contributors may be used to endorse or promote products derived
17.\" from this software without specific, prior written permission.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22.\" FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23.\" COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24.\" INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30.\" SUCH DAMAGE.
31.\"
1f509c0d 32.Dd August 7, 2012
89295e71 33.Dt DSCHED 9
fb5b3747 34.Os
89295e71
SW
35.Sh NAME
36.Nm dsched ,
37.Nm dsched_cancel_bio ,
38.Nm dsched_debug ,
39.Nm dsched_disk_ctx_ref ,
40.Nm dsched_disk_ctx_unref ,
41.Nm dsched_new_policy_thread_tdio ,
42.Nm dsched_register ,
43.Nm dsched_strategy_async ,
44.Nm dsched_strategy_raw ,
45.Nm dsched_thread_io_ref ,
46.Nm dsched_thread_io_unref ,
47.Nm dsched_unregister ,
48.Nm DSCHED_POLICY_MODULE ,
49.Nm DSCHED_DISK_CTX_LOCK ,
50.Nm DSCHED_DISK_CTX_UNLOCK ,
51.Nm DSCHED_THREAD_IO_LOCK ,
52.Nm DSCHED_THREAD_IO_UNLOCK ,
53.Nm dsched_get_bio_dp ,
54.Nm dsched_get_bio_priv ,
55.Nm dsched_get_disk_priv
56.Nd kernel disk scheduler framework
57.Sh SYNOPSIS
58.In sys/dsched.h
59.Pp
60Functions:
61.Ft void
62.Fn dsched_cancel_bio "struct bio *bp"
63.Ft int
64.Fn dsched_debug "int level" "char *fmt" "..."
65.Ft void
66.Fn dsched_disk_ctx_ref "struct dsched_disk_ctx *diskctx"
67.Ft void
68.Fn dsched_disk_ctx_unref "struct dsched_disk_ctx *diskctx"
69.Ft struct dsched_thread_io *
70.Fn dsched_new_policy_thread_tdio "struct dsched_disk_ctx *diskctx" "struct dsched_policy *pol"
71.Ft int
72.Fn dsched_register "struct dsched_policy *d_policy"
73.Ft void
74.Fn dsched_strategy_async "struct disk *dp" "struct bio *bp" "biodone_t *done" "void *priv"
75.Ft void
76.Fn dsched_strategy_raw "struct disk *dp" "struct bio *bp"
77.Ft void
78.Fn dsched_thread_io_ref "struct dsched_thread_io *tdio"
79.Ft void
80.Fn dsched_thread_io_unref "struct dsched_thread_io *tdio"
81.Ft int
82.Fn dsched_unregister "struct dsched_policy *d_policy"
83.Pp
84Macros:
1f509c0d 85.Fn DSCHED_POLICY_MODULE "name" "modeventhand_t evh" "version"
89295e71
SW
86.Fn DSCHED_DISK_CTX_LOCK "struct dsched_disk_ctx *diskctx"
87.Fn DSCHED_DISK_CTX_UNLOCK "struct dsched_disk_ctx *diskctx"
88.Fn DSCHED_THREAD_IO_LOCK "struct dsched_thread_io *tdio"
89.Fn DSCHED_THREAD_IO_UNLOCK "struct dsched_thread_io *tdio"
90.Fn dsched_get_bio_dp "struct bio *bio"
91.Fn dsched_get_bio_priv "struct bio *bio"
92.Fn dsched_get_disk_priv "struct disk *dp" "void *priv"
93.Pp
94Callbacks:
95.Ft typedef int
96.Fn dsched_prepare_t "struct dsched_disk_ctx *diskctx"
97.Ft typedef void
98.Fn dsched_teardown_t "struct dsched_disk_ctx *diskctx"
99.Ft typedef void
100.Fn dsched_cancel_t "struct dsched_disk_ctx *diskctx"
101.Ft typedef int
102.Fn dsched_queue_t "struct dsched_disk_ctx *diskctx" "struct dsched_thread_io *tdio" "struct bio *bio"
103.Ft typedef void
104.Fn dsched_new_tdio_t "struct dsched_thread_io *tdio"
105.Ft typedef void
106.Fn dsched_destroy_tdio_t "struct dsched_thread_io *tdio"
107.Ft typedef void
108.Fn dsched_new_diskctx_t "struct dsched_disk_ctx *diskctx"
109.Ft typedef void
110.Fn dsched_destroy_diskctx_t "struct dsched_disk_ctx *diskctx"
111.Sh DESCRIPTION
112To create a new dsched policy
113.Sq foo
114the following is required:
115.Bd -literal
1f509c0d 116DSCHED_POLICY_MODULE(dsched_foo, foo_mod_handler, 1);
89295e71
SW
117
118struct dsched_policy dsched_foo_policy = {
119 .name = "foo",
120
121 .prepare = foo_prepare,
122 .teardown = foo_teardown,
123 .cancel_all = foo_cancel,
124 .bio_queue = foo_queue,
125
126 /* The following are optional */
127 .new_tdio = foo_tdio_ctor,
128 .new_diskctx = foo_diskctx_ctor,
129 .destroy_tdio = foo_tdio_dtor,
130 .destroy_diskctx = foo_diskctx_dtor
131};
132.Ed
133.Pp
134The
135.Fa name
136is the unique identifier of the dsched policy and the name the user
137specifies to set this
138.Nm
139policy.
140.Pp
141The
142.Fa prepare
143callback is called whenever the new
144.Nm
145policy is set for a new disk.
146This can be used to create per disk threads for the
147.Nm
148policy instance.
149Note that any thread created during
150.Fa prepare
151will not have a
152.Vt dsched_thread_ctx
153or
154.Vt dsched_thread_io
155associated with it.
156If this is required because the thread will do I/O, the thread itself
157needs to call
158.Fn dsched_new_policy_thread_tdio .
159.Pp
160The
161.Fa teardown
162callback is called whenever a
163.Nm
164policy is unset/detached from a disk or when a disk is disconnected.
165It should clean up all per-disk resources such as any thread created in
166.Fa prepare .
167The
168.Nm
169framework guarantees that no more calls to any other method such as
170.Fa bio_queue
171will occur once
172.Fa teardown
173has been called.
174.Pp
175The
176.Fa cancel_all
177callback is called immediately before
178.Fa teardown .
179It is required to cancel all
180.Vt bio Ns s
181currently queued or stalled in the
182.Nm
183policy instance for the given disk.
184The
185.Nm
186framework guarantees that no more calls to any other method such as
187.Fa bio_queue
188will occur once
189.Fa cancel_all
190has been called.
191.Pp
192The
193.Fa bio_queue
194callback is called for every
195.Vt bio
196intended for the disk(s) with the given
197.Nm
198policy.
199It needs to either dispatch it, queue it in any other form for later
200dispatch, or return a non-zero return value, in which case the
201.Nm
202framework will dispatch that
203.Vt bio
204directly.
205If the function took care of the
206.Vt bio
207and does not want dsched to dispatch it, 0 must be returned.
208.Pp
209The
210.Fa new_tdio
211callback is called for every
212.Vt dsched_thread_io
213created for a disk with this
214.Nm
215policy.
216Similarly, the
217.Fa destroy_tdio
218callback is called on destruction (release of all references) of the
219.Vt dsched_thread_io .
220These functions don't have to be specified; if they are left out or
221set to
222.Dv NULL ,
223they simply won't be called.
224.Pp
225The
226.Fa new_diskctx
227callback is called for every
228.Vt dsched_disk_ctx
229created for a disk with this
230.Nm
231policy.
232Similarly, the
233.Fa destroy_diskctx
234callback is called on destruction (release of all references) of the
235.Vt dsched_disk_ctx .
236These functions don't have to be specified; if they are left out or
237set to
238.Dv NULL ,
239they simply won't be called.
240.Pp
241For convenience, the structs
242.Vt dsched_thread_io
243and
244.Vt dsched_disk_ctx
245are allocated with plenty of spare space, so that each policy can extend
246these, for example as follows:
247.Bd -literal
248struct foo_thread_io {
249 struct dsched_thread_io head;
250 int foo;
251 int bar;
252};
253
ef273fc7 254struct foo_disk_ctx {
89295e71
SW
255 struct dsched_disk_ctx head;
256 int foo;
257 int bar;
258};
259
260CTASSERT(sizeof(struct foo_thread_io) <= DSCHED_THREAD_IO_MAX_SZ);
261CTASSERT(sizeof(struct foo_disk_ctx) <= DSCHED_DISK_CTX_MAX_SZ);
262.Ed
263.Pp
264It is important that the first member of the new struct is one of type
265.Vt dsched_thread_io
266or
267.Vt dsched_disk_ctx ,
268respectively.
269The
270.Fn CTASSERT
271must be used to ensure that the new structs fit into the space provided
272by
273.Nm dsched .
274Not including these asserts can cause serious and difficult-to-debug
275issues.
276For all the functions described in
277.Sx FUNCTIONS
278that require a
279.Vt dsched_thread_io
280or
281.Vt dsched_disk_ctx ,
282the address of the
283.Fa head
284element should be passed; alternatively, the address of the new struct
285can be cast to the right type and passed instead.
286.Sh FUNCTIONS
287The
288.Fn DSCHED_POLICY_MODULE
289macro declares a
290.Nm
291policy kernel module.
292.Fa evh
293is the event handler for the module (see
294.Xr DECLARE_MODULE 9
295for more information).
296The event handler is supposed to register a
297.Nm
298policy with
299.Fn dsched_register
300on load and to unregister it using
301.Fn dsched_unregister
302when it is unloaded.
1f509c0d
SW
303.Fa version
304is the version number of the module (see
305.Xr MODULE_VERSION 9
306for more information).
89295e71
SW
307.Pp
308The
309.Fn dsched_strategy_async
310function dispatches a
311.Vt bio Fa bp
312in an asynchronous manner to the disk specified by
313.Fa dp .
314The private data
315.Fa priv
316will be attached to the
317.Vt bio
318and is later retrievable via
319.Fn dsched_get_bio_priv .
320The
321.Vt biodone_t
322routine
323.Fa done
324will be called once the
325.Vt bio
326completes.
327The
328.Fa done
329routine can use
330.Fn dsched_get_disk_priv ,
331.Fn dsched_get_bio_dp
332and
333.Fn dsched_get_bio_priv
334to retrieve the context.
335Since
336.Fn dsched_strategy_async
337also saves the current time (via
338.Fn getmicrotime )
339in
340.Fa bio->bio_caller_info3.tv ,
341the
342.Fa done
343routine can also calculate the time passed from dispatch to completion
344by getting the current time again (via
5961c420
SW
345.Fn getmicrotime )
346and calculating the timeval difference to the value stored in
89295e71
SW
347.Fa bio->bio_caller_info3.tv .
348At the end of the
349.Fa done
350routine it needs to call
351.Fn pop_bio
352and
353.Fn biodone
354as for any other
355.Vt biodone_t
356routine.
357.Pp
358The
359.Fn dsched_cancel_bio
360function cancels the
361.Vt bio
362and sets
363.Er ENXIO
364as error on the buf.
365.Pp
366The
367.Fn dsched_strategy_raw
368function simply dispatches the
369.Vt bio
370directly to the disk specified by
371.Fa dp
372using
373.Fn dev_dstrategy .
374.Pp
375The
376.Fn dsched_debug
377function works as a conditional
378.Fn kprintf .
379Depending on the setting of the
380.Va dsched.debug
381.Xr sysctl 8
382variable, the debug info will be shown or not.
383.Pp
384The
385.Fn dsched_register
386function registers the policy described by
387.Fa d_policy
388as a valid
389.Nm
390policy which can then be used as a scheduler policy for the disks.
391If a policy with the given name already exists,
392.Er EEXIST
393is returned (otherwise 0).
394.Pp
395The
396.Fn dsched_unregister
397function unregisters the policy described by
398.Fa d_policy .
399The given
400.Nm
401policy will no longer be valid as a scheduler policy.
402If the given policy is currently in use,
403.Er EBUSY
404will be returned and the policy won't be unregistered; otherwise 0 is returned.
405.Pp
406The
407.Fn DSCHED_THREAD_IO_LOCK
408and
409.Fn DSCHED_THREAD_IO_UNLOCK
410macros lock and unlock a
411.Vt dsched_thread_io
412.Fa tdio ,
413respectively.
414The lock must be held whenever the members
415.Fa queue
416and
417.Fa qlength
418are manipulated to avoid messing up the
419.Vt TAILQ .
420It can also be used to serialize any other access to the derived
421.Vt foo_thread_io
422members.
423.Pp
424The
425.Fn DSCHED_DISK_CTX_LOCK
426and
427.Fn DSCHED_DISK_CTX_UNLOCK
428macros lock and unlock a
429.Vt dsched_disk_ctx
430.Fa diskctx ,
431respectively.
432The lock must be held whenever the member
433.Fa queue
434is manipulated to avoid messing up the
435.Vt TAILQ .
436It can also be used to serialize any other access to the derived
437.Vt foo_disk_ctx
438members.
439.Pp
440The
441.Fn dsched_thread_io_ref
442and
443.Fn dsched_thread_io_unref
444functions increase and decrease the reference count on a
445.Vt dsched_thread_io
446.Fa tdio ,
447respectively.
448Whenever the reference count drops to 0, the
449.Fa tdio
450will be released.
451Be aware that it is possible that the
452.Nm
453framework holds references on the
454.Fa tdio ,
455too, so the object may not be freed even after the policy has dropped
456all of its own references.
457.Pp
458The
459.Fn dsched_disk_ctx_ref
460and
461.Fn dsched_disk_ctx_unref
462functions increase and decrease the reference count on a
463.Vt dsched_disk_ctx
464.Fa diskctx ,
465respectively.
466Whenever the reference count drops to 0, the
467.Fa diskctx
468will be released.
469Be aware that it is possible that the
470.Nm
471framework holds references on the
472.Fa diskctx ,
473too, so the object may not be freed even after the policy has dropped
474all of its own references.
475.Pp
476The
477.Fn dsched_get_bio_dp ,
478.Fn dsched_get_disk_priv
479and
480.Fn dsched_get_bio_priv
481macros are intended for use in the
482.Vt biodone_t
483routine specified in the call to
484.Fn dsched_strategy_async .
485.Fn dsched_get_bio_dp
486retrieves the
487.Vt struct disk
488associated with the
489.Vt bio .
490This can then be used to retrieve the
491.Vt struct dsched_disk_ctx
492via
493.Fn dsched_get_disk_priv .
494The
495.Fn dsched_get_bio_priv
496function returns the private data associated with the
497.Fa bio
498on the call to
499.Fn dsched_strategy_async .
500.Pp
501The
502.Fn dsched_new_policy_thread_tdio
503function must be called from any thread created within the
504.Fa prepare
505method that will perform I/O, since these won't have a
506.Vt dsched_thread_io
507associated with them.
4af1dc70 508The function returns a new
89295e71
SW
509.Vt dsched_thread_io
510for the current thread, for
511the
512.Fa diskctx
513and
514.Fa pol
515specified.
516.Sh FILES
517The uncontended path of the
518.Nm
519implementation is in
520.Pa /sys/kern/kern_dsched.c .
521The data structures are in
522.Pa /sys/sys/dsched.h .
523.Sh SEE ALSO
524.Xr dsched 4
525.Sh HISTORY
526The
527.Nm
528framework first appeared in
529.Dx 2.5 .
530.Sh AUTHORS
531The
532.Nm
533framework was written by
534.An Alex Hornung .