2 * Copyright (c) 2011 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Brills Peng <brillsp@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 #ifndef _DSCHED_BFQ_H_
37 #define _DSCHED_BFQ_H_
39 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
42 #include <sys/queue.h>
49 #ifndef _SYS_BIOTRACK_H_
50 #include <sys/biotrack.h>
53 #ifndef _SYS_SPINLOCK_H_
54 #include <sys/spinlock.h>
61 #ifndef _SYS_DSCHED_H_
62 #include <sys/dsched.h>
65 #ifndef _DSCHED_BFQ_WF2Q_H_
66 #include <kern/dsched/bfq/wf2q.h>
71 struct bfq_thread_io {
72 struct dsched_thread_io head;
73 RB_ENTRY(bfq_thread_io) entry;
74 int budget; /* The budget of a thread */
75 int vd; /* Virtual deadline (finish time) */
76 int ve; /* Virtual eligible time (start time) */
77 int min_vd; /* Minimum vd among the sub trees, used for augmented rb-tree */
78 int weight; /* Weight of the thread, the higher, the more
79 chance to be dispatched the thread will have */
81 volatile int maybe_timeout; /* a flag indicating that the tdio may
82 expire, only when active_tdio = this is it valid */
86 off_t last_seek_end; /* the end point of seeking of the last bio
88 uint32_t seek_samples; /* averange seek length samples */
89 off_t seek_avg; /* averange seek length, fixed point */
92 uint32_t ttime_samples; /* averange think time samples */
93 uint64_t ttime_avg; /* averange think time, usec */
96 struct timeval service_start_time; /* the time when the first request
97 of the current service period is dispatched */
98 struct timeval last_request_done_time; /* the time when the last
100 struct timeval as_start_time; /* the start time of AS waiting */
101 struct timeval last_bio_pushed_time;
103 uint32_t service_received; /* the amount of read/write during
105 uint32_t bio_dispatched; /* number of bios dispatched during
106 the current period */
107 uint32_t bio_completed; /* number of bios completed during
108 the current period */
111 struct bfq_disk_ctx {
112 struct dsched_disk_ctx head;
114 struct lock bfq_lock;
116 struct callout bfq_callout; /* the blocking-timer callout */
117 struct wf2q_t bfq_wf2q; /* the wf2q scheduler */
119 struct bfq_thread_io *bfq_blockon; /* waiting on any */
120 struct bfq_thread_io *bfq_active_tdio; /* currently active tdio */
122 int pending_dequeue; /* number of dequeue() calls pending
126 int bfq_remaining_budget; /* remaining budget of the current tdio */
128 uint32_t bfq_flag; /* SEE BFQ_FLAG_* define for all flags */
131 uint32_t bfq_peak_rate_samples; /* peak rate samples */
132 uint64_t bfq_peak_rate; /* peak rate, fixed point */
137 uint32_t bfq_as_avg_wait_miss; /* average AS waiting time for
139 uint32_t bfq_as_avg_wait_all; /* average AS waiting time for all, ms */
140 uint32_t bfq_as_max_wait; /* maximum AS waiting time, ms */
141 uint32_t bfq_as_max_wait2; /* maximum AS waiting time(from callout), ms */
143 int bfq_as_high_wait_count; /* the number of times when AS waiting time
144 is longer than 5 * BFQ_T_WAIT_MIN (50ms now) */
145 int bfq_as_high_wait_count2; /* the number of times when AS waiting
146 time is longer than 5 * BFQ_T_WAIT_MIN (50ms now) */
148 uint32_t bfq_avg_time_slice; /* average time slice length, ms */
149 uint32_t bfq_max_time_slice; /* maximum time slice length, ms */
150 int bfq_high_time_slice_count; /* the number of times when a time slice
151 is longer than 5 * BFQ_SLICE_TIMEOUT */
153 struct sysctl_ctx_list bfq_sysctl_ctx; /* bfq statistics interface
155 /* helper thread and its lwkt message cache and port*/
156 struct thread *helper_thread;
157 struct objcache *helper_msg_cache;
158 struct lwkt_port helper_msg_port;
161 enum bfq_expire_reason {
162 BFQ_REASON_TIMEOUT = 0,
164 BFQ_REASON_OUT_OF_BUDGET,
165 BFQ_REASON_NO_MORE_REQ
168 #define BFQ_FLAG_AS 0x01
169 #define BFQ_FLAG_AUTO_MAX_BUDGET 0x02
171 #define BFQ_TDIO_SEEKY(x) (((x)->seek_avg) > (1024 * SECT_SIZE))
173 #define BFQ_LOCKINIT(x) \
174 lockinit(&(x)->bfq_lock, "bfqwf2q", 0, LK_CANRECURSE);
176 #define BFQ_LOCK(x) do { \
177 dsched_disk_ctx_ref(&(x)->head); \
178 lockmgr(&(x)->bfq_lock, LK_EXCLUSIVE); \
181 #define BFQ_UNLOCK(x) do { \
182 lockmgr(&(x)->bfq_lock, LK_RELEASE); \
183 dsched_disk_ctx_unref(&(x)->head); \
186 #define SECT_SIZE 512 /* XXX: DEV_BSIZE? */
187 #define BFQ_DEBUG_CRITICAL 1
188 #define BFQ_DEBUG_NORMAL 2
189 #define BFQ_DEBUG_VERBOSE 3
190 #define BFQ_DEFAULT_MAX_BUDGET (1024*512) /* 1024 sectors / 0.2sec */
191 #define BFQ_DEFAULT_MIN_BUDGET (32*512) /* 32 sectors / 0.2sec */
192 #define BFQ_BUDG_INC_STEP (1*128*512) /* The linear increasing step of budget */
194 /* If the budget is larger than this threshold,
195 * it will get linear increment, else,
196 * it will get exponential increment.*/
197 #define BFQ_BUDGET_MULTIPLE_THRESHOLD (256*512)
199 #define BFQ_DEFAULT_WEIGHT 1
201 /* Get the size of a bio */
202 #define BIO_SIZE(x) ((x)->bio_buf->b_bcount)
204 /* Anticipatory waiting time (ticks) ~ 20ms, min ~ 10ms */
205 #define BFQ_T_WAIT ((hz/50) > 5 ? (hz/50) : 5)
207 #define BFQ_T_WAIT_MIN ((hz/100 > 0) ? (hz/100) : 1)
209 /* Time slice for each service period ~200ms (ticks) */
210 #define BFQ_SLICE_TIMEOUT (hz/5)
212 #define BFQ_FIXPOINT_SHIFT 10 /* fixed point arithmetic shift */
214 #define BFQ_VALID_MIN_SAMPLES 80 /* minimum number of samples */
216 #define ABS(x) (((x) < 0) ? (-(x)) : (x))
218 /* as statistics define */
219 #define BFQ_AS_STAT_ALL 0x1
220 #define BFQ_AS_STAT_ONLY_MISS 0x2
222 /* functions helper thread calls */
223 void bfq_timeout(void *);
224 void bfq_dequeue(struct dsched_disk_ctx *);
225 void bfq_helper_destroy_tdio(struct dsched_thread_io *, struct bfq_disk_ctx *);
227 /* sysctl handlers, registered in the helper thread */
228 int bfq_sysctl_as_switch_handler(SYSCTL_HANDLER_ARGS);
229 int bfq_sysctl_auto_max_budget_handler(SYSCTL_HANDLER_ARGS);
231 #endif /* _KERNEL || _KERNEL_STRUCTURES */
232 struct dsched_bfq_stats {
238 #endif /*_DSCHED_BFQ_H_ */