dsched - Correct some length modifiers to fix buildkernel for x86_64.
[dragonfly.git] / sys / dsched / fq / dsched_fq_core.c
CommitLineData
74ce043b
AH
1/*
2 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/proc.h>
38#include <sys/sysctl.h>
39#include <sys/buf.h>
40#include <sys/conf.h>
41#include <sys/diskslice.h>
42#include <sys/disk.h>
43#include <machine/atomic.h>
44#include <sys/malloc.h>
45#include <sys/thread.h>
46#include <sys/thread2.h>
47#include <sys/sysctl.h>
48#include <sys/spinlock2.h>
49#include <machine/md_var.h>
50#include <sys/ctype.h>
51#include <sys/syslog.h>
52#include <sys/device.h>
53#include <sys/msgport.h>
54#include <sys/msgport2.h>
55#include <sys/buf2.h>
56#include <sys/dsched.h>
57#include <machine/varargs.h>
58#include <machine/param.h>
59
60#include <dsched/fq/dsched_fq.h>
61
62MALLOC_DECLARE(M_DSCHEDFQ);
63
64static int dsched_fq_version_maj = 0;
f9ff3d6c 65static int dsched_fq_version_min = 8;
74ce043b
AH
66
67struct dsched_fq_stats fq_stats;
68
69struct objcache_malloc_args dsched_fq_dpriv_malloc_args = {
70 sizeof(struct dsched_fq_dpriv), M_DSCHEDFQ };
71struct objcache_malloc_args dsched_fq_priv_malloc_args = {
72 sizeof(struct dsched_fq_priv), M_DSCHEDFQ };
73struct objcache_malloc_args dsched_fq_mpriv_malloc_args = {
74 sizeof(struct dsched_fq_mpriv), M_DSCHEDFQ };
75
76static struct objcache *fq_dpriv_cache;
77static struct objcache *fq_mpriv_cache;
78static struct objcache *fq_priv_cache;
79
80TAILQ_HEAD(, dsched_fq_mpriv) dsched_fqmp_list =
81 TAILQ_HEAD_INITIALIZER(dsched_fqmp_list);
82
83struct spinlock fq_fqmp_lock;
84struct callout fq_callout;
85
86extern struct dsched_ops dsched_fq_ops;
87
74ce043b
AH
88void
89fq_reference_dpriv(struct dsched_fq_dpriv *dpriv)
90{
91 int refcount;
92
93 refcount = atomic_fetchadd_int(&dpriv->refcount, 1);
94
95 KKASSERT(refcount >= 0);
96}
97
98void
99fq_reference_priv(struct dsched_fq_priv *fqp)
100{
101 int refcount;
102
103 refcount = atomic_fetchadd_int(&fqp->refcount, 1);
104
105 KKASSERT(refcount >= 0);
106}
107
108void
109fq_reference_mpriv(struct dsched_fq_mpriv *fqmp)
110{
111 int refcount;
112
113 refcount = atomic_fetchadd_int(&fqmp->refcount, 1);
114
115 KKASSERT(refcount >= 0);
116}
117
118void
119fq_dereference_dpriv(struct dsched_fq_dpriv *dpriv)
120{
121 struct dsched_fq_priv *fqp, *fqp2;
122 int refcount;
123
124 refcount = atomic_fetchadd_int(&dpriv->refcount, -1);
125
126
edd247d3 127 KKASSERT(refcount >= 0 || refcount <= -0x400);
74ce043b
AH
128
129 if (refcount == 1) {
edd247d3 130 atomic_subtract_int(&dpriv->refcount, 0x400); /* mark as: in destruction */
74ce043b
AH
131#if 1
132 kprintf("dpriv (%p) destruction started, trace:\n", dpriv);
7ce2998e 133 print_backtrace(4);
74ce043b
AH
134#endif
135 spin_lock_wr(&dpriv->lock);
136 TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
137 TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
138 fqp->flags &= ~FQP_LINKED_DPRIV;
139 fq_dereference_priv(fqp);
140 }
141 spin_unlock_wr(&dpriv->lock);
142
143 objcache_put(fq_dpriv_cache, dpriv);
144 atomic_subtract_int(&fq_stats.dpriv_allocations, 1);
145 }
146}
147
148void
149fq_dereference_priv(struct dsched_fq_priv *fqp)
150{
151 struct dsched_fq_mpriv *fqmp;
152 struct dsched_fq_dpriv *dpriv;
153 int refcount;
154
155 refcount = atomic_fetchadd_int(&fqp->refcount, -1);
156
edd247d3 157 KKASSERT(refcount >= 0 || refcount <= -0x400);
74ce043b
AH
158
159 if (refcount == 1) {
edd247d3 160 atomic_subtract_int(&fqp->refcount, 0x400); /* mark as: in destruction */
74ce043b
AH
161#if 0
162 kprintf("fqp (%p) destruction started, trace:\n", fqp);
7ce2998e 163 print_backtrace(8);
74ce043b
AH
164#endif
165 dpriv = fqp->dpriv;
166 KKASSERT(dpriv != NULL);
167
168 spin_lock_wr(&fqp->lock);
169
170 KKASSERT(fqp->qlength == 0);
171
172 if (fqp->flags & FQP_LINKED_DPRIV) {
173 spin_lock_wr(&dpriv->lock);
174
175 TAILQ_REMOVE(&dpriv->fq_priv_list, fqp, dlink);
176 fqp->flags &= ~FQP_LINKED_DPRIV;
177
178 spin_unlock_wr(&dpriv->lock);
179 }
180
181 if (fqp->flags & FQP_LINKED_FQMP) {
182 fqmp = fqp->fqmp;
183 KKASSERT(fqmp != NULL);
184
185 spin_lock_wr(&fqmp->lock);
186
187 TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
188 fqp->flags &= ~FQP_LINKED_FQMP;
189
190 spin_unlock_wr(&fqmp->lock);
191 }
192
193 spin_unlock_wr(&fqp->lock);
194
195 objcache_put(fq_priv_cache, fqp);
196 atomic_subtract_int(&fq_stats.fqp_allocations, 1);
197#if 0
198 fq_dereference_dpriv(dpriv);
199#endif
200 }
201}
202
203void
204fq_dereference_mpriv(struct dsched_fq_mpriv *fqmp)
205{
206 struct dsched_fq_priv *fqp, *fqp2;
207 int refcount;
208
209 refcount = atomic_fetchadd_int(&fqmp->refcount, -1);
210
edd247d3 211 KKASSERT(refcount >= 0 || refcount <= -0x400);
74ce043b
AH
212
213 if (refcount == 1) {
edd247d3 214 atomic_subtract_int(&fqmp->refcount, 0x400); /* mark as: in destruction */
74ce043b
AH
215#if 0
216 kprintf("fqmp (%p) destruction started, trace:\n", fqmp);
7ce2998e 217 print_backtrace(8);
74ce043b
AH
218#endif
219 FQ_GLOBAL_FQMP_LOCK();
220 spin_lock_wr(&fqmp->lock);
221
222 TAILQ_FOREACH_MUTABLE(fqp, &fqmp->fq_priv_list, link, fqp2) {
223 TAILQ_REMOVE(&fqmp->fq_priv_list, fqp, link);
224 fqp->flags &= ~FQP_LINKED_FQMP;
225 fq_dereference_priv(fqp);
226 }
227 TAILQ_REMOVE(&dsched_fqmp_list, fqmp, link);
228
229 spin_unlock_wr(&fqmp->lock);
230 FQ_GLOBAL_FQMP_UNLOCK();
231
232 objcache_put(fq_mpriv_cache, fqmp);
233 atomic_subtract_int(&fq_stats.fqmp_allocations, 1);
234 }
235}
236
237
238struct dsched_fq_priv *
edd247d3 239fq_alloc_priv(struct disk *dp, struct dsched_fq_mpriv *fqmp)
74ce043b
AH
240{
241 struct dsched_fq_priv *fqp;
242#if 0
243 fq_reference_dpriv(dsched_get_disk_priv(dp));
244#endif
245 fqp = objcache_get(fq_priv_cache, M_WAITOK);
246 bzero(fqp, sizeof(struct dsched_fq_priv));
247
248 /* XXX: maybe we do need another ref for the disk list for fqp */
249 fq_reference_priv(fqp);
250
251 FQ_FQP_LOCKINIT(fqp);
252 FQ_FQP_LOCK(fqp);
253 fqp->dp = dp;
254
255 fqp->dpriv = dsched_get_disk_priv(dp);
256
edd247d3
AH
257 if (fqmp) {
258 fqp->fqmp = fqmp;
259 fqp->p = fqmp->p;
260
261 /* Put the fqp in the fqmp list */
262 FQ_FQMP_LOCK(fqmp);
263 TAILQ_INSERT_TAIL(&fqmp->fq_priv_list, fqp, link);
264 FQ_FQMP_UNLOCK(fqmp);
265 fqp->flags |= FQP_LINKED_FQMP;
266 }
267
74ce043b
AH
268 TAILQ_INIT(&fqp->queue);
269 TAILQ_INSERT_TAIL(&fqp->dpriv->fq_priv_list, fqp, dlink);
270 fqp->flags |= FQP_LINKED_DPRIV;
271
272 atomic_add_int(&fq_stats.fqp_allocations, 1);
273 FQ_FQP_UNLOCK(fqp);
274 return fqp;
275}
276
277
278struct dsched_fq_dpriv *
279fq_alloc_dpriv(struct disk *dp)
280{
281 struct dsched_fq_dpriv *dpriv;
282
283 dpriv = objcache_get(fq_dpriv_cache, M_WAITOK);
284 bzero(dpriv, sizeof(struct dsched_fq_dpriv));
285 fq_reference_dpriv(dpriv);
286 dpriv->dp = dp;
287 dpriv->avg_rq_time = 0;
288 dpriv->incomplete_tp = 0;
289 FQ_DPRIV_LOCKINIT(dpriv);
290 TAILQ_INIT(&dpriv->fq_priv_list);
291
292 atomic_add_int(&fq_stats.dpriv_allocations, 1);
293 return dpriv;
294}
295
296
297struct dsched_fq_mpriv *
aa166ad1 298fq_alloc_mpriv(struct proc *p)
74ce043b
AH
299{
300 struct dsched_fq_mpriv *fqmp;
301 struct dsched_fq_priv *fqp;
302 struct disk *dp = NULL;
303
304 fqmp = objcache_get(fq_mpriv_cache, M_WAITOK);
305 bzero(fqmp, sizeof(struct dsched_fq_mpriv));
306 fq_reference_mpriv(fqmp);
307#if 0
308 kprintf("fq_alloc_mpriv, new fqmp = %p\n", fqmp);
309#endif
310 FQ_FQMP_LOCKINIT(fqmp);
74ce043b 311 TAILQ_INIT(&fqmp->fq_priv_list);
edd247d3 312 fqmp->p = p;
74ce043b
AH
313
314 while ((dp = dsched_disk_enumerate(dp, &dsched_fq_ops))) {
edd247d3 315 fqp = fq_alloc_priv(dp, fqmp);
74ce043b
AH
316#if 0
317 fq_reference_priv(fqp);
318#endif
74ce043b
AH
319 }
320
321 FQ_GLOBAL_FQMP_LOCK();
322 TAILQ_INSERT_TAIL(&dsched_fqmp_list, fqmp, link);
323 FQ_GLOBAL_FQMP_UNLOCK();
74ce043b
AH
324
325 atomic_add_int(&fq_stats.fqmp_allocations, 1);
326 return fqmp;
327}
328
329
330void
331fq_dispatcher(struct dsched_fq_dpriv *dpriv)
332{
aa166ad1 333 struct dsched_fq_mpriv *fqmp;
74ce043b
AH
334 struct dsched_fq_priv *fqp, *fqp2;
335 struct bio *bio, *bio2;
0f0e78e2 336 int idle;
74ce043b 337
aa166ad1
AH
338 /*
339 * We need to manually assign an fqp to the fqmp of this thread
340 * since it isn't assigned one during fq_prepare, as the disk
341 * is not set up yet.
342 */
343 fqmp = dsched_get_thread_priv(curthread);
aa166ad1 344 KKASSERT(fqmp != NULL);
0f0e78e2 345
edd247d3 346 fqp = fq_alloc_priv(dpriv->dp, fqmp);
aa166ad1
AH
347#if 0
348 fq_reference_priv(fqp);
349#endif
aa166ad1 350
74ce043b
AH
351 FQ_DPRIV_LOCK(dpriv);
352 for(;;) {
9cc004d0 353 idle = 0;
74ce043b 354 /* sleep ~60 ms */
9cc004d0 355 if ((ssleep(dpriv, &dpriv->lock, 0, "fq_dispatcher", hz/15) == 0)) {
0f0e78e2
AH
356 /*
357 * We've been woken up; this either means that we are
358 * supposed to die away nicely or that the disk is idle.
359 */
360
173c72b7
AH
361 if (__predict_false(dpriv->die == 1)) {
362 /* If we are supposed to die, drain all queues */
3ee00e04 363 fq_drain(dpriv, FQ_DRAIN_FLUSH);
173c72b7
AH
364
365 /* Now we can safely unlock and exit */
9cc004d0
AH
366 FQ_DPRIV_UNLOCK(dpriv);
367 kprintf("fq_dispatcher is peacefully dying\n");
368 lwkt_exit();
369 /* NOTREACHED */
9cc004d0 370 }
0f0e78e2
AH
371
372 /*
373 * We have been awakened because the disk is idle.
374 * So let's get ready to dispatch some extra bios.
375 */
376 idle = 1;
74ce043b
AH
377 }
378
0f0e78e2 379 /* Maybe the disk is idle and we just didn't get the wakeup */
9cc004d0
AH
380 if (idle == 0)
381 idle = dpriv->idle;
382
0f0e78e2
AH
383 /*
384 * XXX: further room for improvements here. It would be better
385 * to dispatch a few requests from each fqp as to ensure
386 * real fairness.
387 */
74ce043b 388 TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
0f0e78e2
AH
389 if (fqp->qlength == 0)
390 continue;
391
392 FQ_FQP_LOCK(fqp);
e6c2b48a
AH
393 if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
394 fq_balance_self(fqp);
0f0e78e2
AH
395 /*
396 * XXX: why 5 extra? should probably be dynamic,
397 * relying on information on latency.
398 */
399 if ((fqp->max_tp > 0) && idle &&
400 (fqp->issued >= fqp->max_tp)) {
401 fqp->max_tp += 5;
0f0e78e2
AH
402 }
403
404 TAILQ_FOREACH_MUTABLE(bio, &fqp->queue, link, bio2) {
e6c2b48a
AH
405 if (atomic_cmpset_int(&fqp->rebalance, 1, 0))
406 fq_balance_self(fqp);
0f0e78e2
AH
407 if ((fqp->max_tp > 0) &&
408 ((fqp->issued >= fqp->max_tp)))
409 break;
410
411 TAILQ_REMOVE(&fqp->queue, bio, link);
412 --fqp->qlength;
74ce043b 413
9cc004d0 414 /*
0f0e78e2
AH
415 * beware that we do have an fqp reference
416 * from the queueing
9cc004d0 417 */
0f0e78e2 418 fq_dispatch(dpriv, bio, fqp);
74ce043b 419 }
0f0e78e2
AH
420 FQ_FQP_UNLOCK(fqp);
421
74ce043b
AH
422 }
423 }
424}
425
74ce043b
AH
426void
427fq_balance_thread(struct dsched_fq_dpriv *dpriv)
428{
429 struct dsched_fq_priv *fqp, *fqp2;
ef46c87b
AH
430 static struct timeval old_tv;
431 struct timeval tv;
0746e160 432 int64_t total_budget, product;
38f2331e 433 int64_t budget[FQ_PRIO_MAX+1];
e6c2b48a 434 int n, i, sum, total_disk_time;
0746e160 435 int lost_bits;
74ce043b 436
81b5f250 437 getmicrotime(&old_tv);
d161bce9 438
81b5f250
AH
439 FQ_DPRIV_LOCK(dpriv);
440 for (;;) {
441 /* sleep ~1s */
0746e160 442 if ((ssleep(curthread, &dpriv->lock, 0, "fq_balancer", hz/2) == 0)) {
81b5f250
AH
443 if (__predict_false(dpriv->die)) {
444 FQ_DPRIV_UNLOCK(dpriv);
445 lwkt_exit();
446 }
447 }
448
38f2331e 449 bzero(budget, sizeof(budget));
81b5f250
AH
450 total_budget = 0;
451 n = 0;
452
453 getmicrotime(&tv);
ef46c87b 454
ef46c87b
AH
455 total_disk_time = (int)(1000000*((tv.tv_sec - old_tv.tv_sec)) +
456 (tv.tv_usec - old_tv.tv_usec));
41b0c7c3
AH
457
458 if (total_disk_time == 0)
459 total_disk_time = 1;
460
ef46c87b 461 dsched_debug(LOG_INFO, "total_disk_time = %d\n", total_disk_time);
ef46c87b 462
81b5f250 463 old_tv = tv;
ef46c87b 464
e6c2b48a
AH
465 dpriv->disk_busy = (100*(total_disk_time - dpriv->idle_time)) / total_disk_time;
466 if (dpriv->disk_busy < 0)
467 dpriv->disk_busy = 0;
74ce043b 468
81b5f250 469 dpriv->idle_time = 0;
0746e160 470 lost_bits = 0;
d161bce9 471
81b5f250
AH
472 TAILQ_FOREACH_MUTABLE(fqp, &dpriv->fq_priv_list, dlink, fqp2) {
473 fqp->s_avg_latency = fqp->avg_latency;
474 fqp->s_transactions = fqp->transactions;
e6c2b48a 475 if (fqp->s_transactions > 0 /* 30 */) {
0746e160
AH
476 product = fqp->s_avg_latency * fqp->s_transactions;
477 product >>= lost_bits;
478 while(total_budget >= INT64_MAX - product) {
479 ++lost_bits;
480 product >>= 1;
481 total_budget >>= 1;
482 }
483 total_budget += product;
38f2331e
AH
484 ++budget[(fqp->p) ? fqp->p->p_ionice : 0];
485 KKASSERT(total_budget >= 0);
81b5f250
AH
486 dsched_debug(LOG_INFO,
487 "%d) avg_latency = %d, transactions = %d, ioprio = %d\n",
488 n, fqp->s_avg_latency, fqp->s_transactions,
489 (fqp->p) ? fqp->p->p_ionice : 0);
490 ++n;
491 } else {
492 fqp->max_tp = 0;
81b5f250 493 }
38f2331e 494 fqp->rebalance = 0;
e6c2b48a
AH
495 fqp->transactions = 0;
496 fqp->avg_latency = 0;
497 fqp->issued = 0;
74ce043b 498 }
74ce043b 499
81b5f250 500 dsched_debug(LOG_INFO, "%d procs competing for disk\n"
760d1e3d
AHJ
501 "total_budget = %jd (lost bits = %d)\n"
502 "incomplete tp = %d\n", n, (intmax_t)total_budget,
503 lost_bits, dpriv->incomplete_tp);
d161bce9 504
81b5f250 505 if (n == 0)
aa166ad1 506 continue;
3ee00e04 507
81b5f250 508 sum = 0;
3ee00e04 509
81b5f250 510 for (i = 0; i < FQ_PRIO_MAX+1; i++) {
38f2331e 511 if (budget[i] == 0)
81b5f250 512 continue;
38f2331e 513 sum += (FQ_PRIO_BIAS+i)*budget[i];
81b5f250 514 }
74ce043b 515
81b5f250
AH
516 if (sum == 0)
517 sum = 1;
aa166ad1 518
81b5f250 519 dsched_debug(LOG_INFO, "sum = %d\n", sum);
aa166ad1 520
81b5f250 521 for (i = 0; i < FQ_PRIO_MAX+1; i++) {
38f2331e 522 if (budget[i] == 0)
81b5f250 523 continue;
aa166ad1 524
0746e160
AH
525 /*
526 * XXX: if we still overflow here, we really need to switch to
527 * some more advanced mechanism such as compound int128 or
528 * storing the lost bits so they can be used in the
529 * fq_balance_self.
530 */
531 dpriv->budgetpb[i] = ((FQ_PRIO_BIAS+i)*total_budget/sum) << lost_bits;
38f2331e 532 KKASSERT(dpriv->budgetpb[i] >= 0);
81b5f250 533 }
d161bce9 534
81b5f250
AH
535 if (total_budget > dpriv->max_budget)
536 dpriv->max_budget = total_budget;
d161bce9 537
fcefa6f2 538 dsched_debug(4, "disk is %d%% busy\n", dpriv->disk_busy);
e6c2b48a
AH
539 TAILQ_FOREACH(fqp, &dpriv->fq_priv_list, dlink) {
540 fqp->rebalance = 1;
541 }
74ce043b 542
e6c2b48a
AH
543 dpriv->prev_full = dpriv->last_full;
544 dpriv->last_full = (dpriv->disk_busy >= 90)?1:0;
545 }
546}
81b5f250 547
81b5f250 548
e6c2b48a
AH
549/*
550 * fq_balance_self should be called from all sorts of dispatchers. It basically
551 * offloads some of the heavier calculations on throttling onto the process that
552 * wants to do I/O instead of doing it in the fq_balance thread.
553 * - should be called with dpriv lock held
554 */
555void
556fq_balance_self(struct dsched_fq_priv *fqp) {
557 struct dsched_fq_dpriv *dpriv;
81b5f250 558
e6c2b48a
AH
559 int64_t budget, used_budget;
560 int64_t avg_latency;
561 int64_t transactions;
74ce043b 562
e6c2b48a
AH
563 transactions = (int64_t)fqp->s_transactions;
564 avg_latency = (int64_t)fqp->s_avg_latency;
565 dpriv = fqp->dpriv;
566
0746e160 567 used_budget = ((int64_t)avg_latency * transactions);
e6c2b48a
AH
568 budget = dpriv->budgetpb[(fqp->p) ? fqp->p->p_ionice : 0];
569
570 if (used_budget > 0) {
571 dsched_debug(LOG_INFO,
760d1e3d
AHJ
572 "info: used_budget = %jd, budget = %jd\n",
573 (intmax_t)used_budget, budget);
e6c2b48a
AH
574 }
575
576 if ((used_budget > budget) && (dpriv->disk_busy >= 90)) {
577 KKASSERT(avg_latency != 0);
578
0746e160 579 fqp->max_tp = budget/(avg_latency);
e6c2b48a
AH
580 atomic_add_int(&fq_stats.procs_limited, 1);
581
582 dsched_debug(LOG_INFO,
583 "rate limited to %d transactions\n", fqp->max_tp);
584
585 } else if (((used_budget*2 < budget) || (dpriv->disk_busy < 80)) &&
586 (!dpriv->prev_full && !dpriv->last_full)) {
587 fqp->max_tp = 0;
81b5f250 588 }
74ce043b
AH
589}
590
591
592static int
593do_fqstats(SYSCTL_HANDLER_ARGS)
594{
595 return (sysctl_handle_opaque(oidp, &fq_stats, sizeof(struct dsched_fq_stats), req));
596}
597
598
599SYSCTL_PROC(_kern, OID_AUTO, fq_stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
600 0, sizeof(struct dsched_fq_stats), do_fqstats, "fq_stats",
601 "dsched_fq statistics");
602
603
74ce043b
AH
/*
 * SYSINIT hook registered at SI_SUB_PRE_DRIVERS; intentionally a no-op
 * (setup is done in fq_earlyinit()).
 */
static void
fq_init(void)
{
	/* nothing to do */
}
609
/*
 * SYSUNINIT hook registered at SI_SUB_PRE_DRIVERS; intentionally a no-op.
 */
static void
fq_uninit(void)
{
	/* nothing to do */
}
615
616static void
617fq_earlyinit(void)
618{
619 fq_priv_cache = objcache_create("fq-priv-cache", 0, 0,
620 NULL, NULL, NULL,
621 objcache_malloc_alloc,
622 objcache_malloc_free,
623 &dsched_fq_priv_malloc_args );
624
625 fq_mpriv_cache = objcache_create("fq-mpriv-cache", 0, 0,
626 NULL, NULL, NULL,
627 objcache_malloc_alloc,
628 objcache_malloc_free,
629 &dsched_fq_mpriv_malloc_args );
630
631 FQ_GLOBAL_FQMP_LOCKINIT();
632
633 fq_dpriv_cache = objcache_create("fq-dpriv-cache", 0, 0,
634 NULL, NULL, NULL,
635 objcache_malloc_alloc,
636 objcache_malloc_free,
637 &dsched_fq_dpriv_malloc_args );
638
639 bzero(&fq_stats, sizeof(struct dsched_fq_stats));
640
641 dsched_register(&dsched_fq_ops);
642 callout_init_mp(&fq_callout);
643
644 kprintf("FQ scheduler policy version %d.%d loaded\n",
645 dsched_fq_version_maj, dsched_fq_version_min);
646}
647
648static void
649fq_earlyuninit(void)
650{
651 callout_stop(&fq_callout);
652 callout_deactivate(&fq_callout);
653 return;
654}
655
656SYSINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, fq_init, NULL);
657SYSUNINIT(fq_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, fq_uninit, NULL);
658
659SYSINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, fq_earlyinit, NULL);
660SYSUNINIT(fq_early, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, fq_earlyuninit, NULL);