Merge branch 'vendor/LIBARCHIVE'
[dragonfly.git] / sys / kern / dsched / bfq / bfq_helper_thread.c
1 /*
2  * Copyright (c) 2011 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Brills Peng <brillsp@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35
36 /*
37  * bfq_helper_thread.c:
38  * Thread function of the helper thread and
39  * message sending routines.
40  *
41  * XXX: The current approach of serializing using lwkt messages is suboptimal.
42  *      The idea is to replace it with way more fine-grained and lockless
43  *      accesses spread all over the place. It makes things more complicated,
44  *      but it will also improve performance significantly.
45  *
46  * The sysctl node of bfq is also initialized
47  * here.
48  */
49
50 #include <sys/systm.h>
51 #include <sys/kernel.h>
52 #include <sys/proc.h>
53 #include <sys/sysctl.h>
54 #include <sys/buf.h>
55 #include <sys/conf.h>
56 #include <sys/diskslice.h>
57 #include <sys/disk.h>
58 #include <sys/malloc.h>
59 #include <machine/md_var.h>
60 #include <sys/ctype.h>
61 #include <sys/syslog.h>
62 #include <sys/device.h>
63 #include <sys/msgport.h>
64 #include <sys/msgport2.h>
65 #include <sys/mplock2.h>
66 #include <sys/buf2.h>
67 #include <sys/dsched.h>
68 #include <sys/fcntl.h>
69 #include <machine/varargs.h>
70
71 #include <kern/dsched/bfq/bfq.h>
72 #include <kern/dsched/bfq/bfq_helper_thread.h>
73
74 extern struct sysctl_oid *bfq_mod_oid;
75 extern struct dsched_policy dsched_bfq_policy;
76
77 static void helper_thread(struct bfq_disk_ctx *bfq_diskctx);
78 static int helper_msg_exec(helper_msg_t msg);
79 static void helper_sysctl_init(struct bfq_disk_ctx *bfq_diskctx);
80
81 MALLOC_DEFINE(M_HELPER, "bfq", "BFQ helper thread message allocations");
82
83 /*
84  * All threads share one dispose port
85  */
86 static struct lwkt_port helper_dispose_port;
87
88 /* XXX: should be an mpipe */
89 static struct objcache_malloc_args helper_msg_malloc_args = {
90         sizeof(struct helper_msg), M_HELPER };
91
92
93 static helper_msg_t
94 helper_msg_get(struct bfq_disk_ctx *bfq_diskctx)
95 {
96         /*
97          * XXX: wait is OK?
98          */
99         return objcache_get(bfq_diskctx->helper_msg_cache, M_WAITOK);
100 }
101
102 static int
103 helper_msg_put(struct bfq_disk_ctx *bfq_diskctx, helper_msg_t msg)
104 {
105         objcache_put(bfq_diskctx->helper_msg_cache, msg);
106         return 0;
107 }
108
109 static void
110 helper_msg_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
111 {
112         helper_msg_t hm = (helper_msg_t)msg;
113         helper_msg_put(hm->bfq_diskctx, (helper_msg_t)msg);
114 }
115
116 /*
117  * Initialize the dispose port. All helper threads share this port.
118  * Must be called only once, and before any helper thread being created.
119  *
120  * Called by bfq.c: bfq_moc_handler()
121  */
122 void
123 helper_init_global(void)
124 {
125         lwkt_initport_replyonly(&helper_dispose_port, helper_msg_autofree_reply);
126 }
127
128 /*
129  * Helper thread initialization function:
130  * initialize the per-disk objcache and create the
131  * helper thread.
132  *
133  * Called by bfq.c:bfq_prepare()
134  */
135 void
136 helper_init(struct bfq_disk_ctx *bfq_diskctx)
137 {
138         struct thread *phelper_thread;
139
140         bfq_diskctx->helper_msg_cache = objcache_create("bfq-helper-msg-cache", 0, 0,
141                         NULL, NULL, NULL,
142                         objcache_malloc_alloc,
143                         objcache_malloc_free,
144                         &helper_msg_malloc_args);
145
146         lwkt_create((void (*) (void *)) helper_thread, bfq_diskctx,
147                         &phelper_thread, NULL, 0, -1,
148                         "bfq_helper_td_%s", bfq_diskctx->head.dp->d_cdev->si_name);
149
150         bfq_diskctx->helper_thread = phelper_thread;
151 }
152
153 static void
154 helper_msg_send(struct bfq_disk_ctx *bfq_diskctx, uint32_t cmd, helper_msg_t helper_msg)
155 {
156         lwkt_port_t port = &bfq_diskctx->helper_msg_port;
157
158         lwkt_initmsg(&helper_msg->hdr, &helper_dispose_port, 0);
159         helper_msg->bfq_diskctx = bfq_diskctx;
160         helper_msg->hdr.u.ms_result = cmd;
161
162         if (port->mpu_td == curthread){
163                 helper_msg_exec(helper_msg);
164                 lwkt_replymsg(&helper_msg->hdr, 0);
165         } else {
166                 lwkt_sendmsg(port, (lwkt_msg_t)helper_msg);
167         }
168 }
169
170 /*
171  * Deallocate the objcache.
172  * Called by bfq.c: bfq_teardown()
173  */
174 void
175 helper_uninit(struct bfq_disk_ctx *bfq_diskctx)
176 {
177         objcache_destroy(bfq_diskctx->helper_msg_cache);
178 }
179
180 static void
181 helper_sysctl_init(struct bfq_disk_ctx *bfq_diskctx)
182 {
183         struct sysctl_oid *oid;
184
185         sysctl_ctx_init(&bfq_diskctx->bfq_sysctl_ctx);
186
187         if (!bfq_mod_oid){
188                 kprintf("Failed to create BFQ dev sysctl node!\n");
189                 return;
190         }
191
192         oid = SYSCTL_ADD_NODE(&bfq_diskctx->bfq_sysctl_ctx,
193                 SYSCTL_CHILDREN(bfq_mod_oid),
194                 OID_AUTO,
195                 bfq_diskctx->head.dp->d_cdev->si_name,
196                 CTLFLAG_RD, 0, "");
197
198         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
199                         SYSCTL_CHILDREN(oid),
200                         OID_AUTO,
201                         "max_budget",
202                         CTLFLAG_RW,
203                         &bfq_diskctx->bfq_max_budget,
204                         0,
205                         "BFQ max budget");
206
207         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
208                         SYSCTL_CHILDREN(oid),
209                         OID_AUTO,
210                         "peak_rate",
211                         CTLFLAG_RD,
212                         &bfq_diskctx->bfq_peak_rate,
213                         0,
214                         "BFQ estimated peak rate");
215
216         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
217                         SYSCTL_CHILDREN(oid),
218                         OID_AUTO,
219                         "peak_samples",
220                         CTLFLAG_RD,
221                         &bfq_diskctx->bfq_peak_rate_samples,
222                         0,
223                         "BFQ estimated peak rate samples");
224
225         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
226                         SYSCTL_CHILDREN(oid),
227                         OID_AUTO,
228                         "as_miss",
229                         CTLFLAG_RD,
230                         &bfq_diskctx->bfq_as_miss,
231                         0,
232                         "BFQ AS miss");
233
234         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
235                         SYSCTL_CHILDREN(oid),
236                         OID_AUTO,
237                         "as_hit",
238                         CTLFLAG_RD,
239                         &bfq_diskctx->bfq_as_hit,
240                         0,
241                         "BFQ AS hit");
242
243         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
244                         SYSCTL_CHILDREN(oid),
245                         OID_AUTO,
246                         "as_wait_avg_all",
247                         CTLFLAG_RD,
248                         &bfq_diskctx->bfq_as_avg_wait_all,
249                         0,
250                         "BFQ AS waitall");
251
252         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
253                         SYSCTL_CHILDREN(oid),
254                         OID_AUTO,
255                         "as_wait_avg_miss",
256                         CTLFLAG_RD,
257                         &bfq_diskctx->bfq_as_avg_wait_miss,
258                         0,
259                         "BFQ AS waitmiss");
260
261         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
262                         SYSCTL_CHILDREN(oid),
263                         OID_AUTO,
264                         "as_wait_max",
265                         CTLFLAG_RD,
266                         &bfq_diskctx->bfq_as_max_wait,
267                         0,
268                         "BFQ AS waitmax");
269
270         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
271                         SYSCTL_CHILDREN(oid),
272                         OID_AUTO,
273                         "as_wait_max2",
274                         CTLFLAG_RD,
275                         &bfq_diskctx->bfq_as_max_wait2,
276                         0,
277                         "BFQ AS waitmax2");
278
279         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
280                         SYSCTL_CHILDREN(oid),
281                         OID_AUTO,
282                         "as_high_wait_count",
283                         CTLFLAG_RD,
284                         &bfq_diskctx->bfq_as_high_wait_count,
285                         0,
286                         "BFQ AS high count");
287
288         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
289                         SYSCTL_CHILDREN(oid),
290                         OID_AUTO,
291                         "as_high_wait_count2",
292                         CTLFLAG_RD,
293                         &bfq_diskctx->bfq_as_high_wait_count2,
294                         0,
295                         "BFQ AS high count2");
296
297         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
298                         SYSCTL_CHILDREN(oid),
299                         OID_AUTO,
300                         "avg_time_slice",
301                         CTLFLAG_RD,
302                         &bfq_diskctx->bfq_avg_time_slice,
303                         0,
304                         "BFQ average time slice");
305
306         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
307                         SYSCTL_CHILDREN(oid),
308                         OID_AUTO,
309                         "max_time_slice",
310                         CTLFLAG_RD,
311                         &bfq_diskctx->bfq_max_time_slice,
312                         0,
313                         "BFQ max time slice");
314
315         SYSCTL_ADD_INT(&bfq_diskctx->bfq_sysctl_ctx,
316                         SYSCTL_CHILDREN(oid),
317                         OID_AUTO,
318                         "high_time_slice_count",
319                         CTLFLAG_RD,
320                         &bfq_diskctx->bfq_high_time_slice_count,
321                         0,
322                         "BFQ high time slice count");
323
324         SYSCTL_ADD_PROC(&bfq_diskctx->bfq_sysctl_ctx, SYSCTL_CHILDREN(oid),
325                         OID_AUTO, "as_switch", CTLTYPE_INT|CTLFLAG_RW,
326                         bfq_diskctx, 0, bfq_sysctl_as_switch_handler, "I", "as_switch");
327
328         SYSCTL_ADD_PROC(&bfq_diskctx->bfq_sysctl_ctx, SYSCTL_CHILDREN(oid),
329                         OID_AUTO, "auto_max_budget_switch", CTLTYPE_INT|CTLFLAG_RW,
330                         bfq_diskctx, 0, bfq_sysctl_auto_max_budget_handler, "I", "amb_switch");
331 }
332
333 static void
334 helper_thread(struct bfq_disk_ctx *bfq_diskctx)
335 {
336         struct dsched_thread_io *tdio;
337
338         int r;
339         helper_msg_t msg;
340
341         tdio = dsched_new_policy_thread_tdio(&bfq_diskctx->head, &dsched_bfq_policy);
342
343         lwkt_initport_thread(&bfq_diskctx->helper_msg_port, curthread);
344         dsched_disk_ctx_ref(&bfq_diskctx->head);
345         helper_sysctl_init(bfq_diskctx);
346
347         dsched_debug(BFQ_DEBUG_NORMAL, "BFQ: helper thread created\n");
348 #if 0
349         /* XXX: why mplock?! */
350         get_mplock();
351 #endif
352
353         for(;;) {
354                 msg = (helper_msg_t)lwkt_waitport(&bfq_diskctx->helper_msg_port, 0);
355                 dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: helper: msg recv: %d\n", msg->hdr.u.ms_result);
356                 r = helper_msg_exec(msg);
357                 lwkt_replymsg(&msg->hdr, 0);
358                 /*
359                  * received BFQ_MSG_KILL
360                  */
361                 if (r == -1)
362                         break;
363         }
364
365 #if 0
366         rel_mplock();
367 #endif
368
369         sysctl_ctx_free(&bfq_diskctx->bfq_sysctl_ctx);
370         dsched_disk_ctx_unref(&bfq_diskctx->head);
371         dsched_debug(BFQ_DEBUG_NORMAL, "BFQ: helper: die peacefully\n");
372         lwkt_exit();
373 }
374
375 static int
376 helper_msg_exec(helper_msg_t msg)
377 {
378         struct bfq_disk_ctx *bfq_diskctx;
379
380         bfq_diskctx = msg->bfq_diskctx;
381
382
383         switch (msg->hdr.u.ms_result)
384         {
385                 case BFQ_MSG_DEQUEUE:
386                         if (atomic_cmpset_int(&bfq_diskctx->pending_dequeue, 0, 1))
387                                 bfq_dequeue((struct dsched_disk_ctx *)bfq_diskctx);
388                         break;
389                 case BFQ_MSG_AS_TIMEOUT:
390                         bfq_timeout(bfq_diskctx);
391                         break;
392
393                 case BFQ_MSG_DESTROY_TDIO:
394                         bfq_helper_destroy_tdio(msg->tdio, bfq_diskctx);
395                         break;
396
397                 case BFQ_MSG_KILL:
398                         return -1;
399
400                 default:
401                         break;
402         }
403         return 0;
404 }
405
406 void
407 helper_msg_dequeue(struct bfq_disk_ctx *bfq_diskctx)
408 {
409         helper_msg_t helper_msg = helper_msg_get(bfq_diskctx);
410
411         helper_msg_send(bfq_diskctx, BFQ_MSG_DEQUEUE, helper_msg);
412 }
413
414 void
415 helper_msg_as_timeout(struct bfq_disk_ctx *bfq_diskctx)
416 {
417         helper_msg_t helper_msg = helper_msg_get(bfq_diskctx);
418         /**
419          * For statisticsal use, temporary
420          * ------------------------------
421          */
422         struct bfq_thread_io *bfq_tdio;
423         struct timeval tv;
424         uint32_t msec;
425
426
427         bfq_tdio = bfq_diskctx->bfq_blockon;
428         if (bfq_tdio) {
429                 getmicrotime(&tv);
430                 timevalsub(&tv, &bfq_tdio->as_start_time);
431                 msec = ((uint64_t)(1000000*tv.tv_sec + tv.tv_usec)) >> 10;
432                 if (msec > 5 * BFQ_T_WAIT_MIN * (1000 / hz))
433                         atomic_add_int(&bfq_diskctx->bfq_as_high_wait_count2, 1);
434                 if (msec > bfq_diskctx->bfq_as_max_wait2)
435                         bfq_diskctx->bfq_as_max_wait2 = msec;
436         }
437         /* ----------------------------- */
438
439         helper_msg_send(bfq_diskctx, BFQ_MSG_AS_TIMEOUT, helper_msg);
440 }
441
442 void
443 helper_msg_destroy_tdio(struct bfq_disk_ctx *bfq_diskctx, struct dsched_thread_io *tdio)
444 {
445         helper_msg_t helper_msg = helper_msg_get(bfq_diskctx);
446
447         helper_msg->tdio = tdio;
448         helper_msg_send(bfq_diskctx, BFQ_MSG_DESTROY_TDIO, helper_msg);
449 }
450
451 void
452 helper_msg_kill(struct bfq_disk_ctx *bfq_diskctx)
453 {
454         helper_msg_t helper_msg = helper_msg_get(bfq_diskctx);
455
456         helper_msg_send(bfq_diskctx, BFQ_MSG_KILL, helper_msg);
457 }