2 * Copyright (c) 2001-2002 Luigi Rizzo
4 * Supported by: the Xorp Project (www.xorp.org)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
28 * $DragonFly: src/sys/kern/kern_poll.c,v 1.44 2008/03/29 05:27:07 sephe Exp $
31 #include "opt_polling.h"
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/socket.h> /* needed by net/if.h */
36 #include <sys/sysctl.h>
38 #include <sys/thread2.h>
39 #include <sys/msgport2.h>
41 #include <net/if.h> /* for IFF_* flags */
42 #include <net/netmsg2.h>
45 * Polling support for [network] device drivers.
47 * Drivers which support this feature try to register with the
50 * If registration is successful, the driver must disable interrupts,
51 * and further I/O is performed through the handler, which is invoked
52 * (at least once per clock tick) with 3 arguments: the "arg" passed at
53 * register time (a struct ifnet pointer), a command, and a "count" limit.
55 * The command can be one of the following:
56 * POLL_ONLY: quick move of "count" packets from input/output queues.
57 * POLL_AND_CHECK_STATUS: as above, plus check status registers or do
58 * other more expensive operations. This command is issued periodically
59 * but less frequently than POLL_ONLY.
60 * POLL_DEREGISTER: deregister and return to interrupt mode.
61 * POLL_REGISTER: register and disable interrupts
63 * The first two commands are only issued if the interface is marked as
64 * 'IFF_UP, IFF_RUNNING and IFF_POLLING', the last two only if IFF_RUNNING
67 * The count limit specifies how much work the handler can do during the
68 * call -- typically this is the number of packets to be received, or
69 * transmitted, etc. (drivers are free to interpret this number, as long
70 * as the max time spent in the function grows roughly linearly with the
73 * Deregistration can be requested by the driver itself (typically in the
74 * *_stop() routine), or by the polling code, by invoking the handler.
76 * Polling can be enabled or disabled on particular CPU_X with the sysctl
77 * variable kern.polling.X.enable (default is 1, enabled)
79 * A second variable controls the sharing of CPU between polling/kernel
80 * network processing, and other activities (typically userlevel tasks):
81 * kern.polling.X.user_frac (between 0 and 100, default 50) sets the share
82 * of CPU allocated to user tasks. CPU is allocated proportionally to the
83 * shares, by dynamically adjusting the "count" (poll_burst).
85 * Other parameters should be left to their default values.
86 * The following constraints hold
88 * 1 <= poll_burst <= poll_burst_max
89 * 1 <= poll_each_burst <= poll_burst_max
90 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
/*
 * Compile-time limits and defaults for the polling code.
 * MIN_POLL_BURST_MAX and MAX_POLL_BURST_MAX bound every poll_burst_max
 * value; POLL_BURST_MAX is the initial value of that tunable.
 */
93 #define MIN_POLL_BURST_MAX 10
94 #define MAX_POLL_BURST_MAX 1000
95 #define POLL_BURST_MAX 150 /* good for 100Mbit net and HZ=1000 */
/* Cap applied to a requested polling frequency; an earlier definition wins. */
97 #ifndef DEVICE_POLLING_FREQ_MAX
98 #define DEVICE_POLLING_FREQ_MAX 30000
/* Initial value of the pollhz tunable. */
100 #define DEVICE_POLLING_FREQ_DEFAULT 2000
/* Number of slots in the pr[] handler array of each polling context. */
102 #define POLL_LIST_LEN 128
/* Size of the poll_context[] table; usable cpu ids are below this value. */
107 #define POLLCTX_MAX 32
/*
 * Fields of the per-cpu polling context.  Entries marked tunable are
 * changed through the sysctl handlers, entries marked state are reset
 * by poll_reset_state() or updated by the netisr handlers, and entries
 * marked statistics are only exported through sysctl.
 */
/* sysctl context and per-cpu node set up in init_device_poll_pcpu() */
110 struct sysctl_ctx_list poll_sysctl_ctx;
111 struct sysctl_oid *poll_sysctl_tree;
/* burst size loaded into residual_burst at the start of a tick */
113 uint32_t poll_burst; /* state */
/* largest chunk handed to the drivers by one netisr_poll() call */
114 uint32_t poll_each_burst; /* tunable */
/* upper bound for poll_burst and poll_each_burst */
115 uint32_t poll_burst_max; /* tunable */
/* compared against the measured kernel load in netisr_pollmore() */
116 uint32_t user_frac; /* tunable */
/* countdown until the next POLL_AND_CHECK_STATUS command */
117 int reg_frac_count; /* state */
/* reload value for reg_frac_count */
118 uint32_t reg_frac; /* tunable */
119 uint32_t short_ticks; /* statistics */
120 uint32_t lost_polls; /* statistics */
/* incremented by pollclock(), decremented by netisr_pollmore() */
121 uint32_t pending_polls; /* state */
/* cycles of the current burst not yet handed to the drivers */
122 int residual_burst; /* state */
123 uint32_t phase; /* state */
124 uint32_t suspect; /* statistics */
125 uint32_t stalled; /* statistics */
/* time recorded by netisr_poll() at the first call of a tick */
126 struct timeval poll_start_t; /* state */
/* reference time used by pollclock() to measure the tick length */
127 struct timeval prev_t; /* state */
129 uint32_t poll_handlers; /* next free entry in pr[]. */
/* registered interfaces; only the first poll_handlers slots are in use */
130 struct pollrec pr[POLL_LIST_LEN];
/* periodic systimer whose callback is pollclock() */
133 struct systimer pollclock;
134 int polling_enabled; /* tunable */
135 int pollhz; /* tunable */
/* message used by schedpoll() to run netisr_poll() */
137 struct netmsg poll_netmsg;
/* message used by schedpollmore() to run netisr_pollmore() */
138 struct netmsg poll_more_netmsg;
/* Polling contexts indexed by cpu id; slots are set by init_device_poll_pcpu(). */
141 static struct pollctx *poll_context[POLLCTX_MAX];
/* Root of the kern.polling sysctl tree. */
143 SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
144 "Device polling parameters");
/* Cpu that ether_poll_register() registers on; initial value is -1. */
146 static int poll_defcpu = -1;
147 SYSCTL_INT(_kern_polling, OID_AUTO, defcpu, CTLFLAG_RD,
148 &poll_defcpu, 0, "default CPU to run device polling");
/* Boot-time mask of cpus allowed to poll; tested in init_device_poll_pcpu(). */
150 static uint32_t poll_cpumask0 = 0xffffffff;
151 TUNABLE_INT("kern.polling.cpumask", (int *)&poll_cpumask0);
/* Mask of cpus for which init_device_poll_pcpu() set up a context. */
153 static uint32_t poll_cpumask;
154 SYSCTL_INT(_kern_polling, OID_AUTO, cpumask, CTLFLAG_RD,
155 &poll_cpumask, 0, "CPUs that can run device polling");
/*
 * Boot-time defaults.  Each is copied into a polling context when that
 * context is created in init_device_poll_pcpu().
 */
157 static int polling_enabled = 1; /* global polling enable */
158 TUNABLE_INT("kern.polling.enable", &polling_enabled);
160 static int pollhz = DEVICE_POLLING_FREQ_DEFAULT;
161 TUNABLE_INT("kern.polling.pollhz", &pollhz);
163 static int poll_burst_max = POLL_BURST_MAX;
164 TUNABLE_INT("kern.polling.burst_max", &poll_burst_max);
/*
 * Forward declarations.  Every function taking a struct netmsg pointer
 * is installed as a message handler with netmsg_init() elsewhere in
 * this file.
 */
166 /* Netisr handlers */
167 static void netisr_poll(struct netmsg *);
168 static void netisr_pollmore(struct netmsg *);
169 static void poll_register(struct netmsg *);
170 static void poll_deregister(struct netmsg *);
171 static void poll_sysctl_pollhz(struct netmsg *);
172 static void poll_sysctl_polling(struct netmsg *);
173 static void poll_sysctl_regfrac(struct netmsg *);
174 static void poll_sysctl_burstmax(struct netmsg *);
175 static void poll_sysctl_eachburst(struct netmsg *);
177 /* Systimer handler */
178 static void pollclock(systimer_t, struct intrframe *);
180 /* Sysctl handlers */
181 static int sysctl_pollhz(SYSCTL_HANDLER_ARGS);
182 static int sysctl_polling(SYSCTL_HANDLER_ARGS);
183 static int sysctl_regfrac(SYSCTL_HANDLER_ARGS);
184 static int sysctl_burstmax(SYSCTL_HANDLER_ARGS);
185 static int sysctl_eachburst(SYSCTL_HANDLER_ARGS);
/* Attaches the polling sysctl entries for one context under a parent list. */
186 static void poll_add_sysctl(struct sysctl_ctx_list *,
187 struct sysctl_oid_list *, struct pollctx *);
/* Common helper behind schedpoll() and schedpollmore(). */
189 static void schedpoll_oncpu(struct pollctx *, struct netmsg *, netisr_fn_t);
191 void init_device_poll_pcpu(int); /* per-cpu init routine */
/*
 * Put the dynamic state of a polling context back to its starting
 * values.  Called when a context is created, when its last handler is
 * removed and when polling is switched off or has nothing to poll.
 */
194 poll_reset_state(struct pollctx *pctx)
/* burst size restarts small and is adapted again by netisr_pollmore() */
197 pctx->poll_burst = 5;
198 pctx->reg_frac_count = 0;
199 pctx->pending_polls = 0;
200 pctx->residual_burst = 0;
/* forget both reference timestamps */
202 bzero(&pctx->poll_start_t, sizeof(pctx->poll_start_t));
203 bzero(&pctx->prev_t, sizeof(pctx->prev_t));
208 * Initialize per-cpu polling(4) context. Called from kern_clock.c:
/*
 * Create the polling context for cpu cpuid.
 *
 * cpuid is checked against POLLCTX_MAX and against the boot-time mask
 * poll_cpumask0 before anything is allocated.  The global poll_burst_max
 * tunable is clamped, the cpu is recorded in poll_cpumask, and a zeroed
 * context is allocated and seeded from the global tunables.  The context
 * is then stored in poll_context[], exported through sysctl and given a
 * periodic systimer that is initialized with a frequency argument of 1.
 */
211 init_device_poll_pcpu(int cpuid)
213 struct pollctx *pctx;
216 if (cpuid >= POLLCTX_MAX)
/* Unsigned shift: cpuid may be 31 and 1 << 31 overflows a signed int. */
219 if (((1U << cpuid) & poll_cpumask0) == 0)
/* Clamp the boot-time tunable into [MIN_POLL_BURST_MAX, MAX_POLL_BURST_MAX]. */
222 if (poll_burst_max < MIN_POLL_BURST_MAX)
223 poll_burst_max = MIN_POLL_BURST_MAX;
224 else if (poll_burst_max > MAX_POLL_BURST_MAX)
225 poll_burst_max = MAX_POLL_BURST_MAX;
227 poll_cpumask |= (1U << cpuid);
/* M_ZERO: every field not set below starts out as zero. */
229 pctx = kmalloc(sizeof(*pctx), M_DEVBUF, M_WAITOK | M_ZERO);
231 pctx->poll_each_burst = 5;
232 pctx->poll_burst_max = poll_burst_max;
233 pctx->user_frac = 50;
235 pctx->polling_enabled = polling_enabled;
236 pctx->pollhz = pollhz;
237 pctx->poll_cpuid = cpuid;
/* Handlers are filled in later by schedpoll_oncpu(). */
238 netmsg_init(&pctx->poll_netmsg, &netisr_adone_rport, 0, NULL);
239 netmsg_init(&pctx->poll_more_netmsg, &netisr_adone_rport, 0, NULL);
240 poll_reset_state(pctx);
/* The message reports the limit, not the offending cpu id. */
242 KASSERT(cpuid < POLLCTX_MAX, ("cpu id must < %d", POLLCTX_MAX));
243 poll_context[cpuid] = pctx;
245 if (poll_defcpu < 0) {
249 * Initialize global sysctl nodes, for compat
251 poll_add_sysctl(NULL, SYSCTL_STATIC_CHILDREN(_kern_polling),
256 * Initialize per-cpu sysctl nodes
258 ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", pctx->poll_cpuid);
260 sysctl_ctx_init(&pctx->poll_sysctl_ctx);
261 pctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&pctx->poll_sysctl_ctx,
262 SYSCTL_STATIC_CHILDREN(_kern_polling),
263 OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");
264 poll_add_sysctl(&pctx->poll_sysctl_ctx,
265 SYSCTL_CHILDREN(pctx->poll_sysctl_tree), pctx);
268 * Initialize systimer
270 systimer_init_periodic_nq(&pctx->pollclock, pollclock, pctx, 1);
/* Queue netisr_poll() for this context, using its poll_netmsg message. */
274 schedpoll(struct pollctx *pctx)
277 schedpoll_oncpu(pctx, &pctx->poll_netmsg, netisr_poll);
/* Queue netisr_pollmore() for this context, using its poll_more_netmsg message. */
282 schedpollmore(struct pollctx *pctx)
284 schedpoll_oncpu(pctx, &pctx->poll_more_netmsg, netisr_pollmore);
288 * Set the polling frequency
/*
 * Sysctl handler for the pollhz entry.  arg1 is the polling context.
 * A new value is read with sysctl_handle_int(), capped at
 * DEVICE_POLLING_FREQ_MAX and passed in ms_result to
 * poll_sysctl_pollhz() through the message port of the context cpu.
 */
291 sysctl_pollhz(SYSCTL_HANDLER_ARGS)
293 struct pollctx *pctx = arg1;
299 error = sysctl_handle_int(oidp, &phz, 0, req);
/* an error or a read-only request (no new value) ends the handler here */
300 if (error || req->newptr == NULL)
304 else if (phz > DEVICE_POLLING_FREQ_MAX)
305 phz = DEVICE_POLLING_FREQ_MAX;
307 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_pollhz);
308 msg.nm_lmsg.u.ms_result = phz;
/* the context is only modified by its own cpu, so forward the change */
310 port = cpu_portfn(pctx->poll_cpuid);
311 lwkt_domsg(port, &msg.nm_lmsg, 0);
/*
 * Sysctl handler for the enable entry.  arg1 is the polling context.
 * Reports the current polling_enabled value; a new value is passed in
 * ms_result to poll_sysctl_polling() through the message port of the
 * context cpu.
 */
319 sysctl_polling(SYSCTL_HANDLER_ARGS)
321 struct pollctx *pctx = arg1;
326 enabled = pctx->polling_enabled;
327 error = sysctl_handle_int(oidp, &enabled, 0, req);
/* an error or a read-only request (no new value) ends the handler here */
328 if (error || req->newptr == NULL)
331 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_polling);
332 msg.nm_lmsg.u.ms_result = enabled;
334 port = cpu_portfn(pctx->poll_cpuid);
335 lwkt_domsg(port, &msg.nm_lmsg, 0);
340 sysctl_regfrac(SYSCTL_HANDLER_ARGS)
342 struct pollctx *pctx = arg1;
348 reg_frac = pctx->reg_frac;
349 error = sysctl_handle_int(oidp, ®_frac, 0, req);
350 if (error || req->newptr == NULL)
353 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_regfrac);
354 msg.nm_lmsg.u.ms_result = reg_frac;
356 port = cpu_portfn(pctx->poll_cpuid);
357 lwkt_domsg(port, &msg.nm_lmsg, 0);
/*
 * Sysctl handler for the burst_max entry.  arg1 is the polling context.
 * A new value is clamped to [MIN_POLL_BURST_MAX, MAX_POLL_BURST_MAX]
 * and passed in ms_result to poll_sysctl_burstmax() through the message
 * port of the context cpu.
 */
362 sysctl_burstmax(SYSCTL_HANDLER_ARGS)
364 struct pollctx *pctx = arg1;
370 burst_max = pctx->poll_burst_max;
371 error = sysctl_handle_int(oidp, &burst_max, 0, req);
/* an error or a read-only request (no new value) ends the handler here */
372 if (error || req->newptr == NULL)
374 if (burst_max < MIN_POLL_BURST_MAX)
375 burst_max = MIN_POLL_BURST_MAX;
376 else if (burst_max > MAX_POLL_BURST_MAX)
377 burst_max = MAX_POLL_BURST_MAX;
379 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_burstmax);
380 msg.nm_lmsg.u.ms_result = burst_max;
382 port = cpu_portfn(pctx->poll_cpuid);
383 lwkt_domsg(port, &msg.nm_lmsg, 0);
/*
 * Sysctl handler for the each_burst entry.  arg1 is the polling context.
 * Reports the current poll_each_burst value; a new value is passed
 * unmodified in ms_result to poll_sysctl_eachburst(), which applies the
 * range checks on the context cpu.
 */
388 sysctl_eachburst(SYSCTL_HANDLER_ARGS)
390 struct pollctx *pctx = arg1;
396 each_burst = pctx->poll_each_burst;
397 error = sysctl_handle_int(oidp, &each_burst, 0, req);
/* an error or a read-only request (no new value) ends the handler here */
398 if (error || req->newptr == NULL)
401 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_eachburst);
402 msg.nm_lmsg.u.ms_result = each_burst;
404 port = cpu_portfn(pctx->poll_cpuid);
405 lwkt_domsg(port, &msg.nm_lmsg, 0);
410 * Hook from polling systimer. Tries to schedule a netisr, but keeps
411 * track of lost ticks due to the previous handler taking too long.
412 * Normally, this should not happen, because polling handler should
413 * run for a short time. However, in some cases (e.g. when there are
414 * changes in link status etc.) the drivers take a very long time
415 * (even in the order of milliseconds) to reset and reconfigure the
416 * device, causing apparent lost polls.
418 * The first part of the code is just for debugging purposes, and tries
419 * to count how often hardclock ticks are shorter than they should,
420 * meaning either stray interrupts or delayed events.
422 * WARNING! called from fastint or IPI, the MP lock might not be held.
/*
 * Systimer callback.  info->data is the polling context that was passed
 * to systimer_init_periodic_nq(); frame is unused.
 */
425 pollclock(systimer_t info, struct intrframe *frame __unused)
427 struct pollctx *pctx = info->data;
/* the check below is for a context with no registered handlers */
431 if (pctx->poll_handlers == 0)
/* microseconds elapsed since the reference time prev_t */
435 delta = (t.tv_usec - pctx->prev_t.tv_usec) +
436 (t.tv_sec - pctx->prev_t.tv_sec)*1000000;
/* true when less than half of one polling period (1/pollhz) has passed */
437 if (delta * pctx->pollhz < 500000)
442 if (pctx->pending_polls > 100) {
444 * Too much, assume it has stalled (not always true
445 * see comment above).
448 pctx->pending_polls = 0;
452 if (pctx->phase <= 2) {
453 if (pctx->phase != 0)
/* post-increment: the test is true when a poll was already pending */
459 if (pctx->pending_polls++ > 0)
464 * netisr_pollmore is called after other netisr's, possibly scheduling
465 * another NETISR_POLL call, or adapting the burst size for the next cycle.
467 * It is very bad to fetch large bursts of packets from a single card at once,
468 * because the burst could take a long time to be completely processed leading
469 * to unfairness. To reduce the problem, and also to account better for time
470 * spent in network-related processing, we split the burst in smaller chunks
471 * of fixed size, giving control to the other netisr's between chunks. This
472 * helps in improving the fairness, reducing livelock (because we emulate more
473 * closely the "process to completion" that we have with fastforwarding) and
474 * accounting for the work performed in low level handling and forwarding.
/*
 * Netisr handler scheduled through schedpollmore().  msg carries the
 * polling context in ms_resultp, which must be the context of the
 * current cpu.  While part of the burst remains it handles that case
 * first; otherwise it measures the time spent since poll_start_t,
 * adapts poll_burst against user_frac and accounts for one completed
 * poll in pending_polls.
 */
479 netisr_pollmore(struct netmsg *msg)
481 struct pollctx *pctx;
483 int kern_load, cpuid;
484 uint32_t pending_polls;
/* look up the context of the cpu this handler runs on */
486 cpuid = mycpu->gd_cpuid;
487 KKASSERT(cpuid < POLLCTX_MAX);
489 pctx = poll_context[cpuid];
490 KKASSERT(pctx != NULL);
491 KKASSERT(pctx->poll_cpuid == cpuid);
492 KKASSERT(pctx == msg->nm_lmsg.u.ms_resultp);
/* reply before doing the work so the message can be sent again */
494 lwkt_replymsg(&msg->nm_lmsg, 0);
496 if (pctx->poll_handlers == 0)
499 KASSERT(pctx->polling_enabled,
500 ("# of registered poll handlers are not zero, "
501 "but polling is not enabled\n"));
504 if (pctx->residual_burst > 0) {
506 /* will run immediately on return, followed by netisrs */
509 /* here we can account time spent in netisr's in this tick */
511 kern_load = (t.tv_usec - pctx->poll_start_t.tv_usec) +
512 (t.tv_sec - pctx->poll_start_t.tv_sec)*1000000; /* us */
/* microseconds times pollhz divided by 10000 is a percentage of one period */
513 kern_load = (kern_load * pctx->pollhz) / 10000; /* 0..100 */
514 if (kern_load > (100 - pctx->user_frac)) { /* try decrease ticks */
515 if (pctx->poll_burst > 1)
518 if (pctx->poll_burst < pctx->poll_burst_max)
/* one poll cycle is complete; keep a local copy of what is left */
523 pctx->pending_polls--;
524 pending_polls = pctx->pending_polls;
527 if (pending_polls == 0) { /* we are done */
531 * Last cycle was long and caused us to miss one or more
532 * hardclock ticks. Restart processing again, but slightly
533 * reduce the burst size to prevent that this happens again.
/* shrink the burst by one eighth, but never below 1 */
535 pctx->poll_burst -= (pctx->poll_burst / 8);
536 if (pctx->poll_burst < 1)
537 pctx->poll_burst = 1;
544 * netisr_poll is scheduled by schedpoll when appropriate, typically once
545 * per polling systimer tick.
547 * Note that the message is replied immediately in order to allow a new
548 * ISR to be scheduled in the handler.
550 * XXX each registration should indicate whether it needs a critical
551 * section to operate.
/*
 * Netisr handler scheduled through schedpoll().  msg carries the
 * polling context in ms_resultp, which must be the context of the
 * current cpu.  It takes a chunk of at most poll_each_burst cycles from
 * the remaining burst and calls if_poll on every registered interface
 * that is up, running and marked as polling.
 */
555 netisr_poll(struct netmsg *msg)
557 struct pollctx *pctx;
558 int i, cycles, cpuid;
559 enum poll_cmd arg = POLL_ONLY;
/* look up the context of the cpu this handler runs on */
561 cpuid = mycpu->gd_cpuid;
562 KKASSERT(cpuid < POLLCTX_MAX);
564 pctx = poll_context[cpuid];
565 KKASSERT(pctx != NULL);
566 KKASSERT(pctx->poll_cpuid == cpuid);
567 KKASSERT(pctx == msg->nm_lmsg.u.ms_resultp);
/* reply before doing the work so the message can be sent again */
570 lwkt_replymsg(&msg->nm_lmsg, 0);
573 if (pctx->poll_handlers == 0)
576 KASSERT(pctx->polling_enabled,
577 ("# of registered poll handlers are not zero, "
578 "but polling is not enabled\n"));
581 if (pctx->residual_burst == 0) { /* first call in this tick */
/* start time used by netisr_pollmore() to compute the kernel load */
582 microuptime(&pctx->poll_start_t);
/* when the countdown hits zero, upgrade the command and reload it */
584 if (pctx->reg_frac_count-- == 0) {
585 arg = POLL_AND_CHECK_STATUS;
586 pctx->reg_frac_count = pctx->reg_frac - 1;
589 pctx->residual_burst = pctx->poll_burst;
/* this chunk is the smaller of the remaining burst and poll_each_burst */
591 cycles = (pctx->residual_burst < pctx->poll_each_burst) ?
592 pctx->residual_burst : pctx->poll_each_burst;
593 pctx->residual_burst -= cycles;
595 for (i = 0 ; i < pctx->poll_handlers ; i++) {
596 struct ifnet *ifp = pctx->pr[i].ifp;
/* non-blocking attempt to take the interface serializer */
598 if (!lwkt_serialize_try(ifp->if_serializer))
/* poll only when all three flags are set */
601 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING|IFF_POLLING))
602 == (IFF_UP|IFF_RUNNING|IFF_POLLING))
603 ifp->if_poll(ifp, arg, cycles);
605 lwkt_serialize_exit(ifp->if_serializer);
/*
 * Message handler behind ether_pollcpu_register().  The interface to
 * add is carried in ms_resultp and the context of the current cpu is
 * used.  There are separate checks for polling being disabled and for
 * the handler list being full.  Otherwise the interface is stored in the
 * next free pr[] slot and poll_handlers is incremented; when this was
 * the first handler the polling systimer is re-adjusted.  The message
 * is replied with rc as its error value.
 */
613 poll_register(struct netmsg *msg)
615 struct ifnet *ifp = msg->nm_lmsg.u.ms_resultp;
616 struct pollctx *pctx;
/* look up the context of the cpu this handler runs on */
619 cpuid = mycpu->gd_cpuid;
620 KKASSERT(cpuid < POLLCTX_MAX);
622 pctx = poll_context[cpuid];
623 KKASSERT(pctx != NULL);
624 KKASSERT(pctx->poll_cpuid == cpuid);
626 if (pctx->polling_enabled == 0) {
627 /* Polling disabled, cannot register */
633 * Check if there is room.
635 if (pctx->poll_handlers >= POLL_LIST_LEN) {
637 * List full, cannot register more entries.
638 * This should never happen; if it does, it is probably a
639 * broken driver trying to register multiple times. Checking
640 * this at runtime is expensive, and won't solve the problem
641 * anyways, so just report a few times and then give up.
643 static int verbose = 10; /* XXX */
645 kprintf("poll handlers list full, "
646 "maybe a broken driver ?\n");
651 pctx->pr[pctx->poll_handlers].ifp = ifp;
652 pctx->poll_handlers++;
655 if (pctx->poll_handlers == 1) {
656 KKASSERT(pctx->polling_enabled);
657 systimer_adjust_periodic(&pctx->pollclock,
662 lwkt_replymsg(&msg->nm_lmsg, rc);
666 * Try to register routine for polling. Returns 1 if successful
667 * (and polling should be enabled), 0 otherwise.
669 * Called from mainline code only, not called from an interrupt.
/*
 * Register ifp for polling on the default polling cpu (poll_defcpu).
 * Returns whatever ether_pollcpu_register() returns.
 */
672 ether_poll_register(struct ifnet *ifp)
676 KKASSERT(poll_defcpu < POLLCTX_MAX);
678 return ether_pollcpu_register(ifp, poll_defcpu);
/*
 * Register ifp for polling on cpu cpuid.
 *
 * The request is checked first: the driver must provide if_poll, cpuid
 * must be in [0, POLLCTX_MAX) and its bit must be set in poll_cpumask.
 * Under the interface serializer the interface is then marked with
 * IFF_POLLING and, if it is running, told to switch with POLL_REGISTER.
 * The actual list insertion is done by poll_register() on the target
 * cpu; if that reports an error the marking is undone and a running
 * interface is told to switch back with POLL_DEREGISTER.
 */
682 ether_pollcpu_register(struct ifnet *ifp, int cpuid)
688 if (ifp->if_poll == NULL) {
689 /* Device does not support polling */
693 if (cpuid < 0 || cpuid >= POLLCTX_MAX)
/* Unsigned shift: cpuid may be 31 and 1 << 31 overflows a signed int. */
696 if (((1U << cpuid) & poll_cpumask) == 0) {
697 /* Polling is not supported on 'cpuid' */
700 KKASSERT(poll_context[cpuid] != NULL);
703 * Attempt to register. Interlock with IFF_POLLING.
705 crit_enter(); /* XXX MP - not mp safe */
707 lwkt_serialize_enter(ifp->if_serializer);
708 if (ifp->if_flags & IFF_POLLING) {
709 /* Already polling */
710 KKASSERT(ifp->if_poll_cpuid >= 0);
711 lwkt_serialize_exit(ifp->if_serializer);
/* not polling yet: claim the interface for cpuid */
715 KKASSERT(ifp->if_poll_cpuid < 0);
716 ifp->if_flags |= IFF_POLLING;
717 ifp->if_poll_cpuid = cpuid;
718 if (ifp->if_flags & IFF_RUNNING)
719 ifp->if_poll(ifp, POLL_REGISTER, 0);
720 lwkt_serialize_exit(ifp->if_serializer);
/* hand the interface to poll_register() on the target cpu */
722 netmsg_init(&msg, &curthread->td_msgport, 0, poll_register);
723 msg.nm_lmsg.u.ms_resultp = ifp;
725 port = cpu_portfn(cpuid);
726 lwkt_domsg(port, &msg.nm_lmsg, 0);
/* registration was refused: roll back the flag and the cpu binding */
728 if (msg.nm_lmsg.ms_error) {
729 lwkt_serialize_enter(ifp->if_serializer);
730 ifp->if_flags &= ~IFF_POLLING;
731 ifp->if_poll_cpuid = -1;
732 if (ifp->if_flags & IFF_RUNNING)
733 ifp->if_poll(ifp, POLL_DEREGISTER, 0);
734 lwkt_serialize_exit(ifp->if_serializer);
/*
 * Message handler behind ether_poll_deregister().  The interface to
 * remove is carried in ms_resultp and the context of the current cpu is
 * used.  The pr[] array is searched for the interface; a miss is
 * reported with kprintf.  On a hit the last entry is moved into the
 * freed slot, and when the list becomes empty the systimer is set to 1
 * and the context state is reset.  The message is replied with rc as
 * its error value.
 */
745 poll_deregister(struct netmsg *msg)
747 struct ifnet *ifp = msg->nm_lmsg.u.ms_resultp;
748 struct pollctx *pctx;
/* look up the context of the cpu this handler runs on */
751 cpuid = mycpu->gd_cpuid;
752 KKASSERT(cpuid < POLLCTX_MAX);
754 pctx = poll_context[cpuid];
755 KKASSERT(pctx != NULL);
756 KKASSERT(pctx->poll_cpuid == cpuid);
758 for (i = 0 ; i < pctx->poll_handlers ; i++) {
759 if (pctx->pr[i].ifp == ifp) /* Found it */
/* i equal to poll_handlers means the search ran off the end */
762 if (i == pctx->poll_handlers) {
763 kprintf("ether_poll_deregister: ifp not found!!!\n");
/* after the decrement poll_handlers is the index of the last used entry */
766 pctx->poll_handlers--;
767 if (i < pctx->poll_handlers) {
768 /* Last entry replaces this one. */
769 pctx->pr[i].ifp = pctx->pr[pctx->poll_handlers].ifp;
772 if (pctx->poll_handlers == 0) {
773 systimer_adjust_periodic(&pctx->pollclock, 1);
774 poll_reset_state(pctx);
778 lwkt_replymsg(&msg->nm_lmsg, rc);
782 * Remove interface from the polling list. Occurs when polling is turned
783 * off. Called from mainline code only, not called from an interrupt.
/*
 * Stop polling ifp.
 *
 * Under the interface serializer the IFF_POLLING flag and the cpu
 * binding are cleared.  poll_deregister() is then run on the cpu the
 * interface was bound to, and if it reports no error a running
 * interface is told to switch back with POLL_DEREGISTER.
 */
786 ether_poll_deregister(struct ifnet *ifp)
792 KKASSERT(ifp != NULL);
/* the check below is for drivers without a polling entry point */
794 if (ifp->if_poll == NULL)
799 lwkt_serialize_enter(ifp->if_serializer);
/* the interface is not marked as polling */
800 if ((ifp->if_flags & IFF_POLLING) == 0) {
801 KKASSERT(ifp->if_poll_cpuid < 0);
802 lwkt_serialize_exit(ifp->if_serializer);
/* remember the bound cpu before the binding is cleared */
807 cpuid = ifp->if_poll_cpuid;
808 KKASSERT(cpuid >= 0);
809 KKASSERT(poll_context[cpuid] != NULL);
811 ifp->if_flags &= ~IFF_POLLING;
812 ifp->if_poll_cpuid = -1;
813 lwkt_serialize_exit(ifp->if_serializer);
/* remove the interface from the handler list on its polling cpu */
815 netmsg_init(&msg, &curthread->td_msgport, 0, poll_deregister);
816 msg.nm_lmsg.u.ms_resultp = ifp;
818 port = cpu_portfn(cpuid);
819 lwkt_domsg(port, &msg.nm_lmsg, 0);
821 if (!msg.nm_lmsg.ms_error) {
822 lwkt_serialize_enter(ifp->if_serializer);
823 if (ifp->if_flags & IFF_RUNNING)
824 ifp->if_poll(ifp, POLL_DEREGISTER, 1);
825 lwkt_serialize_exit(ifp->if_serializer);
/*
 * Attach the sysctl entries of one polling context below parent.
 *
 * ctx    sysctl context that owns the new entries; NULL is passed for
 *        the global compat nodes
 * parent list the entries are added to
 * pctx   polling context the entries read and modify
 *
 * The first five entries go through handler procedures, which forward
 * changes to the cpu owning the context.  The remaining entries expose
 * context fields directly.
 */
836 poll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
837 struct pollctx *pctx)
/* tunables with handler procedures */
839 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "enable",
840 CTLTYPE_INT | CTLFLAG_RW, pctx, 0, sysctl_polling,
841 "I", "Polling enabled");
843 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "pollhz",
844 CTLTYPE_INT | CTLFLAG_RW, pctx, 0, sysctl_pollhz,
845 "I", "Device polling frequency");
847 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "reg_frac",
848 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_regfrac,
849 "IU", "Every this many cycles poll register");
851 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
852 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_burstmax,
853 "IU", "Max Polling burst size");
855 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
856 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_eachburst,
857 "IU", "Max size of each burst");
/* fields exported directly from the context */
859 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
860 &pctx->phase, 0, "Polling phase");
862 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
863 &pctx->suspect, 0, "suspect event");
865 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
866 &pctx->stalled, 0, "potential stalls");
868 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
869 &pctx->poll_burst, 0, "Current polling burst size");
871 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
873 "Desired user fraction of cpu time");
875 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
876 &pctx->short_ticks, 0,
877 "Hardclock ticks shorter than they should be");
879 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
880 &pctx->lost_polls, 0,
881 "How many times we would have lost a poll tick");
883 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
884 &pctx->pending_polls, 0, "Do we need to poll again");
886 SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD,
887 &pctx->residual_burst, 0,
888 "# of residual cycles in burst");
890 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
891 &pctx->poll_handlers, 0,
892 "Number of registered poll handlers");
/*
 * Send msg to the netisr port of the current cpu so that handler runs
 * with pctx in ms_resultp.  The message is only re-armed when its
 * MSGF_DONE flag is set.
 */
896 schedpoll_oncpu(struct pollctx *pctx, struct netmsg *msg, netisr_fn_t handler)
898 if (msg->nm_lmsg.ms_flags & MSGF_DONE) {
901 netmsg_init(msg, &netisr_adone_rport, 0, handler);
/* the handlers assert that this pointer matches the local context */
903 msg->nm_lmsg.u.ms_resultp = pctx;
905 port = cpu_portfn(mycpu->gd_cpuid);
906 lwkt_sendmsg(port, &msg->nm_lmsg);
/*
 * Message handler behind sysctl_pollhz().  Stores the new frequency
 * from ms_result in the context of the current cpu.  The systimer is
 * only re-adjusted when polling is enabled and at least one handler is
 * registered.  If reg_frac now exceeds pollhz it is lowered to pollhz,
 * and reg_frac_count is set to reg_frac - 1 if it is above the lowered
 * reg_frac.  The message is replied with status 0.
 */
911 poll_sysctl_pollhz(struct netmsg *msg)
913 struct pollctx *pctx;
916 cpuid = mycpu->gd_cpuid;
917 KKASSERT(cpuid < POLLCTX_MAX);
919 pctx = poll_context[cpuid];
920 KKASSERT(pctx != NULL);
921 KKASSERT(pctx->poll_cpuid == cpuid);
924 * If polling is disabled or there is no device registered,
925 * don't adjust polling systimer frequency.
926 * Polling systimer frequency will be adjusted once polling
927 * is enabled and there are registered devices.
929 pctx->pollhz = msg->nm_lmsg.u.ms_result;
930 if (pctx->polling_enabled && pctx->poll_handlers)
931 systimer_adjust_periodic(&pctx->pollclock, pctx->pollhz);
934 * Make sure that reg_frac and reg_frac_count are within valid range.
936 if (pctx->reg_frac > pctx->pollhz) {
937 pctx->reg_frac = pctx->pollhz;
938 if (pctx->reg_frac_count > pctx->reg_frac)
939 pctx->reg_frac_count = pctx->reg_frac - 1;
942 lwkt_replymsg(&msg->nm_lmsg, 0);
/*
 * Message handler behind sysctl_polling().  Stores the new enable
 * value from ms_result in the context of the current cpu and adjusts
 * the systimer to match.  When polling is switched off while handlers
 * are registered, every registered interface is taken out of polling
 * mode and the handler list is emptied.  The message is replied with
 * status 0.
 */
946 poll_sysctl_polling(struct netmsg *msg)
948 struct pollctx *pctx;
951 cpuid = mycpu->gd_cpuid;
952 KKASSERT(cpuid < POLLCTX_MAX);
954 pctx = poll_context[cpuid];
955 KKASSERT(pctx != NULL);
956 KKASSERT(pctx->poll_cpuid == cpuid);
959 * If polling is disabled or there is no device registered,
960 * cut the polling systimer frequency to 1hz.
962 pctx->polling_enabled = msg->nm_lmsg.u.ms_result;
963 if (pctx->polling_enabled && pctx->poll_handlers) {
964 systimer_adjust_periodic(&pctx->pollclock, pctx->pollhz);
966 systimer_adjust_periodic(&pctx->pollclock, 1);
967 poll_reset_state(pctx);
/* polling was turned off with interfaces still registered */
970 if (!pctx->polling_enabled && pctx->poll_handlers != 0) {
973 for (i = 0 ; i < pctx->poll_handlers ; i++) {
974 struct ifnet *ifp = pctx->pr[i].ifp;
976 lwkt_serialize_enter(ifp->if_serializer);
/* the interface is already out of polling mode */
978 if ((ifp->if_flags & IFF_POLLING) == 0) {
979 KKASSERT(ifp->if_poll_cpuid < 0);
980 lwkt_serialize_exit(ifp->if_serializer);
983 ifp->if_flags &= ~IFF_POLLING;
984 ifp->if_poll_cpuid = -1;
987 * Only call the interface deregistration
988 * function if the interface is still
991 if (ifp->if_flags & IFF_RUNNING)
992 ifp->if_poll(ifp, POLL_DEREGISTER, 1);
994 lwkt_serialize_exit(ifp->if_serializer);
/* drop all entries at once */
996 pctx->poll_handlers = 0;
999 lwkt_replymsg(&msg->nm_lmsg, 0);
/*
 * Message handler behind sysctl_regfrac().  Takes the requested value
 * from ms_result, lowers it to pollhz when it is larger (values below 1
 * are special-cased as well), stores it in the context of the current
 * cpu and pulls reg_frac_count back when it exceeds the new value.
 * The message is replied with status 0.
 */
1003 poll_sysctl_regfrac(struct netmsg *msg)
1005 struct pollctx *pctx;
1009 cpuid = mycpu->gd_cpuid;
1010 KKASSERT(cpuid < POLLCTX_MAX);
1012 pctx = poll_context[cpuid];
1013 KKASSERT(pctx != NULL);
1014 KKASSERT(pctx->poll_cpuid == cpuid);
1016 reg_frac = msg->nm_lmsg.u.ms_result;
1017 if (reg_frac > pctx->pollhz)
1018 reg_frac = pctx->pollhz;
1019 else if (reg_frac < 1)
1022 pctx->reg_frac = reg_frac;
/* keep the running countdown below the new reload value */
1023 if (pctx->reg_frac_count > pctx->reg_frac)
1024 pctx->reg_frac_count = pctx->reg_frac - 1;
1026 lwkt_replymsg(&msg->nm_lmsg, 0);
/*
 * Message handler behind sysctl_burstmax().  Stores the new maximum
 * from ms_result in the context of the current cpu, then lowers
 * poll_each_burst, poll_burst and residual_burst to it where they are
 * larger.  The message is replied with status 0.
 */
1030 poll_sysctl_burstmax(struct netmsg *msg)
1032 struct pollctx *pctx;
1035 cpuid = mycpu->gd_cpuid;
1036 KKASSERT(cpuid < POLLCTX_MAX);
1038 pctx = poll_context[cpuid];
1039 KKASSERT(pctx != NULL);
1040 KKASSERT(pctx->poll_cpuid == cpuid);
/* the value was already clamped by sysctl_burstmax() */
1042 pctx->poll_burst_max = msg->nm_lmsg.u.ms_result;
1043 if (pctx->poll_each_burst > pctx->poll_burst_max)
1044 pctx->poll_each_burst = pctx->poll_burst_max;
1045 if (pctx->poll_burst > pctx->poll_burst_max)
1046 pctx->poll_burst = pctx->poll_burst_max;
1047 if (pctx->residual_burst > pctx->poll_burst_max)
1048 pctx->residual_burst = pctx->poll_burst_max;
1050 lwkt_replymsg(&msg->nm_lmsg, 0);
/*
 * Message handler behind sysctl_eachburst().  Takes the requested
 * value from ms_result, lowers it to poll_burst_max when it is larger
 * (values below 1 are special-cased as well) and stores it as
 * poll_each_burst in the context of the current cpu.  The message is
 * replied with status 0.
 */
1054 poll_sysctl_eachburst(struct netmsg *msg)
1056 struct pollctx *pctx;
1057 uint32_t each_burst;
1060 cpuid = mycpu->gd_cpuid;
1061 KKASSERT(cpuid < POLLCTX_MAX);
1063 pctx = poll_context[cpuid];
1064 KKASSERT(pctx != NULL);
1065 KKASSERT(pctx->poll_cpuid == cpuid);
1067 each_burst = msg->nm_lmsg.u.ms_result;
1068 if (each_burst > pctx->poll_burst_max)
1069 each_burst = pctx->poll_burst_max;
1070 else if (each_burst < 1)
1072 pctx->poll_each_burst = each_burst;
1074 lwkt_replymsg(&msg->nm_lmsg, 0);