| 1 | /*- |
| 2 | * Copyright (c) 2001-2002 Luigi Rizzo |
| 3 | * |
| 4 | * Supported by: the Xorp Project (www.xorp.org) |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * |
| 15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND |
| 16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE |
| 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 25 | * SUCH DAMAGE. |
| 26 | * |
| 27 | * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $ |
| 28 | */ |
| 29 | |
| 30 | #include "opt_ifpoll.h" |
| 31 | |
| 32 | #include <sys/param.h> |
| 33 | #include <sys/kernel.h> |
| 34 | #include <sys/ktr.h> |
| 35 | #include <sys/malloc.h> |
| 36 | #include <sys/serialize.h> |
| 37 | #include <sys/socket.h> |
| 38 | #include <sys/sysctl.h> |
| 39 | |
| 40 | #include <sys/thread2.h> |
| 41 | #include <sys/msgport2.h> |
| 42 | |
| 43 | #include <machine/atomic.h> |
| 44 | #include <machine/clock.h> |
| 45 | #include <machine/smp.h> |
| 46 | |
| 47 | #include <net/if.h> |
| 48 | #include <net/if_poll.h> |
| 49 | #include <net/netmsg2.h> |
| 50 | |
| 51 | /* |
| 52 | * Polling support for network device drivers. |
| 53 | * |
| 54 | * Drivers which support this feature try to register one status polling |
| 55 | * handler and several TX/RX polling handlers with the polling code. |
| 56 | * If interface's if_npoll is called with non-NULL second argument, then |
| 57 | * a register operation is requested, else a deregister operation is |
| 58 | * requested. If the requested operation is "register", driver should |
 * setup the ifpoll_info passed in according to its own needs:
| 60 | * ifpoll_info.ifpi_status.status_func == NULL |
| 61 | * No status polling handler will be installed on CPU(0) |
| 62 | * ifpoll_info.ifpi_rx[n].poll_func == NULL |
| 63 | * No RX polling handler will be installed on CPU(n) |
| 64 | * ifpoll_info.ifpi_tx[n].poll_func == NULL |
| 65 | * No TX polling handler will be installed on CPU(n) |
| 66 | * |
| 67 | * RX is polled at the specified polling frequency (net.ifpoll.X.pollhz). |
| 68 | * TX and status polling could be done at lower frequency than RX frequency |
| 69 | * (net.ifpoll.0.status_frac and net.ifpoll.X.tx_frac). To avoid systimer |
| 70 | * staggering at high frequency, RX systimer gives TX and status polling a |
| 71 | * piggyback (XXX). |
| 72 | * |
| 73 | * All of the registered polling handlers are called only if the interface |
| 74 | * is marked as 'IFF_RUNNING and IFF_NPOLLING'. However, the interface's |
| 75 | * register and deregister function (ifnet.if_npoll) will be called even |
| 76 | * if interface is not marked with 'IFF_RUNNING'. |
| 77 | * |
| 78 | * If registration is successful, the driver must disable interrupts, |
| 79 | * and further I/O is performed through the TX/RX polling handler, which |
| 80 | * are invoked (at least once per clock tick) with 3 arguments: the "arg" |
| 81 | * passed at register time, a struct ifnet pointer, and a "count" limit. |
| 82 | * The registered serializer will be held before calling the related |
| 83 | * polling handler. |
| 84 | * |
| 85 | * The count limit specifies how much work the handler can do during the |
| 86 | * call -- typically this is the number of packets to be received, or |
| 87 | * transmitted, etc. (drivers are free to interpret this number, as long |
| 88 | * as the max time spent in the function grows roughly linearly with the |
| 89 | * count). |
| 90 | * |
| 91 | * A second variable controls the sharing of CPU between polling/kernel |
| 92 | * network processing, and other activities (typically userlevel tasks): |
| 93 | * net.ifpoll.X.{rx,tx}.user_frac (between 0 and 100, default 50) sets the |
| 94 | * share of CPU allocated to user tasks. CPU is allocated proportionally |
| 95 | * to the shares, by dynamically adjusting the "count" (poll_burst). |
| 96 | * |
 * Other parameters should be left to their default values.
| 98 | * The following constraints hold |
| 99 | * |
| 100 | * 1 <= poll_burst <= poll_burst_max |
| 101 | * 1 <= poll_each_burst <= poll_burst_max |
| 102 | * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX |
| 103 | */ |
| 104 | |
| 105 | #define IFPOLL_LIST_LEN 128 |
| 106 | #define IFPOLL_FREQ_MAX 30000 |
| 107 | |
| 108 | #define MIN_IOPOLL_BURST_MAX 10 |
| 109 | #define MAX_IOPOLL_BURST_MAX 1000 |
| 110 | #define IOPOLL_BURST_MAX 150 /* good for 100Mbit net and HZ=1000 */ |
| 111 | |
| 112 | #define IOPOLL_EACH_BURST 5 |
| 113 | |
| 114 | #define IFPOLL_FREQ_DEFAULT 2000 |
| 115 | |
| 116 | #define IFPOLL_TXFRAC_DEFAULT 1 /* 1/2 of the pollhz */ |
| 117 | #define IFPOLL_STFRAC_DEFAULT 19 /* 1/20 of the pollhz */ |
| 118 | |
| 119 | #define IFPOLL_RX 0x1 |
| 120 | #define IFPOLL_TX 0x2 |
| 121 | |
/*
 * Timestamp used for poll interval measurement: the raw TSC counter
 * when the cpu provides one, otherwise a microuptime() timeval
 * (see ifpoll_time_get()/ifpoll_time_diff()).
 */
union ifpoll_time {
	struct timeval tv;
	uint64_t tsc;
};
| 126 | |
/*
 * One registered RX or TX polling handler.
 */
struct iopoll_rec {
	struct lwkt_serialize *serializer;	/* held across poll_func call */
	struct ifnet *ifp;			/* owning interface */
	void *arg;				/* opaque driver argument */
	ifpoll_iofn_t poll_func;		/* driver RX/TX poll handler */
};
| 133 | |
/*
 * Per-cpu RX or TX polling context; one of each type is created for
 * every polling cpu by iopoll_ctx_create().  Cache-aligned to avoid
 * false sharing between cpus.
 */
struct iopoll_ctx {
	union ifpoll_time prev_t;	/* timestamp of previous tick */
	u_long short_ticks;		/* statistics */
	u_long lost_polls;		/* statistics */
	u_long suspect;			/* statistics */
	u_long stalled;			/* statistics */
	uint32_t pending_polls;		/* state */

	struct netmsg_base poll_netmsg;		/* runs {rx,tx}poll_handler */
	struct netmsg_base poll_more_netmsg;	/* runs {rx,tx}pollmore_handler */

	int poll_cpuid;
	int pollhz;
	uint32_t phase;			/* state */
	int residual_burst;		/* state */
	uint32_t poll_each_burst;	/* tunable */
	union ifpoll_time poll_start_t;	/* state */

	uint32_t poll_burst;		/* state */
	uint32_t poll_burst_max;	/* tunable */
	uint32_t user_frac;		/* tunable */
	uint32_t kern_frac;		/* state */

	uint32_t poll_handlers;		/* next free entry in pr[]. */
	struct iopoll_rec pr[IFPOLL_LIST_LEN];

	struct sysctl_ctx_list poll_sysctl_ctx;
	struct sysctl_oid *poll_sysctl_tree;
} __cachealign;
| 163 | |
/*
 * Per-cpu common polling state: the systimer that drives all polling
 * on this cpu plus the frequency/fraction tunables.  Status polling is
 * driven from cpu0's instance only (see stpoll_clock()).
 */
struct poll_comm {
	struct systimer pollclock;	/* drives the per-tick poll clocks */
	int poll_cpuid;

	int stfrac_count;	/* state */
	int poll_stfrac;	/* tunable */

	int txfrac_count;	/* state */
	int poll_txfrac;	/* tunable */

	int pollhz;		/* tunable */

	struct sysctl_ctx_list sysctl_ctx;
	struct sysctl_oid *sysctl_tree;
} __cachealign;
| 179 | |
/*
 * One registered status polling handler; status polling runs on
 * cpu0 only.
 */
struct stpoll_rec {
	struct lwkt_serialize *serializer;	/* held across status_func call */
	struct ifnet *ifp;			/* owning interface */
	ifpoll_stfn_t status_func;		/* driver status poll handler */
};
| 185 | |
/*
 * Status polling context; a single instance (stpoll_context) exists
 * and is used from cpu0 only.
 */
struct stpoll_ctx {
	struct netmsg_base poll_netmsg;	/* runs stpoll_handler */

	uint32_t poll_handlers;		/* next free entry in pr[]. */
	struct stpoll_rec pr[IFPOLL_LIST_LEN];

	struct sysctl_ctx_list poll_sysctl_ctx;
	struct sysctl_oid *poll_sysctl_tree;
} __cachealign;
| 195 | |
/*
 * Netmsg carrying an iopoll context to its owning cpu's netisr thread,
 * used by the burst_max/each_burst sysctl handlers declared below.
 */
struct iopoll_sysctl_netmsg {
	struct netmsg_base base;
	struct iopoll_ctx *ctx;
};
| 200 | |
| 201 | void ifpoll_init_pcpu(int); |
| 202 | static void ifpoll_register_handler(netmsg_t); |
| 203 | static void ifpoll_deregister_handler(netmsg_t); |
| 204 | |
| 205 | /* |
| 206 | * Status polling |
| 207 | */ |
| 208 | static void stpoll_init(void); |
| 209 | static void stpoll_handler(netmsg_t); |
| 210 | static void stpoll_clock(struct stpoll_ctx *); |
| 211 | static int stpoll_register(struct ifnet *, const struct ifpoll_status *); |
| 212 | static int stpoll_deregister(struct ifnet *); |
| 213 | |
| 214 | /* |
| 215 | * RX/TX polling |
| 216 | */ |
| 217 | static struct iopoll_ctx *iopoll_ctx_create(int, int); |
| 218 | static void iopoll_init(int); |
| 219 | static void rxpoll_handler(netmsg_t); |
| 220 | static void txpoll_handler(netmsg_t); |
| 221 | static void rxpollmore_handler(netmsg_t); |
| 222 | static void txpollmore_handler(netmsg_t); |
| 223 | static void iopoll_clock(struct iopoll_ctx *); |
| 224 | static int iopoll_register(struct ifnet *, struct iopoll_ctx *, |
| 225 | const struct ifpoll_io *); |
| 226 | static int iopoll_deregister(struct ifnet *, struct iopoll_ctx *); |
| 227 | |
| 228 | static void iopoll_add_sysctl(struct sysctl_ctx_list *, |
| 229 | struct sysctl_oid_list *, struct iopoll_ctx *, int); |
| 230 | static void sysctl_burstmax_handler(netmsg_t); |
| 231 | static int sysctl_burstmax(SYSCTL_HANDLER_ARGS); |
| 232 | static void sysctl_eachburst_handler(netmsg_t); |
| 233 | static int sysctl_eachburst(SYSCTL_HANDLER_ARGS); |
| 234 | |
| 235 | /* |
| 236 | * Common functions |
| 237 | */ |
| 238 | static void poll_comm_init(int); |
| 239 | static void poll_comm_start(int); |
| 240 | static void poll_comm_adjust_pollhz(struct poll_comm *); |
| 241 | static void poll_comm_systimer0(systimer_t, int, struct intrframe *); |
| 242 | static void poll_comm_systimer(systimer_t, int, struct intrframe *); |
| 243 | static void sysctl_pollhz_handler(netmsg_t); |
| 244 | static void sysctl_stfrac_handler(netmsg_t); |
| 245 | static void sysctl_txfrac_handler(netmsg_t); |
| 246 | static int sysctl_pollhz(SYSCTL_HANDLER_ARGS); |
| 247 | static int sysctl_stfrac(SYSCTL_HANDLER_ARGS); |
| 248 | static int sysctl_txfrac(SYSCTL_HANDLER_ARGS); |
| 249 | |
/* Status polling context; used from cpu0 only (see stpoll_init()). */
static struct stpoll_ctx stpoll_context;
/* Per-cpu contexts, indexed by cpuid; created during ifpoll_init_pcpu(). */
static struct poll_comm *poll_common[MAXCPU];
static struct iopoll_ctx *rxpoll_context[MAXCPU];
static struct iopoll_ctx *txpoll_context[MAXCPU];

SYSCTL_NODE(_net, OID_AUTO, ifpoll, CTLFLAG_RW, 0,
	    "Network device polling parameters");

/* Defaults for the boot-time tunables; clamped in iopoll_ctx_create(). */
static int iopoll_burst_max = IOPOLL_BURST_MAX;
static int iopoll_each_burst = IOPOLL_EACH_BURST;

static int ifpoll_pollhz = IFPOLL_FREQ_DEFAULT;
static int ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
static int ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;

/* Boot-time tunables (settable from the loader). */
TUNABLE_INT("net.ifpoll.burst_max", &iopoll_burst_max);
TUNABLE_INT("net.ifpoll.each_burst", &iopoll_each_burst);
TUNABLE_INT("net.ifpoll.pollhz", &ifpoll_pollhz);
TUNABLE_INT("net.ifpoll.status_frac", &ifpoll_stfrac);
TUNABLE_INT("net.ifpoll.tx_frac", &ifpoll_txfrac);
| 270 | |
| 271 | static __inline void |
| 272 | ifpoll_sendmsg_oncpu(netmsg_t msg) |
| 273 | { |
| 274 | if (msg->lmsg.ms_flags & MSGF_DONE) |
| 275 | lwkt_sendmsg(netisr_portfn(mycpuid), &msg->lmsg); |
| 276 | } |
| 277 | |
| 278 | static __inline void |
| 279 | sched_stpoll(struct stpoll_ctx *st_ctx) |
| 280 | { |
| 281 | ifpoll_sendmsg_oncpu((netmsg_t)&st_ctx->poll_netmsg); |
| 282 | } |
| 283 | |
| 284 | static __inline void |
| 285 | sched_iopoll(struct iopoll_ctx *io_ctx) |
| 286 | { |
| 287 | ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_netmsg); |
| 288 | } |
| 289 | |
| 290 | static __inline void |
| 291 | sched_iopollmore(struct iopoll_ctx *io_ctx) |
| 292 | { |
| 293 | ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_more_netmsg); |
| 294 | } |
| 295 | |
| 296 | static __inline void |
| 297 | ifpoll_time_get(union ifpoll_time *t) |
| 298 | { |
| 299 | if (__predict_true(tsc_present)) |
| 300 | t->tsc = rdtsc(); |
| 301 | else |
| 302 | microuptime(&t->tv); |
| 303 | } |
| 304 | |
| 305 | /* Return time diff in us */ |
| 306 | static __inline int |
| 307 | ifpoll_time_diff(const union ifpoll_time *s, const union ifpoll_time *e) |
| 308 | { |
| 309 | if (__predict_true(tsc_present)) { |
| 310 | return (((e->tsc - s->tsc) * 1000000) / tsc_frequency); |
| 311 | } else { |
| 312 | return ((e->tv.tv_usec - s->tv.tv_usec) + |
| 313 | (e->tv.tv_sec - s->tv.tv_sec) * 1000000); |
| 314 | } |
| 315 | } |
| 316 | |
| 317 | /* |
| 318 | * Initialize per-cpu qpolling(4) context. Called from kern_clock.c: |
| 319 | */ |
| 320 | void |
| 321 | ifpoll_init_pcpu(int cpuid) |
| 322 | { |
| 323 | if (cpuid >= ncpus2) |
| 324 | return; |
| 325 | |
| 326 | poll_comm_init(cpuid); |
| 327 | |
| 328 | if (cpuid == 0) |
| 329 | stpoll_init(); |
| 330 | iopoll_init(cpuid); |
| 331 | |
| 332 | poll_comm_start(cpuid); |
| 333 | } |
| 334 | |
| 335 | int |
| 336 | ifpoll_register(struct ifnet *ifp) |
| 337 | { |
| 338 | struct ifpoll_info *info; |
| 339 | struct netmsg_base nmsg; |
| 340 | int error; |
| 341 | |
| 342 | if (ifp->if_npoll == NULL) { |
| 343 | /* Device does not support polling */ |
| 344 | return EOPNOTSUPP; |
| 345 | } |
| 346 | |
| 347 | info = kmalloc(sizeof(*info), M_TEMP, M_WAITOK | M_ZERO); |
| 348 | |
| 349 | /* |
| 350 | * Attempt to register. Interlock with IFF_NPOLLING. |
| 351 | */ |
| 352 | |
| 353 | ifnet_serialize_all(ifp); |
| 354 | |
| 355 | if (ifp->if_flags & IFF_NPOLLING) { |
| 356 | /* Already polling */ |
| 357 | ifnet_deserialize_all(ifp); |
| 358 | kfree(info, M_TEMP); |
| 359 | return EBUSY; |
| 360 | } |
| 361 | |
| 362 | info->ifpi_ifp = ifp; |
| 363 | |
| 364 | ifp->if_flags |= IFF_NPOLLING; |
| 365 | ifp->if_npoll(ifp, info); |
| 366 | KASSERT(ifp->if_npoll_cpuid >= 0, ("invalid npoll cpuid")); |
| 367 | |
| 368 | ifnet_deserialize_all(ifp); |
| 369 | |
| 370 | netmsg_init(&nmsg, NULL, &curthread->td_msgport, |
| 371 | 0, ifpoll_register_handler); |
| 372 | nmsg.lmsg.u.ms_resultp = info; |
| 373 | |
| 374 | error = lwkt_domsg(netisr_portfn(0), &nmsg.lmsg, 0); |
| 375 | if (error) { |
| 376 | if (!ifpoll_deregister(ifp)) { |
| 377 | if_printf(ifp, "ifpoll_register: " |
| 378 | "ifpoll_deregister failed!\n"); |
| 379 | } |
| 380 | } |
| 381 | |
| 382 | kfree(info, M_TEMP); |
| 383 | return error; |
| 384 | } |
| 385 | |
/*
 * Disable polling mode on 'ifp'.
 *
 * Clears IFF_NPOLLING (interlocked with the ifnet serializers), walks
 * all polling cpus to remove the installed handlers, and finally calls
 * if_npoll() with a NULL ifpoll_info so the driver re-arms interrupts.
 *
 * Returns 0 on success, EOPNOTSUPP if the driver has no if_npoll
 * method, or EINVAL if polling was not enabled.
 */
int
ifpoll_deregister(struct ifnet *ifp)
{
	struct netmsg_base nmsg;
	int error;

	if (ifp->if_npoll == NULL)
		return EOPNOTSUPP;

	ifnet_serialize_all(ifp);

	if ((ifp->if_flags & IFF_NPOLLING) == 0) {
		ifnet_deserialize_all(ifp);
		return EINVAL;
	}
	ifp->if_flags &= ~IFF_NPOLLING;

	ifnet_deserialize_all(ifp);

	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
		    0, ifpoll_deregister_handler);
	nmsg.lmsg.u.ms_resultp = ifp;

	/* Chain the per-cpu deregistration, starting from cpu0 */
	error = lwkt_domsg(netisr_portfn(0), &nmsg.lmsg, 0);
	if (!error) {
		/* Tell the driver to leave polling mode */
		ifnet_serialize_all(ifp);
		ifp->if_npoll(ifp, NULL);
		KASSERT(ifp->if_npoll_cpuid < 0, ("invalid npoll cpuid"));
		ifnet_deserialize_all(ifp);
	}
	return error;
}
| 418 | |
/*
 * Per-cpu half of ifpoll_register(): runs in the netisr thread of each
 * polling cpu in turn.  Installs the status handler (cpu0 only) and
 * this cpu's RX/TX handlers, adjusts the polling frequency, then
 * forwards the message to the next cpu, or replies to the originator
 * on the last cpu.  On error the message is replied immediately with
 * the error code and the remaining cpus are skipped.
 */
static void
ifpoll_register_handler(netmsg_t nmsg)
{
	const struct ifpoll_info *info = nmsg->lmsg.u.ms_resultp;
	int cpuid = mycpuid, nextcpu;
	int error;

	KKASSERT(cpuid < ncpus2);
	KKASSERT(&curthread->td_msgport == netisr_portfn(cpuid));

	if (cpuid == 0) {
		/* The status polling handler lives on cpu0 only */
		error = stpoll_register(info->ifpi_ifp, &info->ifpi_status);
		if (error)
			goto failed;
	}

	error = iopoll_register(info->ifpi_ifp, rxpoll_context[cpuid],
				&info->ifpi_rx[cpuid]);
	if (error)
		goto failed;

	error = iopoll_register(info->ifpi_ifp, txpoll_context[cpuid],
				&info->ifpi_tx[cpuid]);
	if (error)
		goto failed;

	/* Adjust polling frequency, after all registration is done */
	poll_comm_adjust_pollhz(poll_common[cpuid]);

	nextcpu = cpuid + 1;
	if (nextcpu < ncpus2)
		lwkt_forwardmsg(netisr_portfn(nextcpu), &nmsg->lmsg);
	else
		lwkt_replymsg(&nmsg->lmsg, 0);
	return;
failed:
	lwkt_replymsg(&nmsg->lmsg, error);
}
| 457 | |
/*
 * Per-cpu half of ifpoll_deregister(): runs in the netisr thread of
 * each polling cpu in turn, removing the handlers installed by
 * ifpoll_register_handler().  Deregistration errors are deliberately
 * ignored so the chain always visits every cpu.
 */
static void
ifpoll_deregister_handler(netmsg_t nmsg)
{
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	int cpuid = mycpuid, nextcpu;

	KKASSERT(cpuid < ncpus2);
	KKASSERT(&curthread->td_msgport == netisr_portfn(cpuid));

	/* Ignore errors */
	if (cpuid == 0)
		stpoll_deregister(ifp);
	iopoll_deregister(ifp, rxpoll_context[cpuid]);
	iopoll_deregister(ifp, txpoll_context[cpuid]);

	/* Adjust polling frequency, after all deregistration is done */
	poll_comm_adjust_pollhz(poll_common[cpuid]);

	nextcpu = cpuid + 1;
	if (nextcpu < ncpus2)
		lwkt_forwardmsg(netisr_portfn(nextcpu), &nmsg->lmsg);
	else
		lwkt_replymsg(&nmsg->lmsg, 0);
}
| 482 | |
/*
 * Set up the cpu0-only status polling context: create its sysctl
 * subtree under the common node and initialize the netmsg used by
 * sched_stpoll() to run stpoll_handler() in the netisr thread.
 * Called from ifpoll_init_pcpu() after poll_comm_init(0).
 */
static void
stpoll_init(void)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	const struct poll_comm *comm = poll_common[0];

	sysctl_ctx_init(&st_ctx->poll_sysctl_ctx);
	st_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&st_ctx->poll_sysctl_ctx,
				   SYSCTL_CHILDREN(comm->sysctl_tree),
				   OID_AUTO, "status", CTLFLAG_RD, 0, "");

	SYSCTL_ADD_UINT(&st_ctx->poll_sysctl_ctx,
			SYSCTL_CHILDREN(st_ctx->poll_sysctl_tree),
			OID_AUTO, "handlers", CTLFLAG_RD,
			&st_ctx->poll_handlers, 0,
			"Number of registered status poll handlers");

	netmsg_init(&st_ctx->poll_netmsg, NULL, &netisr_adone_rport,
		    0, stpoll_handler);
}
| 503 | |
| 504 | /* |
| 505 | * stpoll_handler is scheduled by sched_stpoll when appropriate, typically |
| 506 | * once per polling systimer tick. |
| 507 | */ |
static void
stpoll_handler(netmsg_t msg)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	struct thread *td = curthread;
	int i;

	/* Status polling runs only in cpu0's netisr thread */
	KKASSERT(&td->td_msgport == netisr_portfn(0));

	crit_enter_quick(td);

	/* Reply ASAP so the systimer can schedule the next poll */
	lwkt_replymsg(&msg->lmsg, 0);

	if (st_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

	for (i = 0; i < st_ctx->poll_handlers; ++i) {
		const struct stpoll_rec *rec = &st_ctx->pr[i];
		struct ifnet *ifp = rec->ifp;

		/* Skip, rather than block, if the interface is busy */
		if (!lwkt_serialize_try(rec->serializer))
			continue;

		/* Only poll interfaces that are up and in polling mode */
		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
		    (IFF_RUNNING | IFF_NPOLLING))
			rec->status_func(ifp);

		lwkt_serialize_exit(rec->serializer);
	}

	crit_exit_quick(td);
}
| 543 | |
| 544 | /* |
| 545 | * Hook from status poll systimer. Tries to schedule an status poll. |
| 546 | * NOTE: Caller should hold critical section. |
| 547 | */ |
| 548 | static void |
| 549 | stpoll_clock(struct stpoll_ctx *st_ctx) |
| 550 | { |
| 551 | KKASSERT(mycpuid == 0); |
| 552 | |
| 553 | if (st_ctx->poll_handlers == 0) |
| 554 | return; |
| 555 | sched_stpoll(st_ctx); |
| 556 | } |
| 557 | |
| 558 | static int |
| 559 | stpoll_register(struct ifnet *ifp, const struct ifpoll_status *st_rec) |
| 560 | { |
| 561 | struct stpoll_ctx *st_ctx = &stpoll_context; |
| 562 | int error; |
| 563 | |
| 564 | KKASSERT(&curthread->td_msgport == netisr_portfn(0)); |
| 565 | |
| 566 | if (st_rec->status_func == NULL) |
| 567 | return 0; |
| 568 | |
| 569 | /* |
| 570 | * Check if there is room. |
| 571 | */ |
| 572 | if (st_ctx->poll_handlers >= IFPOLL_LIST_LEN) { |
| 573 | /* |
| 574 | * List full, cannot register more entries. |
| 575 | * This should never happen; if it does, it is probably a |
| 576 | * broken driver trying to register multiple times. Checking |
| 577 | * this at runtime is expensive, and won't solve the problem |
| 578 | * anyways, so just report a few times and then give up. |
| 579 | */ |
| 580 | static int verbose = 10; /* XXX */ |
| 581 | |
| 582 | if (verbose > 0) { |
| 583 | kprintf("status poll handlers list full, " |
| 584 | "maybe a broken driver ?\n"); |
| 585 | verbose--; |
| 586 | } |
| 587 | error = ENOENT; |
| 588 | } else { |
| 589 | struct stpoll_rec *rec = &st_ctx->pr[st_ctx->poll_handlers]; |
| 590 | |
| 591 | rec->ifp = ifp; |
| 592 | rec->serializer = st_rec->serializer; |
| 593 | rec->status_func = st_rec->status_func; |
| 594 | |
| 595 | st_ctx->poll_handlers++; |
| 596 | error = 0; |
| 597 | } |
| 598 | return error; |
| 599 | } |
| 600 | |
| 601 | static int |
| 602 | stpoll_deregister(struct ifnet *ifp) |
| 603 | { |
| 604 | struct stpoll_ctx *st_ctx = &stpoll_context; |
| 605 | int i, error; |
| 606 | |
| 607 | KKASSERT(&curthread->td_msgport == netisr_portfn(0)); |
| 608 | |
| 609 | for (i = 0; i < st_ctx->poll_handlers; ++i) { |
| 610 | if (st_ctx->pr[i].ifp == ifp) /* Found it */ |
| 611 | break; |
| 612 | } |
| 613 | if (i == st_ctx->poll_handlers) { |
| 614 | error = ENOENT; |
| 615 | } else { |
| 616 | st_ctx->poll_handlers--; |
| 617 | if (i < st_ctx->poll_handlers) { |
| 618 | /* Last entry replaces this one. */ |
| 619 | st_ctx->pr[i] = st_ctx->pr[st_ctx->poll_handlers]; |
| 620 | } |
| 621 | error = 0; |
| 622 | } |
| 623 | return error; |
| 624 | } |
| 625 | |
/*
 * Reset the dynamic state of an RX/TX polling context to its initial
 * values.  Runs inside a critical section so the systimer hook never
 * observes a partially reset context.
 */
static __inline void
iopoll_reset_state(struct iopoll_ctx *io_ctx)
{
	crit_enter();
	io_ctx->poll_burst = 5;		/* restart from a small burst */
	io_ctx->pending_polls = 0;
	io_ctx->residual_burst = 0;
	io_ctx->phase = 0;
	io_ctx->kern_frac = 0;
	bzero(&io_ctx->poll_start_t, sizeof(io_ctx->poll_start_t));
	bzero(&io_ctx->prev_t, sizeof(io_ctx->prev_t));
	crit_exit();
}
| 639 | |
/*
 * Create the RX and TX polling contexts for 'cpuid'.  Called from
 * ifpoll_init_pcpu() after poll_comm_init().
 */
static void
iopoll_init(int cpuid)
{
	KKASSERT(cpuid < ncpus2);

	rxpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_RX);
	txpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_TX);
}
| 648 | |
/*
 * Allocate and initialize one per-cpu RX or TX polling context:
 * clamp the global burst tunables, seed the context from them, set
 * up the netmsgs that run the poll/pollmore handlers, and create the
 * per-cpu sysctl subtree (net.ifpoll.X.{rx,tx}).
 */
static struct iopoll_ctx *
iopoll_ctx_create(int cpuid, int poll_type)
{
	struct poll_comm *comm;
	struct iopoll_ctx *io_ctx;
	const char *poll_type_str;
	netisr_fn_t handler, more_handler;

	KKASSERT(poll_type == IFPOLL_RX || poll_type == IFPOLL_TX);

	/*
	 * Make sure that tunables are in sane state
	 */
	if (iopoll_burst_max < MIN_IOPOLL_BURST_MAX)
		iopoll_burst_max = MIN_IOPOLL_BURST_MAX;
	else if (iopoll_burst_max > MAX_IOPOLL_BURST_MAX)
		iopoll_burst_max = MAX_IOPOLL_BURST_MAX;

	if (iopoll_each_burst > iopoll_burst_max)
		iopoll_each_burst = iopoll_burst_max;

	comm = poll_common[cpuid];

	/*
	 * Create the per-cpu polling context
	 */
	io_ctx = kmalloc_cachealign(sizeof(*io_ctx), M_DEVBUF,
	    M_WAITOK | M_ZERO);

	io_ctx->poll_each_burst = iopoll_each_burst;
	io_ctx->poll_burst_max = iopoll_burst_max;
	io_ctx->user_frac = 50;		/* default 50% cpu for user tasks */
	/* TX is polled at a fraction of the RX polling frequency */
	if (poll_type == IFPOLL_RX)
		io_ctx->pollhz = comm->pollhz;
	else
		io_ctx->pollhz = comm->pollhz / (comm->poll_txfrac + 1);
	io_ctx->poll_cpuid = cpuid;
	iopoll_reset_state(io_ctx);

	if (poll_type == IFPOLL_RX) {
		handler = rxpoll_handler;
		more_handler = rxpollmore_handler;
	} else {
		handler = txpoll_handler;
		more_handler = txpollmore_handler;
	}

	netmsg_init(&io_ctx->poll_netmsg, NULL, &netisr_adone_rport,
	    0, handler);
	io_ctx->poll_netmsg.lmsg.u.ms_resultp = io_ctx;

	netmsg_init(&io_ctx->poll_more_netmsg, NULL, &netisr_adone_rport,
	    0, more_handler);
	io_ctx->poll_more_netmsg.lmsg.u.ms_resultp = io_ctx;

	/*
	 * Initialize per-cpu sysctl nodes
	 */
	if (poll_type == IFPOLL_RX)
		poll_type_str = "rx";
	else
		poll_type_str = "tx";

	sysctl_ctx_init(&io_ctx->poll_sysctl_ctx);
	io_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&io_ctx->poll_sysctl_ctx,
				   SYSCTL_CHILDREN(comm->sysctl_tree),
				   OID_AUTO, poll_type_str, CTLFLAG_RD, 0, "");
	iopoll_add_sysctl(&io_ctx->poll_sysctl_ctx,
	    SYSCTL_CHILDREN(io_ctx->poll_sysctl_tree), io_ctx, poll_type);

	return io_ctx;
}
| 721 | |
| 722 | /* |
| 723 | * Hook from iopoll systimer. Tries to schedule an iopoll, but keeps |
| 724 | * track of lost ticks due to the previous handler taking too long. |
| 725 | * Normally, this should not happen, because polling handler should |
| 726 | * run for a short time. However, in some cases (e.g. when there are |
| 727 | * changes in link status etc.) the drivers take a very long time |
| 728 | * (even in the order of milliseconds) to reset and reconfigure the |
| 729 | * device, causing apparent lost polls. |
| 730 | * |
| 731 | * The first part of the code is just for debugging purposes, and tries |
| 732 | * to count how often hardclock ticks are shorter than they should, |
| 733 | * meaning either stray interrupts or delayed events. |
| 734 | * |
| 735 | * WARNING! called from fastint or IPI, the MP lock might not be held. |
| 736 | * NOTE: Caller should hold critical section. |
| 737 | */ |
static void
iopoll_clock(struct iopoll_ctx *io_ctx)
{
	union ifpoll_time t;
	int delta;

	KKASSERT(mycpuid == io_ctx->poll_cpuid);

	if (io_ctx->poll_handlers == 0)
		return;

	/*
	 * Statistics: a tick shorter than half the expected polling
	 * period (delta < 500000us/pollhz) indicates stray interrupts
	 * or delayed events; keep the previous timestamp in that case.
	 */
	ifpoll_time_get(&t);
	delta = ifpoll_time_diff(&io_ctx->prev_t, &t);
	if (delta * io_ctx->pollhz < 500000)
		io_ctx->short_ticks++;
	else
		io_ctx->prev_t = t;

	if (io_ctx->pending_polls > 100) {
		/*
		 * Too much, assume it has stalled (not always true
		 * see comment above).
		 */
		io_ctx->stalled++;
		io_ctx->pending_polls = 0;
		io_ctx->phase = 0;
	}

	/*
	 * Only schedule a new poll if the previous tick's work has
	 * progressed far enough (phase <= 2).  phase != 0 here means
	 * the previous poll has not fully completed yet.
	 */
	if (io_ctx->phase <= 2) {
		if (io_ctx->phase != 0)
			io_ctx->suspect++;
		io_ctx->phase = 1;	/* scheduling the poll */
		sched_iopoll(io_ctx);
		io_ctx->phase = 2;	/* poll scheduled */
	}
	if (io_ctx->pending_polls++ > 0)
		io_ctx->lost_polls++;
}
| 776 | |
| 777 | /* |
| 778 | * rxpoll_handler and txpoll_handler are scheduled by sched_iopoll when |
| 779 | * appropriate, typically once per polling systimer tick. |
| 780 | * |
| 781 | * Note that the message is replied immediately in order to allow a new |
| 782 | * ISR to be scheduled in the handler. |
| 783 | */ |
static void
rxpoll_handler(netmsg_t msg)
{
	struct iopoll_ctx *io_ctx;
	struct thread *td = curthread;
	int i, cycles;

	io_ctx = msg->lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == netisr_portfn(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP, so a new poll can be scheduled while we run */
	lwkt_replymsg(&msg->lmsg, 0);

	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

	io_ctx->phase = 3;	/* poll handler running */
	if (io_ctx->residual_burst == 0) {
		/* First call in this tick */
		ifpoll_time_get(&io_ctx->poll_start_t);
		io_ctx->residual_burst = io_ctx->poll_burst;
	}
	/* Hand at most poll_each_burst packets to each driver per chunk */
	cycles = (io_ctx->residual_burst < io_ctx->poll_each_burst) ?
		 io_ctx->residual_burst : io_ctx->poll_each_burst;
	io_ctx->residual_burst -= cycles;

	for (i = 0; i < io_ctx->poll_handlers; i++) {
		const struct iopoll_rec *rec = &io_ctx->pr[i];
		struct ifnet *ifp = rec->ifp;

		/* Skip, rather than block, if the interface is busy */
		if (!lwkt_serialize_try(rec->serializer))
			continue;

		/* Only poll interfaces that are up and in polling mode */
		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
		    (IFF_RUNNING | IFF_NPOLLING))
			rec->poll_func(ifp, rec->arg, cycles);

		lwkt_serialize_exit(rec->serializer);
	}

	/*
	 * Do a quick exit/enter to catch any higher-priority
	 * interrupt sources.
	 */
	crit_exit_quick(td);
	crit_enter_quick(td);

	sched_iopollmore(io_ctx);
	io_ctx->phase = 4;	/* pollmore scheduled */

	crit_exit_quick(td);
}
| 840 | |
static void
txpoll_handler(netmsg_t msg)
{
	struct iopoll_ctx *io_ctx;
	struct thread *td = curthread;
	int i;

	io_ctx = msg->lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == netisr_portfn(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP, so a new poll can be scheduled while we run */
	lwkt_replymsg(&msg->lmsg, 0);

	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

	io_ctx->phase = 3;	/* poll handler running */

	for (i = 0; i < io_ctx->poll_handlers; i++) {
		const struct iopoll_rec *rec = &io_ctx->pr[i];
		struct ifnet *ifp = rec->ifp;

		/* Skip, rather than block, if the interface is busy */
		if (!lwkt_serialize_try(rec->serializer))
			continue;

		/*
		 * TX polling passes -1 as the count; drivers are free to
		 * interpret the count (see the top-of-file comment).
		 */
		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
		    (IFF_RUNNING | IFF_NPOLLING))
			rec->poll_func(ifp, rec->arg, -1);

		lwkt_serialize_exit(rec->serializer);
	}

	/*
	 * Do a quick exit/enter to catch any higher-priority
	 * interrupt sources.
	 */
	crit_exit_quick(td);
	crit_enter_quick(td);

	sched_iopollmore(io_ctx);
	io_ctx->phase = 4;	/* pollmore scheduled */

	crit_exit_quick(td);
}
| 889 | |
| 890 | /* |
| 891 | * rxpollmore_handler and txpollmore_handler are called after other netisr's, |
| 892 | * possibly scheduling another rxpoll_handler or txpoll_handler call, or |
| 893 | * adapting the burst size for the next cycle. |
| 894 | * |
| 895 | * It is very bad to fetch large bursts of packets from a single card at once, |
| 896 | * because the burst could take a long time to be completely processed leading |
| 897 | * to unfairness. To reduce the problem, and also to account better for time |
| 898 | * spent in network-related processing, we split the burst in smaller chunks |
| 899 | * of fixed size, giving control to the other netisr's between chunks. This |
| 900 | * helps in improving the fairness, reducing livelock and accounting for the |
| 901 | * work performed in low level handling. |
| 902 | */ |
static void
rxpollmore_handler(netmsg_t msg)
{
	struct thread *td = curthread;
	struct iopoll_ctx *io_ctx;
	union ifpoll_time t;
	int kern_load;
	uint32_t pending_polls;

	io_ctx = msg->lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == netisr_portfn(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->lmsg, 0);

	/* Nothing registered; nothing to do. */
	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

	io_ctx->phase = 5;
	if (io_ctx->residual_burst > 0) {
		/* Current burst not yet drained; poll again before adapting. */
		sched_iopoll(io_ctx);
		crit_exit_quick(td);
		/* Will run immediately on return, followed by netisrs */
		return;
	}

	/* Here we can account time spent in iopoll's in this tick */
	ifpoll_time_get(&t);
	kern_load = ifpoll_time_diff(&io_ctx->poll_start_t, &t);
	kern_load = (kern_load * io_ctx->pollhz) / 10000;	/* 0..100 */
	io_ctx->kern_frac = kern_load;

	if (kern_load > (100 - io_ctx->user_frac)) {
		/* Try decrease ticks */
		if (io_ctx->poll_burst > 1)
			io_ctx->poll_burst--;
	} else {
		/* Headroom left; allow a slightly larger burst next tick. */
		if (io_ctx->poll_burst < io_ctx->poll_burst_max)
			io_ctx->poll_burst++;
	}

	io_ctx->pending_polls--;
	pending_polls = io_ctx->pending_polls;

	if (pending_polls == 0) {
		/* We are done */
		io_ctx->phase = 0;
	} else {
		/*
		 * Last cycle was long and caused us to miss one or more
		 * hardclock ticks. Restart processing again, but slightly
		 * reduce the burst size to prevent that this happens again.
		 */
		io_ctx->poll_burst -= (io_ctx->poll_burst / 8);
		if (io_ctx->poll_burst < 1)
			io_ctx->poll_burst = 1;
		sched_iopoll(io_ctx);
		io_ctx->phase = 6;
	}

	crit_exit_quick(td);
}
| 969 | |
/*
 * TX counterpart of rxpollmore_handler: reschedules txpoll if hardclock
 * ticks were missed.  No burst adaptation is done on the TX side.
 */
static void
txpollmore_handler(netmsg_t msg)
{
	struct thread *td = curthread;
	struct iopoll_ctx *io_ctx;
	uint32_t pending_polls;

	io_ctx = msg->lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == netisr_portfn(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->lmsg, 0);

	/* Nothing registered; nothing to do. */
	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

	io_ctx->phase = 5;

	io_ctx->pending_polls--;
	pending_polls = io_ctx->pending_polls;

	if (pending_polls == 0) {
		/* We are done */
		io_ctx->phase = 0;
	} else {
		/*
		 * Last cycle was long and caused us to miss one or more
		 * hardclock ticks. Restart processing again.
		 */
		sched_iopoll(io_ctx);
		io_ctx->phase = 6;
	}

	crit_exit_quick(td);
}
| 1009 | |
/*
 * Attach the per-context sysctl nodes under 'parent'.  The burst-tuning
 * knobs only make sense for RX contexts; the phase/statistics nodes are
 * common to both RX and TX.
 */
static void
iopoll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
    struct iopoll_ctx *io_ctx, int poll_type)
{
	if (poll_type == IFPOLL_RX) {
		/* Writable knobs go through netmsg-based handlers. */
		SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
		    CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_burstmax,
		    "IU", "Max Polling burst size");

		SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
		    CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_eachburst,
		    "IU", "Max size of each burst");

		SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
		    &io_ctx->poll_burst, 0, "Current polling burst size");

		SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
		    &io_ctx->user_frac, 0, "Desired user fraction of cpu time");

		SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "kern_frac", CTLFLAG_RD,
		    &io_ctx->kern_frac, 0, "Kernel fraction of cpu time");

		SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD,
		    &io_ctx->residual_burst, 0,
		    "# of residual cycles in burst");
	}

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
	    &io_ctx->phase, 0, "Polling phase");

	SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
	    &io_ctx->suspect, "Suspected events");

	SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
	    &io_ctx->stalled, "Potential stalls");

	SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
	    &io_ctx->short_ticks,
	    "Hardclock ticks shorter than they should be");

	SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
	    &io_ctx->lost_polls,
	    "How many times we would have lost a poll tick");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
	    &io_ctx->pending_polls, 0, "Do we need to poll again");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
	    &io_ctx->poll_handlers, 0, "Number of registered poll handlers");
}
| 1060 | |
| 1061 | static void |
| 1062 | sysctl_burstmax_handler(netmsg_t nmsg) |
| 1063 | { |
| 1064 | struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg; |
| 1065 | struct iopoll_ctx *io_ctx; |
| 1066 | |
| 1067 | io_ctx = msg->ctx; |
| 1068 | KKASSERT(&curthread->td_msgport == netisr_portfn(io_ctx->poll_cpuid)); |
| 1069 | |
| 1070 | io_ctx->poll_burst_max = nmsg->lmsg.u.ms_result; |
| 1071 | if (io_ctx->poll_each_burst > io_ctx->poll_burst_max) |
| 1072 | io_ctx->poll_each_burst = io_ctx->poll_burst_max; |
| 1073 | if (io_ctx->poll_burst > io_ctx->poll_burst_max) |
| 1074 | io_ctx->poll_burst = io_ctx->poll_burst_max; |
| 1075 | if (io_ctx->residual_burst > io_ctx->poll_burst_max) |
| 1076 | io_ctx->residual_burst = io_ctx->poll_burst_max; |
| 1077 | |
| 1078 | lwkt_replymsg(&nmsg->lmsg, 0); |
| 1079 | } |
| 1080 | |
| 1081 | static int |
| 1082 | sysctl_burstmax(SYSCTL_HANDLER_ARGS) |
| 1083 | { |
| 1084 | struct iopoll_ctx *io_ctx = arg1; |
| 1085 | struct iopoll_sysctl_netmsg msg; |
| 1086 | uint32_t burst_max; |
| 1087 | int error; |
| 1088 | |
| 1089 | burst_max = io_ctx->poll_burst_max; |
| 1090 | error = sysctl_handle_int(oidp, &burst_max, 0, req); |
| 1091 | if (error || req->newptr == NULL) |
| 1092 | return error; |
| 1093 | if (burst_max < MIN_IOPOLL_BURST_MAX) |
| 1094 | burst_max = MIN_IOPOLL_BURST_MAX; |
| 1095 | else if (burst_max > MAX_IOPOLL_BURST_MAX) |
| 1096 | burst_max = MAX_IOPOLL_BURST_MAX; |
| 1097 | |
| 1098 | netmsg_init(&msg.base, NULL, &curthread->td_msgport, |
| 1099 | 0, sysctl_burstmax_handler); |
| 1100 | msg.base.lmsg.u.ms_result = burst_max; |
| 1101 | msg.ctx = io_ctx; |
| 1102 | |
| 1103 | return lwkt_domsg(netisr_portfn(io_ctx->poll_cpuid), &msg.base.lmsg, 0); |
| 1104 | } |
| 1105 | |
| 1106 | static void |
| 1107 | sysctl_eachburst_handler(netmsg_t nmsg) |
| 1108 | { |
| 1109 | struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg; |
| 1110 | struct iopoll_ctx *io_ctx; |
| 1111 | uint32_t each_burst; |
| 1112 | |
| 1113 | io_ctx = msg->ctx; |
| 1114 | KKASSERT(&curthread->td_msgport == netisr_portfn(io_ctx->poll_cpuid)); |
| 1115 | |
| 1116 | each_burst = nmsg->lmsg.u.ms_result; |
| 1117 | if (each_burst > io_ctx->poll_burst_max) |
| 1118 | each_burst = io_ctx->poll_burst_max; |
| 1119 | else if (each_burst < 1) |
| 1120 | each_burst = 1; |
| 1121 | io_ctx->poll_each_burst = each_burst; |
| 1122 | |
| 1123 | lwkt_replymsg(&nmsg->lmsg, 0); |
| 1124 | } |
| 1125 | |
| 1126 | static int |
| 1127 | sysctl_eachburst(SYSCTL_HANDLER_ARGS) |
| 1128 | { |
| 1129 | struct iopoll_ctx *io_ctx = arg1; |
| 1130 | struct iopoll_sysctl_netmsg msg; |
| 1131 | uint32_t each_burst; |
| 1132 | int error; |
| 1133 | |
| 1134 | each_burst = io_ctx->poll_each_burst; |
| 1135 | error = sysctl_handle_int(oidp, &each_burst, 0, req); |
| 1136 | if (error || req->newptr == NULL) |
| 1137 | return error; |
| 1138 | |
| 1139 | netmsg_init(&msg.base, NULL, &curthread->td_msgport, |
| 1140 | 0, sysctl_eachburst_handler); |
| 1141 | msg.base.lmsg.u.ms_result = each_burst; |
| 1142 | msg.ctx = io_ctx; |
| 1143 | |
| 1144 | return lwkt_domsg(netisr_portfn(io_ctx->poll_cpuid), &msg.base.lmsg, 0); |
| 1145 | } |
| 1146 | |
| 1147 | static int |
| 1148 | iopoll_register(struct ifnet *ifp, struct iopoll_ctx *io_ctx, |
| 1149 | const struct ifpoll_io *io_rec) |
| 1150 | { |
| 1151 | int error; |
| 1152 | |
| 1153 | KKASSERT(&curthread->td_msgport == netisr_portfn(io_ctx->poll_cpuid)); |
| 1154 | |
| 1155 | if (io_rec->poll_func == NULL) |
| 1156 | return 0; |
| 1157 | |
| 1158 | /* |
| 1159 | * Check if there is room. |
| 1160 | */ |
| 1161 | if (io_ctx->poll_handlers >= IFPOLL_LIST_LEN) { |
| 1162 | /* |
| 1163 | * List full, cannot register more entries. |
| 1164 | * This should never happen; if it does, it is probably a |
| 1165 | * broken driver trying to register multiple times. Checking |
| 1166 | * this at runtime is expensive, and won't solve the problem |
| 1167 | * anyways, so just report a few times and then give up. |
| 1168 | */ |
| 1169 | static int verbose = 10; /* XXX */ |
| 1170 | if (verbose > 0) { |
| 1171 | kprintf("io poll handlers list full, " |
| 1172 | "maybe a broken driver ?\n"); |
| 1173 | verbose--; |
| 1174 | } |
| 1175 | error = ENOENT; |
| 1176 | } else { |
| 1177 | struct iopoll_rec *rec = &io_ctx->pr[io_ctx->poll_handlers]; |
| 1178 | |
| 1179 | rec->ifp = ifp; |
| 1180 | rec->serializer = io_rec->serializer; |
| 1181 | rec->arg = io_rec->arg; |
| 1182 | rec->poll_func = io_rec->poll_func; |
| 1183 | |
| 1184 | io_ctx->poll_handlers++; |
| 1185 | error = 0; |
| 1186 | } |
| 1187 | return error; |
| 1188 | } |
| 1189 | |
| 1190 | static int |
| 1191 | iopoll_deregister(struct ifnet *ifp, struct iopoll_ctx *io_ctx) |
| 1192 | { |
| 1193 | int i, error; |
| 1194 | |
| 1195 | KKASSERT(&curthread->td_msgport == netisr_portfn(io_ctx->poll_cpuid)); |
| 1196 | |
| 1197 | for (i = 0; i < io_ctx->poll_handlers; ++i) { |
| 1198 | if (io_ctx->pr[i].ifp == ifp) /* Found it */ |
| 1199 | break; |
| 1200 | } |
| 1201 | if (i == io_ctx->poll_handlers) { |
| 1202 | error = ENOENT; |
| 1203 | } else { |
| 1204 | io_ctx->poll_handlers--; |
| 1205 | if (i < io_ctx->poll_handlers) { |
| 1206 | /* Last entry replaces this one. */ |
| 1207 | io_ctx->pr[i] = io_ctx->pr[io_ctx->poll_handlers]; |
| 1208 | } |
| 1209 | |
| 1210 | if (io_ctx->poll_handlers == 0) |
| 1211 | iopoll_reset_state(io_ctx); |
| 1212 | error = 0; |
| 1213 | } |
| 1214 | return error; |
| 1215 | } |
| 1216 | |
/*
 * Allocate and initialize the per-cpu poll_comm structure and create its
 * sysctl tree (net.ifpoll.<cpuid>).  The status_frac knob is only created
 * on cpu0, which is the cpu driving the status poll.
 */
static void
poll_comm_init(int cpuid)
{
	struct poll_comm *comm;
	char cpuid_str[16];

	comm = kmalloc_cachealign(sizeof(*comm), M_DEVBUF, M_WAITOK | M_ZERO);

	/* Fall back to defaults for invalid tunables. */
	if (ifpoll_stfrac < 0)
		ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
	if (ifpoll_txfrac < 0)
		ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;

	comm->pollhz = ifpoll_pollhz;
	comm->poll_cpuid = cpuid;
	comm->poll_stfrac = ifpoll_stfrac;
	comm->poll_txfrac = ifpoll_txfrac;

	ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", cpuid);

	sysctl_ctx_init(&comm->sysctl_ctx);
	comm->sysctl_tree = SYSCTL_ADD_NODE(&comm->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_net_ifpoll),
	    OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");

	SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree),
	    OID_AUTO, "pollhz", CTLTYPE_INT | CTLFLAG_RW,
	    comm, 0, sysctl_pollhz,
	    "I", "Device polling frequency");

	if (cpuid == 0) {
		SYSCTL_ADD_PROC(&comm->sysctl_ctx,
		    SYSCTL_CHILDREN(comm->sysctl_tree),
		    OID_AUTO, "status_frac",
		    CTLTYPE_INT | CTLFLAG_RW,
		    comm, 0, sysctl_stfrac,
		    "I", "# of cycles before status is polled");
	}
	SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree),
	    OID_AUTO, "tx_frac", CTLTYPE_INT | CTLFLAG_RW,
	    comm, 0, sysctl_txfrac,
	    "I", "# of cycles before TX is polled");

	poll_common[cpuid] = comm;
}
| 1262 | |
| 1263 | static void |
| 1264 | poll_comm_start(int cpuid) |
| 1265 | { |
| 1266 | struct poll_comm *comm = poll_common[cpuid]; |
| 1267 | systimer_func_t func; |
| 1268 | |
| 1269 | /* |
| 1270 | * Initialize systimer |
| 1271 | */ |
| 1272 | if (cpuid == 0) |
| 1273 | func = poll_comm_systimer0; |
| 1274 | else |
| 1275 | func = poll_comm_systimer; |
| 1276 | systimer_init_periodic_nq(&comm->pollclock, func, comm, 1); |
| 1277 | } |
| 1278 | |
| 1279 | static void |
| 1280 | _poll_comm_systimer(struct poll_comm *comm) |
| 1281 | { |
| 1282 | if (comm->txfrac_count-- == 0) { |
| 1283 | comm->txfrac_count = comm->poll_txfrac; |
| 1284 | iopoll_clock(txpoll_context[comm->poll_cpuid]); |
| 1285 | } |
| 1286 | iopoll_clock(rxpoll_context[comm->poll_cpuid]); |
| 1287 | } |
| 1288 | |
| 1289 | static void |
| 1290 | poll_comm_systimer0(systimer_t info, int in_ipi __unused, |
| 1291 | struct intrframe *frame __unused) |
| 1292 | { |
| 1293 | struct poll_comm *comm = info->data; |
| 1294 | globaldata_t gd = mycpu; |
| 1295 | |
| 1296 | KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid == 0); |
| 1297 | |
| 1298 | crit_enter_gd(gd); |
| 1299 | |
| 1300 | if (comm->stfrac_count-- == 0) { |
| 1301 | comm->stfrac_count = comm->poll_stfrac; |
| 1302 | stpoll_clock(&stpoll_context); |
| 1303 | } |
| 1304 | _poll_comm_systimer(comm); |
| 1305 | |
| 1306 | crit_exit_gd(gd); |
| 1307 | } |
| 1308 | |
/* Non-cpu0 systimer: only the common RX/TX polling work. */
static void
poll_comm_systimer(systimer_t info, int in_ipi __unused,
    struct intrframe *frame __unused)
{
	struct poll_comm *comm = info->data;
	globaldata_t gd = mycpu;

	KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid != 0);

	crit_enter_gd(gd);
	_poll_comm_systimer(comm);
	crit_exit_gd(gd);
}
| 1322 | |
| 1323 | static void |
| 1324 | poll_comm_adjust_pollhz(struct poll_comm *comm) |
| 1325 | { |
| 1326 | uint32_t handlers; |
| 1327 | int pollhz = 1; |
| 1328 | |
| 1329 | KKASSERT(&curthread->td_msgport == netisr_portfn(comm->poll_cpuid)); |
| 1330 | |
| 1331 | /* |
| 1332 | * If there is no polling handler registered, set systimer |
| 1333 | * frequency to the lowest value. Polling systimer frequency |
| 1334 | * will be adjusted to the requested value, once there are |
| 1335 | * registered handlers. |
| 1336 | */ |
| 1337 | handlers = rxpoll_context[mycpuid]->poll_handlers + |
| 1338 | txpoll_context[mycpuid]->poll_handlers; |
| 1339 | if (comm->poll_cpuid == 0) |
| 1340 | handlers += stpoll_context.poll_handlers; |
| 1341 | if (handlers) |
| 1342 | pollhz = comm->pollhz; |
| 1343 | systimer_adjust_periodic(&comm->pollclock, pollhz); |
| 1344 | } |
| 1345 | |
| 1346 | static int |
| 1347 | sysctl_pollhz(SYSCTL_HANDLER_ARGS) |
| 1348 | { |
| 1349 | struct poll_comm *comm = arg1; |
| 1350 | struct netmsg_base nmsg; |
| 1351 | int error, phz; |
| 1352 | |
| 1353 | phz = comm->pollhz; |
| 1354 | error = sysctl_handle_int(oidp, &phz, 0, req); |
| 1355 | if (error || req->newptr == NULL) |
| 1356 | return error; |
| 1357 | if (phz <= 0) |
| 1358 | return EINVAL; |
| 1359 | else if (phz > IFPOLL_FREQ_MAX) |
| 1360 | phz = IFPOLL_FREQ_MAX; |
| 1361 | |
| 1362 | netmsg_init(&nmsg, NULL, &curthread->td_msgport, |
| 1363 | 0, sysctl_pollhz_handler); |
| 1364 | nmsg.lmsg.u.ms_result = phz; |
| 1365 | |
| 1366 | return lwkt_domsg(netisr_portfn(comm->poll_cpuid), &nmsg.lmsg, 0); |
| 1367 | } |
| 1368 | |
| 1369 | static void |
| 1370 | sysctl_pollhz_handler(netmsg_t nmsg) |
| 1371 | { |
| 1372 | struct poll_comm *comm = poll_common[mycpuid]; |
| 1373 | |
| 1374 | KKASSERT(&curthread->td_msgport == netisr_portfn(comm->poll_cpuid)); |
| 1375 | |
| 1376 | /* Save polling frequency */ |
| 1377 | comm->pollhz = nmsg->lmsg.u.ms_result; |
| 1378 | |
| 1379 | /* |
| 1380 | * Adjust cached pollhz |
| 1381 | */ |
| 1382 | rxpoll_context[mycpuid]->pollhz = comm->pollhz; |
| 1383 | txpoll_context[mycpuid]->pollhz = |
| 1384 | comm->pollhz / (comm->poll_txfrac + 1); |
| 1385 | |
| 1386 | /* |
| 1387 | * Adjust polling frequency |
| 1388 | */ |
| 1389 | poll_comm_adjust_pollhz(comm); |
| 1390 | |
| 1391 | lwkt_replymsg(&nmsg->lmsg, 0); |
| 1392 | } |
| 1393 | |
| 1394 | static int |
| 1395 | sysctl_stfrac(SYSCTL_HANDLER_ARGS) |
| 1396 | { |
| 1397 | struct poll_comm *comm = arg1; |
| 1398 | struct netmsg_base nmsg; |
| 1399 | int error, stfrac; |
| 1400 | |
| 1401 | KKASSERT(comm->poll_cpuid == 0); |
| 1402 | |
| 1403 | stfrac = comm->poll_stfrac; |
| 1404 | error = sysctl_handle_int(oidp, &stfrac, 0, req); |
| 1405 | if (error || req->newptr == NULL) |
| 1406 | return error; |
| 1407 | if (stfrac < 0) |
| 1408 | return EINVAL; |
| 1409 | |
| 1410 | netmsg_init(&nmsg, NULL, &curthread->td_msgport, |
| 1411 | 0, sysctl_stfrac_handler); |
| 1412 | nmsg.lmsg.u.ms_result = stfrac; |
| 1413 | |
| 1414 | return lwkt_domsg(netisr_portfn(comm->poll_cpuid), &nmsg.lmsg, 0); |
| 1415 | } |
| 1416 | |
| 1417 | static void |
| 1418 | sysctl_stfrac_handler(netmsg_t nmsg) |
| 1419 | { |
| 1420 | struct poll_comm *comm = poll_common[mycpuid]; |
| 1421 | int stfrac = nmsg->lmsg.u.ms_result; |
| 1422 | |
| 1423 | KKASSERT(&curthread->td_msgport == netisr_portfn(comm->poll_cpuid)); |
| 1424 | |
| 1425 | crit_enter(); |
| 1426 | comm->poll_stfrac = stfrac; |
| 1427 | if (comm->stfrac_count > comm->poll_stfrac) |
| 1428 | comm->stfrac_count = comm->poll_stfrac; |
| 1429 | crit_exit(); |
| 1430 | |
| 1431 | lwkt_replymsg(&nmsg->lmsg, 0); |
| 1432 | } |
| 1433 | |
| 1434 | static int |
| 1435 | sysctl_txfrac(SYSCTL_HANDLER_ARGS) |
| 1436 | { |
| 1437 | struct poll_comm *comm = arg1; |
| 1438 | struct netmsg_base nmsg; |
| 1439 | int error, txfrac; |
| 1440 | |
| 1441 | txfrac = comm->poll_txfrac; |
| 1442 | error = sysctl_handle_int(oidp, &txfrac, 0, req); |
| 1443 | if (error || req->newptr == NULL) |
| 1444 | return error; |
| 1445 | if (txfrac < 0) |
| 1446 | return EINVAL; |
| 1447 | |
| 1448 | netmsg_init(&nmsg, NULL, &curthread->td_msgport, |
| 1449 | 0, sysctl_txfrac_handler); |
| 1450 | nmsg.lmsg.u.ms_result = txfrac; |
| 1451 | |
| 1452 | return lwkt_domsg(netisr_portfn(comm->poll_cpuid), &nmsg.lmsg, 0); |
| 1453 | } |
| 1454 | |
| 1455 | static void |
| 1456 | sysctl_txfrac_handler(netmsg_t nmsg) |
| 1457 | { |
| 1458 | struct poll_comm *comm = poll_common[mycpuid]; |
| 1459 | int txfrac = nmsg->lmsg.u.ms_result; |
| 1460 | |
| 1461 | KKASSERT(&curthread->td_msgport == netisr_portfn(comm->poll_cpuid)); |
| 1462 | |
| 1463 | crit_enter(); |
| 1464 | comm->poll_txfrac = txfrac; |
| 1465 | if (comm->txfrac_count > comm->poll_txfrac) |
| 1466 | comm->txfrac_count = comm->poll_txfrac; |
| 1467 | crit_exit(); |
| 1468 | |
| 1469 | lwkt_replymsg(&nmsg->lmsg, 0); |
| 1470 | } |