| Commit | Line | Data |
|---|---|---|
| 66d6c637 JH |
1 | /* |
| 2 | * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved. | |
| 3 | * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. | |
| 4 | * Copyright (c) 2003 Jonathan Lemon. All rights reserved. | |
| 5 | * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. | |
| 6 | * | |
| 7 | * This code is derived from software contributed to The DragonFly Project | |
| 8 | * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon. | |
| 9 | * | |
| d849e575 MD |
10 | * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright |
| 11 | * into this one around July 8 2004. | |
| 12 | * | |
| 66d6c637 JH |
13 | * Redistribution and use in source and binary forms, with or without |
| 14 | * modification, are permitted provided that the following conditions | |
| 15 | * are met: | |
| 16 | * 1. Redistributions of source code must retain the above copyright | |
| 17 | * notice, this list of conditions and the following disclaimer. | |
| 18 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 19 | * notice, this list of conditions and the following disclaimer in the | |
| 20 | * documentation and/or other materials provided with the distribution. | |
| 21 | * 3. Neither the name of The DragonFly Project nor the names of its | |
| 22 | * contributors may be used to endorse or promote products derived | |
| 23 | * from this software without specific, prior written permission. | |
| 24 | * | |
| 25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 26 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 27 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
| 28 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
| 29 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 30 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
| 31 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 32 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
| 33 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 34 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 35 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| ef0fdad1 MD |
37 | */ |
| 38 | ||
| 39 | #include <sys/param.h> | |
| 40 | #include <sys/systm.h> | |
| bf82f9b7 | 41 | #include <sys/kernel.h> |
| 9eeaa8a9 | 42 | #include <sys/malloc.h> |
| bf82f9b7 | 43 | #include <sys/msgport.h> |
| ef0fdad1 MD |
44 | #include <sys/proc.h> |
| 45 | #include <sys/interrupt.h> | |
| 8bde602d JH |
46 | #include <sys/socket.h> |
| 47 | #include <sys/sysctl.h> | |
| 48e7b118 | 48 | #include <sys/socketvar.h> |
| 8bde602d JH |
49 | #include <net/if.h> |
| 50 | #include <net/if_var.h> | |
| ef0fdad1 MD |
51 | #include <net/netisr.h> |
| 52 | #include <machine/cpufunc.h> | |
| a91f9815 | 53 | #include <machine/smp.h> |
| ef0fdad1 | 54 | |
| 3227f1b8 MD |
55 | #include <sys/thread2.h> |
| 56 | #include <sys/msgport2.h> | |
| 4599cf19 | 57 | #include <net/netmsg2.h> |
| 684a93c4 | 58 | #include <sys/mplock2.h> |
| 3227f1b8 | 59 | |
| 002c1265 | 60 | static void netmsg_sync_func(netmsg_t msg); |
| c3c96e44 MD |
61 | static void netmsg_service_loop(void *arg); |
| 62 | static void cpu0_cpufn(struct mbuf **mp, int hoff); | |
| e6f77b88 | 63 | static void netisr_nohashck(struct mbuf *, const struct pktinfo *); |
| 5c703385 MD |
64 | |
| 65 | struct netmsg_port_registration { | |
| c3c96e44 MD |
66 | TAILQ_ENTRY(netmsg_port_registration) npr_entry; |
| 67 | lwkt_port_t npr_port; | |
| 68 | }; | |
| 69 | ||
| 70 | struct netmsg_rollup { | |
| 71 | TAILQ_ENTRY(netmsg_rollup) ru_entry; | |
| 72 | netisr_ru_t ru_func; | |
| 5c703385 MD |
73 | }; |
| 74 | ||
| a91f9815 SZ |
75 | struct netmsg_barrier { |
| 76 | struct netmsg_base base; | |
| 0503d1d0 | 77 | volatile cpumask_t *br_cpumask; |
| ca3321f8 | 78 | volatile uint32_t br_done; |
| a91f9815 SZ |
79 | }; |
| 80 | ||
| d0c7a72a SZ |
/* Bits kept in netmsg_barrier.br_done. */
#define NETISR_BR_NOTDONE	0x1		/* set by netisr_barrier_set() */
#define NETISR_BR_WAITDONE	0x80000000	/* dispatcher sleeps on br_done */
| ca3321f8 | 83 | |
| a91f9815 SZ |
84 | struct netisr_barrier { |
| 85 | struct netmsg_barrier *br_msgs[MAXCPU]; | |
| 86 | int br_isset; | |
| 87 | }; | |
| 88 | ||
| bf82f9b7 | 89 | static struct netisr netisrs[NETISR_MAX]; |
| 5c703385 | 90 | static TAILQ_HEAD(,netmsg_port_registration) netreglist; |
| c3c96e44 | 91 | static TAILQ_HEAD(,netmsg_rollup) netrulist; |
| bf82f9b7 MD |
92 | |
| 93 | /* Per-CPU thread to handle any protocol. */ | |
| c3c96e44 | 94 | static struct thread netisr_cpu[MAXCPU]; |
| 3227f1b8 | 95 | lwkt_port netisr_afree_rport; |
| c3d495a1 | 96 | lwkt_port netisr_afree_free_so_rport; |
| a29576fc | 97 | lwkt_port netisr_adone_rport; |
| 6aad077d | 98 | lwkt_port netisr_apanic_rport; |
| 3efe7008 | 99 | lwkt_port netisr_sync_port; |
| 3227f1b8 | 100 | |
| fb0f29c4 MD |
101 | static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t); |
| 102 | ||
| 92db3805 | 103 | SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr"); |
| ff4a1403 | 104 | |
| 3227f1b8 MD |
105 | /* |
| 106 | * netisr_afree_rport replymsg function, only used to handle async | |
| 107 | * messages which the sender has abandoned to their fate. | |
| 108 | */ | |
| 109 | static void | |
| 110 | netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) | |
| 111 | { | |
| c3c96e44 | 112 | kfree(msg, M_LWKTMSG); |
| 3227f1b8 | 113 | } |
| ef0fdad1 | 114 | |
| c3d495a1 MD |
115 | static void |
| 116 | netisr_autofree_free_so_reply(lwkt_port_t port, lwkt_msg_t msg) | |
| 117 | { | |
| 118 | sofree(((netmsg_t)msg)->base.nm_so); | |
| 119 | kfree(msg, M_LWKTMSG); | |
| 120 | } | |
| 121 | ||
| dc22b3aa | 122 | /* |
| fb0f29c4 MD |
123 | * We need a custom putport function to handle the case where the |
| 124 | * message target is the current thread's message port. This case | |
| 125 | * can occur when the TCP or UDP stack does a direct callback to NFS and NFS | |
| 126 | * then turns around and executes a network operation synchronously. | |
| 3efe7008 | 127 | * |
| fb0f29c4 MD |
128 | * To prevent deadlocking, we must execute these self-referential messages |
| 129 | * synchronously, effectively turning the message into a glorified direct | |
| 130 | * procedure call back into the protocol stack. The operation must be | |
| 131 | * complete on return or we will deadlock, so panic if it isn't. | |
| 002c1265 MD |
132 | * |
| 133 | * However, the target function is under no obligation to immediately | |
| 134 | * reply the message. It may forward it elsewhere. | |
| dc22b3aa | 135 | */ |
| 5c703385 | 136 | static int |
| dc22b3aa JH |
137 | netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg) |
| 138 | { | |
| 002c1265 | 139 | netmsg_base_t nmsg = (void *)lmsg; |
| c3c96e44 MD |
140 | |
| 141 | if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) { | |
| 002c1265 | 142 | nmsg->nm_dispatch((netmsg_t)nmsg); |
| c3c96e44 MD |
143 | return(EASYNC); |
| 144 | } else { | |
| 145 | return(netmsg_fwd_port_fn(port, lmsg)); | |
| 146 | } | |
| dc22b3aa JH |
147 | } |
| 148 | ||
| 3efe7008 MD |
149 | /* |
| 150 | * UNIX DOMAIN sockets still have to run their uipc functions synchronously, | |
| 151 | * because they depend on the user proc context for a number of things | |
| 152 | * (like creds) which we have not yet incorporated into the message structure. | |
| 153 | * | |
| 154 | * However, we maintain our message/port abstraction. Having a special | |
| 155 | * synchronous port which runs the commands synchronously gives us the | |
| 156 | * ability to serialize operations in one place later on when we start | |
| 157 | * removing the BGL. | |
| 3efe7008 MD |
158 | */ |
| 159 | static int | |
| 160 | netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg) | |
| 161 | { | |
| 002c1265 | 162 | netmsg_base_t nmsg = (void *)lmsg; |
| 3efe7008 | 163 | |
| c3c96e44 | 164 | KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0); |
| e0383bf3 | 165 | |
| c3c96e44 | 166 | lmsg->ms_target_port = port; /* required for abort */ |
| 002c1265 | 167 | nmsg->nm_dispatch((netmsg_t)nmsg); |
| c3c96e44 | 168 | return(EASYNC); |
| 3efe7008 MD |
169 | } |
| 170 | ||
| 171 | static void | |
| bf82f9b7 | 172 | netisr_init(void) |
| ef0fdad1 | 173 | { |
| c3c96e44 MD |
174 | int i; |
| 175 | ||
| 176 | TAILQ_INIT(&netreglist); | |
| 177 | TAILQ_INIT(&netrulist); | |
| 178 | ||
| 179 | /* | |
| 180 | * Create default per-cpu threads for generic protocol handling. | |
| 181 | */ | |
| 182 | for (i = 0; i < ncpus; ++i) { | |
| 183 | lwkt_create(netmsg_service_loop, NULL, NULL, | |
| 4643740a | 184 | &netisr_cpu[i], TDF_NOSTART|TDF_FORCE_SPINPORT, |
| 392cd266 | 185 | i, "netisr_cpu %d", i); |
| c3c96e44 MD |
186 | netmsg_service_port_init(&netisr_cpu[i].td_msgport); |
| 187 | lwkt_schedule(&netisr_cpu[i]); | |
| 188 | } | |
| 189 | ||
| 190 | /* | |
| 191 | * The netisr_afree_rport is a special reply port which automatically | |
| 192 | * frees the replied message. The netisr_adone_rport simply marks | |
| 193 | * the message as being done. The netisr_apanic_rport panics if | |
| 194 | * the message is replied to. | |
| 195 | */ | |
| 196 | lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply); | |
| c3d495a1 MD |
197 | lwkt_initport_replyonly(&netisr_afree_free_so_rport, |
| 198 | netisr_autofree_free_so_reply); | |
| c3c96e44 MD |
199 | lwkt_initport_replyonly_null(&netisr_adone_rport); |
| 200 | lwkt_initport_panic(&netisr_apanic_rport); | |
| 201 | ||
| 202 | /* | |
| 203 | * The netisr_syncport is a special port which executes the message | |
| 204 | * synchronously and waits for it if EASYNC is returned. | |
| 205 | */ | |
| 206 | lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport); | |
| ef0fdad1 MD |
207 | } |
| 208 | ||
| b2632176 | 209 | SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL); |
| bf82f9b7 | 210 | |
| 5c703385 MD |
211 | /* |
| 212 | * Finish initializing the message port for a netmsg service. This also | |
| 213 | * registers the port for synchronous cleanup operations such as when an | |
| 214 | * ifnet is being destroyed. There is no deregistration API yet. | |
| 215 | */ | |
| 216 | void | |
| 217 | netmsg_service_port_init(lwkt_port_t port) | |
| 218 | { | |
| c3c96e44 MD |
219 | struct netmsg_port_registration *reg; |
| 220 | ||
| 221 | /* | |
| 222 | * Override the putport function. Our custom function checks for | |
| 223 | * self-references and executes such commands synchronously. | |
| 224 | */ | |
| 225 | if (netmsg_fwd_port_fn == NULL) | |
| 226 | netmsg_fwd_port_fn = port->mp_putport; | |
| 227 | KKASSERT(netmsg_fwd_port_fn == port->mp_putport); | |
| 228 | port->mp_putport = netmsg_put_port; | |
| 229 | ||
| 230 | /* | |
| 231 | * Keep track of ports using the netmsg API so we can synchronize | |
| 232 | * certain operations (such as freeing an ifnet structure) across all | |
| 233 | * consumers. | |
| 234 | */ | |
| 235 | reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO); | |
| 236 | reg->npr_port = port; | |
| 237 | TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry); | |
| 5c703385 MD |
238 | } |
| 239 | ||
| 240 | /* | |
| 241 | * This function synchronizes the caller with all netmsg services. For | |
| 242 | * example, if an interface is being removed we must make sure that all | |
| 243 | * packets related to that interface complete processing before the structure | |
| 244 | * can actually be freed. This sort of synchronization is an alternative to | |
| 245 | * ref-counting the netif, removing the ref counting overhead in favor of | |
| 246 | * placing additional overhead in the netif freeing sequence (where it is | |
| 247 | * inconsequential). | |
| 248 | */ | |
| 249 | void | |
| 250 | netmsg_service_sync(void) | |
| 251 | { | |
| c3c96e44 | 252 | struct netmsg_port_registration *reg; |
| 002c1265 | 253 | struct netmsg_base smsg; |
| 5c703385 | 254 | |
| c3c96e44 | 255 | netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_func); |
| 5c703385 | 256 | |
| c3c96e44 | 257 | TAILQ_FOREACH(reg, &netreglist, npr_entry) { |
| 002c1265 | 258 | lwkt_domsg(reg->npr_port, &smsg.lmsg, 0); |
| c3c96e44 | 259 | } |
| 5c703385 MD |
260 | } |
| 261 | ||
| 262 | /* | |
| 263 | * The netmsg function simply replies the message. API semantics require | |
| 264 | * EASYNC to be returned if the netmsg function disposes of the message. | |
| 265 | */ | |
| 4599cf19 | 266 | static void |
| 002c1265 | 267 | netmsg_sync_func(netmsg_t msg) |
| 5c703385 | 268 | { |
| 002c1265 | 269 | lwkt_replymsg(&msg->lmsg, 0); |
| bf82f9b7 | 270 | } |
| 8bde602d | 271 | |
| bf82f9b7 | 272 | /* |
| 92db3805 SZ |
273 | * Generic netmsg service loop. Some protocols may roll their own but all |
| 274 | * must do the basic command dispatch function call done here. | |
| 95f8b5ce | 275 | */ |
| c3c96e44 | 276 | static void |
| 92db3805 | 277 | netmsg_service_loop(void *arg) |
| 95f8b5ce | 278 | { |
| c3c96e44 | 279 | struct netmsg_rollup *ru; |
| 002c1265 | 280 | netmsg_base_t msg; |
| c3c96e44 MD |
281 | thread_t td = curthread;; |
| 282 | int limit; | |
| 283 | ||
| 284 | while ((msg = lwkt_waitport(&td->td_msgport, 0))) { | |
| 285 | /* | |
| 286 | * Run up to 512 pending netmsgs. | |
| 287 | */ | |
| 288 | limit = 512; | |
| 289 | do { | |
| 290 | KASSERT(msg->nm_dispatch != NULL, | |
| 291 | ("netmsg_service isr %d badmsg\n", | |
| 002c1265 | 292 | msg->lmsg.u.ms_result)); |
| 0ce0603e MD |
293 | if (msg->nm_so && |
| 294 | msg->nm_so->so_port != &td->td_msgport) { | |
| 295 | /* | |
| 296 | * Sockets undergoing connect or disconnect | |
| 297 | * ops can change ports on us. Chase the | |
| 298 | * port. | |
| 299 | */ | |
| 300 | kprintf("netmsg_service_loop: Warning, " | |
| 301 | "port changed so=%p\n", msg->nm_so); | |
| 302 | lwkt_forwardmsg(msg->nm_so->so_port, | |
| 002c1265 | 303 | &msg->lmsg); |
| 0ce0603e MD |
304 | } else { |
| 305 | /* | |
| 306 | * We are on the correct port, dispatch it. | |
| 307 | */ | |
| 002c1265 | 308 | msg->nm_dispatch((netmsg_t)msg); |
| 0ce0603e | 309 | } |
| c3c96e44 MD |
310 | if (--limit == 0) |
| 311 | break; | |
| 312 | } while ((msg = lwkt_getport(&td->td_msgport)) != NULL); | |
| 313 | ||
| 314 | /* | |
| 315 | * Run all registered rollup functions for this cpu | |
| 316 | * (e.g. tcp_willblock()). | |
| 317 | */ | |
| 318 | TAILQ_FOREACH(ru, &netrulist, ru_entry) | |
| 319 | ru->ru_func(); | |
| 320 | } | |
| 95f8b5ce SZ |
321 | } |
| 322 | ||
| 323 | /* | |
| c3c96e44 MD |
324 | * Forward a packet to a netisr service function. |
| 325 | * | |
| 326 | * If the packet has not been assigned to a protocol thread we call | |
| 327 | * the port characterization function to assign it. The caller must | |
| 328 | * clear M_HASH (or not have set it in the first place) if the caller | |
| 329 | * wishes the packet to be recharacterized. | |
| bf82f9b7 | 330 | */ |
| c3c96e44 MD |
331 | int |
| 332 | netisr_queue(int num, struct mbuf *m) | |
| bf82f9b7 | 333 | { |
| c3c96e44 MD |
334 | struct netisr *ni; |
| 335 | struct netmsg_packet *pmsg; | |
| 336 | lwkt_port_t port; | |
| 337 | ||
| c157ff7a | 338 | KASSERT((num > 0 && num <= NELEM(netisrs)), |
| c3c96e44 MD |
339 | ("Bad isr %d", num)); |
| 340 | ||
| 341 | ni = &netisrs[num]; | |
| 342 | if (ni->ni_handler == NULL) { | |
| 343 | kprintf("Unregistered isr %d\n", num); | |
| 344 | m_freem(m); | |
| 345 | return (EIO); | |
| 346 | } | |
| 347 | ||
| 348 | /* | |
| 349 | * Figure out which protocol thread to send to. This does not | |
| 350 | * have to be perfect but performance will be really good if it | |
| 351 | * is correct. Major protocol inputs such as ip_input() will | |
| 352 | * re-characterize the packet as necessary. | |
| 353 | */ | |
| 354 | if ((m->m_flags & M_HASH) == 0) { | |
| 355 | ni->ni_cpufn(&m, 0); | |
| 356 | if (m == NULL) { | |
| 357 | m_freem(m); | |
| 358 | return (EIO); | |
| 359 | } | |
| 360 | if ((m->m_flags & M_HASH) == 0) { | |
| 361 | kprintf("netisr_queue(%d): packet hash failed\n", num); | |
| 362 | m_freem(m); | |
| 363 | return (EIO); | |
| 364 | } | |
| 365 | } | |
| 366 | ||
| 367 | /* | |
| 368 | * Get the protocol port based on the packet hash, initialize | |
| 369 | * the netmsg, and send it off. | |
| 370 | */ | |
| 371 | port = cpu_portfn(m->m_pkthdr.hash); | |
| 372 | pmsg = &m->m_hdr.mh_netmsg; | |
| 002c1265 | 373 | netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, |
| c3c96e44 MD |
374 | 0, ni->ni_handler); |
| 375 | pmsg->nm_packet = m; | |
| 002c1265 MD |
376 | pmsg->base.lmsg.u.ms_result = num; |
| 377 | lwkt_sendmsg(port, &pmsg->base.lmsg); | |
| c3c96e44 MD |
378 | |
| 379 | return (0); | |
| 8bde602d JH |
380 | } |
| 381 | ||
| 382 | /* | |
| ebe4c2ae SZ |
383 | * Run a netisr service function on the packet. |
| 384 | * | |
| 385 | * The packet must have been correctly characterized! | |
| 386 | */ | |
| 387 | int | |
| 388 | netisr_handle(int num, struct mbuf *m) | |
| 389 | { | |
| 390 | struct netisr *ni; | |
| 391 | struct netmsg_packet *pmsg; | |
| 392 | lwkt_port_t port; | |
| 393 | ||
| 394 | /* | |
| 395 | * Get the protocol port based on the packet hash | |
| 396 | */ | |
| 397 | KASSERT((m->m_flags & M_HASH), ("packet not characterized\n")); | |
| 398 | port = cpu_portfn(m->m_pkthdr.hash); | |
| 399 | KASSERT(&curthread->td_msgport == port, ("wrong msgport\n")); | |
| 400 | ||
| 401 | KASSERT((num > 0 && num <= NELEM(netisrs)), ("bad isr %d", num)); | |
| 402 | ni = &netisrs[num]; | |
| 403 | if (ni->ni_handler == NULL) { | |
| 404 | kprintf("unregistered isr %d\n", num); | |
| 405 | m_freem(m); | |
| 406 | return EIO; | |
| 407 | } | |
| 408 | ||
| 409 | /* | |
| 410 | * Initialize the netmsg, and run the handler directly. | |
| 411 | */ | |
| 412 | pmsg = &m->m_hdr.mh_netmsg; | |
| 413 | netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, | |
| 414 | 0, ni->ni_handler); | |
| 415 | pmsg->nm_packet = m; | |
| 416 | pmsg->base.lmsg.u.ms_result = num; | |
| 417 | ni->ni_handler((netmsg_t)&pmsg->base); | |
| 418 | ||
| 419 | return 0; | |
| 420 | } | |
| 421 | ||
| 422 | /* | |
| c3c96e44 MD |
423 | * Pre-characterization of a deeper portion of the packet for the |
| 424 | * requested isr. | |
| 425 | * | |
| 426 | * The base of the ISR type (e.g. IP) that we want to characterize is | |
| 427 | * at (hoff) relative to the beginning of the mbuf. This allows | |
| 428 | * e.g. ether_input_chain() to not have to adjust the m_data/m_len. | |
| 8bde602d | 429 | */ |
| c3c96e44 MD |
430 | void |
| 431 | netisr_characterize(int num, struct mbuf **mp, int hoff) | |
| ef0fdad1 | 432 | { |
| c3c96e44 MD |
433 | struct netisr *ni; |
| 434 | struct mbuf *m; | |
| 435 | ||
| 436 | /* | |
| 437 | * Validation | |
| 438 | */ | |
| c3c96e44 MD |
439 | m = *mp; |
| 440 | KKASSERT(m != NULL); | |
| 441 | ||
| e6318d16 MD |
442 | if (num < 0 || num >= NETISR_MAX) { |
| 443 | if (num == NETISR_MAX) { | |
| 444 | m->m_flags |= M_HASH; | |
| 445 | m->m_pkthdr.hash = 0; | |
| 446 | return; | |
| 447 | } | |
| 448 | panic("Bad isr %d", num); | |
| 449 | } | |
| 450 | ||
| c3c96e44 MD |
451 | /* |
| 452 | * Valid netisr? | |
| 453 | */ | |
| 454 | ni = &netisrs[num]; | |
| 455 | if (ni->ni_handler == NULL) { | |
| 456 | kprintf("Unregistered isr %d\n", num); | |
| 457 | m_freem(m); | |
| 458 | *mp = NULL; | |
| 459 | } | |
| 460 | ||
| 461 | /* | |
| 462 | * Characterize the packet | |
| 463 | */ | |
| 464 | if ((m->m_flags & M_HASH) == 0) { | |
| 465 | ni->ni_cpufn(mp, hoff); | |
| 466 | m = *mp; | |
| 467 | if (m && (m->m_flags & M_HASH) == 0) | |
| 468 | kprintf("netisr_queue(%d): packet hash failed\n", num); | |
| 469 | } | |
| 8bde602d JH |
470 | } |
| 471 | ||
| bf82f9b7 | 472 | void |
| c3c96e44 | 473 | netisr_register(int num, netisr_fn_t handler, netisr_cpufn_t cpufn) |
| 8bde602d | 474 | { |
| c3c96e44 | 475 | struct netisr *ni; |
| ef0fdad1 | 476 | |
| c157ff7a | 477 | KASSERT((num > 0 && num <= NELEM(netisrs)), |
| c3c96e44 MD |
478 | ("netisr_register: bad isr %d", num)); |
| 479 | KKASSERT(handler != NULL); | |
| 480 | ||
| 481 | if (cpufn == NULL) | |
| 482 | cpufn = cpu0_cpufn; | |
| 8bde602d | 483 | |
| c3c96e44 MD |
484 | ni = &netisrs[num]; |
| 485 | ||
| 486 | ni->ni_handler = handler; | |
| e6f77b88 | 487 | ni->ni_hashck = netisr_nohashck; |
| c3c96e44 MD |
488 | ni->ni_cpufn = cpufn; |
| 489 | netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL); | |
| ef0fdad1 | 490 | } |
| bf82f9b7 | 491 | |
| c3c96e44 | 492 | void |
| e6f77b88 SZ |
493 | netisr_register_hashcheck(int num, netisr_hashck_t hashck) |
| 494 | { | |
| 495 | struct netisr *ni; | |
| 496 | ||
| 497 | KASSERT((num > 0 && num <= NELEM(netisrs)), | |
| 498 | ("netisr_register: bad isr %d", num)); | |
| 499 | ||
| 500 | ni = &netisrs[num]; | |
| 501 | ni->ni_hashck = hashck; | |
| 502 | } | |
| 503 | ||
| 504 | void | |
| c3c96e44 | 505 | netisr_register_rollup(netisr_ru_t ru_func) |
| bf82f9b7 | 506 | { |
| c3c96e44 | 507 | struct netmsg_rollup *ru; |
| 97a43e72 | 508 | |
| c3c96e44 MD |
509 | ru = kmalloc(sizeof(*ru), M_TEMP, M_WAITOK|M_ZERO); |
| 510 | ru->ru_func = ru_func; | |
| 511 | TAILQ_INSERT_TAIL(&netrulist, ru, ru_entry); | |
| bf82f9b7 MD |
512 | } |
| 513 | ||
| c3c96e44 MD |
514 | /* |
| 515 | * Return the message port for the general protocol message servicing | |
| 516 | * thread for a particular cpu. | |
| 517 | */ | |
| ecdefdda MD |
518 | lwkt_port_t |
| 519 | cpu_portfn(int cpu) | |
| 520 | { | |
| c3c96e44 MD |
521 | KKASSERT(cpu >= 0 && cpu < ncpus); |
| 522 | return (&netisr_cpu[cpu].td_msgport); | |
| ecdefdda MD |
523 | } |
| 524 | ||
| c244d613 | 525 | /* |
| c3c96e44 | 526 | * Return the current cpu's network protocol thread. |
| c244d613 SZ |
527 | */ |
| 528 | lwkt_port_t | |
| 529 | cur_netport(void) | |
| 530 | { | |
| c3c96e44 | 531 | return(cpu_portfn(mycpu->gd_cpuid)); |
| c244d613 SZ |
532 | } |
| 533 | ||
| c3c96e44 | 534 | /* |
| c3c96e44 MD |
535 | * Return a default protocol control message processing thread port |
| 536 | */ | |
| 3efe7008 | 537 | lwkt_port_t |
| e3873585 SZ |
538 | cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused, |
| 539 | void *extra __unused) | |
| 540 | { | |
| c3c96e44 | 541 | return (&netisr_cpu[0].td_msgport); |
| e3873585 SZ |
542 | } |
| 543 | ||
| c3c96e44 | 544 | /* |
| c3c96e44 MD |
545 | * This is a default netisr packet characterization function which |
| 546 | * sets M_HASH. If a netisr is registered with a NULL cpufn function | |
| 547 | * this one is assigned. | |
| 548 | * | |
| 549 | * This function makes no attempt to validate the packet. | |
| 550 | */ | |
| 551 | static void | |
| 552 | cpu0_cpufn(struct mbuf **mp, int hoff __unused) | |
| 553 | { | |
| 554 | struct mbuf *m = *mp; | |
| 555 | ||
| 556 | m->m_flags |= M_HASH; | |
| 557 | m->m_pkthdr.hash = 0; | |
| 3efe7008 MD |
558 | } |
| 559 | ||
| bf82f9b7 | 560 | /* |
| a29576fc | 561 | * schednetisr() is used to call the netisr handler from the appropriate |
| 9eeaa8a9 | 562 | * netisr thread for polling and other purposes. |
| a29576fc MD |
563 | * |
| 564 | * This function may be called from a hard interrupt or IPI and must be | |
| 565 | * MP SAFE and non-blocking. We use a fixed per-cpu message instead of | |
| 566 | * trying to allocate one. We must get ourselves onto the target cpu | |
| 567 | * to safely check the MSGF_DONE bit on the message but since the message | |
| 568 | * will be sent to that cpu anyway this does not add any extra work beyond | |
| 569 | * what lwkt_sendmsg() would have already had to do to schedule the target | |
| 570 | * thread. | |
| bf82f9b7 | 571 | */ |
| a29576fc MD |
572 | static void |
| 573 | schednetisr_remote(void *data) | |
| bf82f9b7 | 574 | { |
| c3c96e44 MD |
575 | int num = (int)(intptr_t)data; |
| 576 | struct netisr *ni = &netisrs[num]; | |
| 577 | lwkt_port_t port = &netisr_cpu[0].td_msgport; | |
| 002c1265 | 578 | netmsg_base_t pmsg; |
| c3c96e44 MD |
579 | |
| 580 | pmsg = &netisrs[num].ni_netmsg; | |
| 002c1265 | 581 | if (pmsg->lmsg.ms_flags & MSGF_DONE) { |
| c3c96e44 | 582 | netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler); |
| 002c1265 MD |
583 | pmsg->lmsg.u.ms_result = num; |
| 584 | lwkt_sendmsg(port, &pmsg->lmsg); | |
| c3c96e44 | 585 | } |
| a29576fc MD |
586 | } |
| 587 | ||
| 588 | void | |
| 589 | schednetisr(int num) | |
| 590 | { | |
| c157ff7a | 591 | KASSERT((num > 0 && num <= NELEM(netisrs)), |
| c3c96e44 MD |
592 | ("schednetisr: bad isr %d", num)); |
| 593 | KKASSERT(netisrs[num].ni_handler != NULL); | |
| a29576fc | 594 | #ifdef SMP |
| c3c96e44 MD |
595 | if (mycpu->gd_cpuid != 0) { |
| 596 | lwkt_send_ipiq(globaldata_find(0), | |
| 597 | schednetisr_remote, (void *)(intptr_t)num); | |
| 598 | } else { | |
| 599 | crit_enter(); | |
| 600 | schednetisr_remote((void *)(intptr_t)num); | |
| 601 | crit_exit(); | |
| 602 | } | |
| a29576fc | 603 | #else |
| c3c96e44 MD |
604 | crit_enter(); |
| 605 | schednetisr_remote((void *)(intptr_t)num); | |
| 606 | crit_exit(); | |
| a29576fc | 607 | #endif |
| bf82f9b7 | 608 | } |
| a91f9815 SZ |
609 | |
| 610 | #ifdef SMP | |
| 611 | ||
| 612 | static void | |
| 613 | netisr_barrier_dispatch(netmsg_t nmsg) | |
| 614 | { | |
| 615 | struct netmsg_barrier *msg = (struct netmsg_barrier *)nmsg; | |
| 616 | ||
| 617 | atomic_clear_cpumask(msg->br_cpumask, mycpu->gd_cpumask); | |
| 618 | if (*msg->br_cpumask == 0) | |
| 619 | wakeup(msg->br_cpumask); | |
| 620 | ||
| d0c7a72a SZ |
621 | for (;;) { |
| 622 | uint32_t done = msg->br_done; | |
| 623 | ||
| ca3321f8 | 624 | cpu_ccfence(); |
| d0c7a72a SZ |
625 | if ((done & NETISR_BR_NOTDONE) == 0) |
| 626 | break; | |
| 627 | ||
| ca3321f8 SZ |
628 | tsleep_interlock(&msg->br_done, 0); |
| 629 | if (atomic_cmpset_int(&msg->br_done, | |
| d0c7a72a | 630 | done, done | NETISR_BR_WAITDONE)) |
| ca3321f8 SZ |
631 | tsleep(&msg->br_done, PINTERLOCKED, "nbrdsp", 0); |
| 632 | } | |
| a91f9815 SZ |
633 | |
| 634 | lwkt_replymsg(&nmsg->lmsg, 0); | |
| 635 | } | |
| 636 | ||
| 637 | #endif | |
| 638 | ||
| 639 | struct netisr_barrier * | |
| 640 | netisr_barrier_create(void) | |
| 641 | { | |
| 642 | struct netisr_barrier *br; | |
| 643 | ||
| 644 | br = kmalloc(sizeof(*br), M_LWKTMSG, M_WAITOK | M_ZERO); | |
| 645 | return br; | |
| 646 | } | |
| 647 | ||
| 648 | void | |
| 649 | netisr_barrier_set(struct netisr_barrier *br) | |
| 650 | { | |
| 651 | #ifdef SMP | |
| 0503d1d0 | 652 | volatile cpumask_t other_cpumask; |
| a91f9815 SZ |
653 | int i, cur_cpuid; |
| 654 | ||
| 655 | KKASSERT(&curthread->td_msgport == cpu_portfn(0)); | |
| 656 | KKASSERT(!br->br_isset); | |
| 657 | ||
| 658 | other_cpumask = mycpu->gd_other_cpus & smp_active_mask; | |
| 659 | cur_cpuid = mycpuid; | |
| 660 | ||
| 661 | for (i = 0; i < ncpus; ++i) { | |
| 662 | struct netmsg_barrier *msg; | |
| 663 | ||
| 664 | if (i == cur_cpuid) | |
| 665 | continue; | |
| 666 | ||
| 667 | msg = kmalloc(sizeof(struct netmsg_barrier), | |
| 668 | M_LWKTMSG, M_WAITOK); | |
| 669 | netmsg_init(&msg->base, NULL, &netisr_afree_rport, | |
| 670 | MSGF_PRIORITY, netisr_barrier_dispatch); | |
| 671 | msg->br_cpumask = &other_cpumask; | |
| ca3321f8 | 672 | msg->br_done = NETISR_BR_NOTDONE; |
| a91f9815 SZ |
673 | |
| 674 | KKASSERT(br->br_msgs[i] == NULL); | |
| 675 | br->br_msgs[i] = msg; | |
| 676 | } | |
| 677 | ||
| 678 | for (i = 0; i < ncpus; ++i) { | |
| 679 | if (i == cur_cpuid) | |
| 680 | continue; | |
| 681 | lwkt_sendmsg(cpu_portfn(i), &br->br_msgs[i]->base.lmsg); | |
| 682 | } | |
| 683 | ||
| 684 | while (other_cpumask != 0) { | |
| 685 | tsleep_interlock(&other_cpumask, 0); | |
| 686 | if (other_cpumask != 0) | |
| 687 | tsleep(&other_cpumask, PINTERLOCKED, "nbrset", 0); | |
| 688 | } | |
| 689 | #endif | |
| 690 | br->br_isset = 1; | |
| 691 | } | |
| 692 | ||
| 693 | void | |
| 694 | netisr_barrier_rem(struct netisr_barrier *br) | |
| 695 | { | |
| 696 | #ifdef SMP | |
| 697 | int i, cur_cpuid; | |
| 698 | ||
| 699 | KKASSERT(&curthread->td_msgport == cpu_portfn(0)); | |
| 700 | KKASSERT(br->br_isset); | |
| 701 | ||
| 702 | cur_cpuid = mycpuid; | |
| 703 | for (i = 0; i < ncpus; ++i) { | |
| 704 | struct netmsg_barrier *msg = br->br_msgs[i]; | |
| d0c7a72a | 705 | uint32_t done; |
| a91f9815 SZ |
706 | |
| 707 | msg = br->br_msgs[i]; | |
| 708 | br->br_msgs[i] = NULL; | |
| 709 | ||
| 710 | if (i == cur_cpuid) | |
| 711 | continue; | |
| 712 | ||
| d0c7a72a SZ |
713 | done = atomic_swap_int(&msg->br_done, 0); |
| 714 | if (done & NETISR_BR_WAITDONE) | |
| 715 | wakeup(&msg->br_done); | |
| a91f9815 SZ |
716 | } |
| 717 | #endif | |
| 718 | br->br_isset = 0; | |
| 719 | } | |
| e6f77b88 SZ |
720 | |
| 721 | static void | |
| 722 | netisr_nohashck(struct mbuf *m, const struct pktinfo *pi __unused) | |
| 723 | { | |
| 724 | m->m_flags &= ~M_HASH; | |
| 725 | } | |
| 726 | ||
| 727 | void | |
| 728 | netisr_hashcheck(int num, struct mbuf *m, const struct pktinfo *pi) | |
| 729 | { | |
| 730 | struct netisr *ni; | |
| 731 | ||
| 732 | if (num < 0 || num >= NETISR_MAX) | |
| 733 | panic("Bad isr %d", num); | |
| 734 | ||
| 735 | /* | |
| 736 | * Valid netisr? | |
| 737 | */ | |
| 738 | ni = &netisrs[num]; | |
| 739 | if (ni->ni_handler == NULL) | |
| 740 | panic("Unregistered isr %d\n", num); | |
| 741 | ||
| 742 | ni->ni_hashck(m, pi); | |
| 743 | } |