| Commit | Line | Data |
|---|---|---|
| 66d6c637 JH |
1 | /* |
| 2 | * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved. | |
| 3 | * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. | |
| 4 | * Copyright (c) 2003 Jonathan Lemon. All rights reserved. | |
| 5 | * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. | |
| 6 | * | |
| 7 | * This code is derived from software contributed to The DragonFly Project | |
| 8 | * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon. | |
| 9 | * | |
| d849e575 MD |
10 | * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright |
| 11 | * into this one around July 8 2004. | |
| 12 | * | |
| 66d6c637 JH |
13 | * Redistribution and use in source and binary forms, with or without |
| 14 | * modification, are permitted provided that the following conditions | |
| 15 | * are met: | |
| 16 | * 1. Redistributions of source code must retain the above copyright | |
| 17 | * notice, this list of conditions and the following disclaimer. | |
| 18 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 19 | * notice, this list of conditions and the following disclaimer in the | |
| 20 | * documentation and/or other materials provided with the distribution. | |
| 21 | * 3. Neither the name of The DragonFly Project nor the names of its | |
| 22 | * contributors may be used to endorse or promote products derived | |
| 23 | * from this software without specific, prior written permission. | |
| 24 | * | |
| 25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 26 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 27 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
| 28 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
| 29 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 30 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
| 31 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 32 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
| 33 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 34 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 35 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| ef0fdad1 MD |
37 | */ |
| 38 | ||
| 39 | #include <sys/param.h> | |
| 40 | #include <sys/systm.h> | |
| bf82f9b7 | 41 | #include <sys/kernel.h> |
| 9eeaa8a9 | 42 | #include <sys/malloc.h> |
| bf82f9b7 | 43 | #include <sys/msgport.h> |
| ef0fdad1 MD |
44 | #include <sys/proc.h> |
| 45 | #include <sys/interrupt.h> | |
| 8bde602d JH |
46 | #include <sys/socket.h> |
| 47 | #include <sys/sysctl.h> | |
| 48e7b118 | 48 | #include <sys/socketvar.h> |
| 8bde602d JH |
49 | #include <net/if.h> |
| 50 | #include <net/if_var.h> | |
| ef0fdad1 MD |
51 | #include <net/netisr.h> |
| 52 | #include <machine/cpufunc.h> | |
| a91f9815 | 53 | #include <machine/smp.h> |
| ef0fdad1 | 54 | |
| 3227f1b8 MD |
55 | #include <sys/thread2.h> |
| 56 | #include <sys/msgport2.h> | |
| 4599cf19 | 57 | #include <net/netmsg2.h> |
| 684a93c4 | 58 | #include <sys/mplock2.h> |
| 3227f1b8 | 59 | |
| 002c1265 | 60 | static void netmsg_sync_func(netmsg_t msg); |
| c3c96e44 MD |
61 | static void netmsg_service_loop(void *arg); |
| 62 | static void cpu0_cpufn(struct mbuf **mp, int hoff); | |
| 5c703385 MD |
63 | |
| 64 | struct netmsg_port_registration { | |
| c3c96e44 MD |
65 | TAILQ_ENTRY(netmsg_port_registration) npr_entry; |
| 66 | lwkt_port_t npr_port; | |
| 67 | }; | |
| 68 | ||
| 69 | struct netmsg_rollup { | |
| 70 | TAILQ_ENTRY(netmsg_rollup) ru_entry; | |
| 71 | netisr_ru_t ru_func; | |
| 5c703385 MD |
72 | }; |
| 73 | ||
| a91f9815 SZ |
74 | struct netmsg_barrier { |
| 75 | struct netmsg_base base; | |
| 0503d1d0 | 76 | volatile cpumask_t *br_cpumask; |
| ca3321f8 | 77 | volatile uint32_t br_done; |
| a91f9815 SZ |
78 | }; |
| 79 | ||
| ca3321f8 SZ |
/*
 * States of netmsg_barrier.br_done.
 */
#define NETISR_BR_NOTDONE	0	/* initial value stored by netisr_barrier_set() */
#define NETISR_BR_DONE		1	/* stored by netisr_barrier_rem() to release the handler */
#define NETISR_BR_WAITDONE	2	/* stored by the handler just before it sleeps */
| 83 | ||
| a91f9815 SZ |
84 | struct netisr_barrier { |
| 85 | struct netmsg_barrier *br_msgs[MAXCPU]; | |
| 86 | int br_isset; | |
| 87 | }; | |
| 88 | ||
| bf82f9b7 | 89 | static struct netisr netisrs[NETISR_MAX]; |
| 5c703385 | 90 | static TAILQ_HEAD(,netmsg_port_registration) netreglist; |
| c3c96e44 | 91 | static TAILQ_HEAD(,netmsg_rollup) netrulist; |
| bf82f9b7 MD |
92 | |
| 93 | /* Per-CPU thread to handle any protocol. */ | |
| c3c96e44 | 94 | static struct thread netisr_cpu[MAXCPU]; |
| 3227f1b8 | 95 | lwkt_port netisr_afree_rport; |
| c3d495a1 | 96 | lwkt_port netisr_afree_free_so_rport; |
| a29576fc | 97 | lwkt_port netisr_adone_rport; |
| 6aad077d | 98 | lwkt_port netisr_apanic_rport; |
| 3efe7008 | 99 | lwkt_port netisr_sync_port; |
| 3227f1b8 | 100 | |
| fb0f29c4 MD |
101 | static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t); |
| 102 | ||
| 92db3805 | 103 | SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr"); |
| ff4a1403 | 104 | |
| 3227f1b8 MD |
105 | /* |
| 106 | * netisr_afree_rport replymsg function, only used to handle async | |
| 107 | * messages which the sender has abandoned to their fate. | |
| 108 | */ | |
| 109 | static void | |
| 110 | netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) | |
| 111 | { | |
| c3c96e44 | 112 | kfree(msg, M_LWKTMSG); |
| 3227f1b8 | 113 | } |
| ef0fdad1 | 114 | |
| c3d495a1 MD |
115 | static void |
| 116 | netisr_autofree_free_so_reply(lwkt_port_t port, lwkt_msg_t msg) | |
| 117 | { | |
| 118 | sofree(((netmsg_t)msg)->base.nm_so); | |
| 119 | kfree(msg, M_LWKTMSG); | |
| 120 | } | |
| 121 | ||
| dc22b3aa | 122 | /* |
| fb0f29c4 MD |
123 | * We need a custom putport function to handle the case where the |
| 124 | * message target is the current thread's message port. This case | |
| 125 | * can occur when the TCP or UDP stack does a direct callback to NFS and NFS | |
| 126 | * then turns around and executes a network operation synchronously. | |
| 3efe7008 | 127 | * |
| fb0f29c4 MD |
128 | * To prevent deadlocking, we must execute these self-referential messages |
| 129 | * synchronously, effectively turning the message into a glorified direct | |
| 130 | * procedure call back into the protocol stack. The operation must be | |
| 131 | * complete on return or we will deadlock, so panic if it isn't. | |
| 002c1265 MD |
132 | * |
| 133 | * However, the target function is under no obligation to immediately | |
| 134 | * reply the message. It may forward it elsewhere. | |
| dc22b3aa | 135 | */ |
| 5c703385 | 136 | static int |
| dc22b3aa JH |
137 | netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg) |
| 138 | { | |
| 002c1265 | 139 | netmsg_base_t nmsg = (void *)lmsg; |
| c3c96e44 MD |
140 | |
| 141 | if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) { | |
| 002c1265 | 142 | nmsg->nm_dispatch((netmsg_t)nmsg); |
| c3c96e44 MD |
143 | return(EASYNC); |
| 144 | } else { | |
| 145 | return(netmsg_fwd_port_fn(port, lmsg)); | |
| 146 | } | |
| dc22b3aa JH |
147 | } |
| 148 | ||
| 3efe7008 MD |
149 | /* |
| 150 | * UNIX DOMAIN sockets still have to run their uipc functions synchronously, | |
| 151 | * because they depend on the user proc context for a number of things | |
| 152 | * (like creds) which we have not yet incorporated into the message structure. | |
| 153 | * | |
| 154 | * However, we maintain or message/port abstraction. Having a special | |
| 155 | * synchronous port which runs the commands synchronously gives us the | |
| 156 | * ability to serialize operations in one place later on when we start | |
| 157 | * removing the BGL. | |
| 3efe7008 MD |
158 | */ |
| 159 | static int | |
| 160 | netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg) | |
| 161 | { | |
| 002c1265 | 162 | netmsg_base_t nmsg = (void *)lmsg; |
| 3efe7008 | 163 | |
| c3c96e44 | 164 | KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0); |
| e0383bf3 | 165 | |
| c3c96e44 | 166 | lmsg->ms_target_port = port; /* required for abort */ |
| 002c1265 | 167 | nmsg->nm_dispatch((netmsg_t)nmsg); |
| c3c96e44 | 168 | return(EASYNC); |
| 3efe7008 MD |
169 | } |
| 170 | ||
| 171 | static void | |
| bf82f9b7 | 172 | netisr_init(void) |
| ef0fdad1 | 173 | { |
| c3c96e44 MD |
174 | int i; |
| 175 | ||
| 176 | TAILQ_INIT(&netreglist); | |
| 177 | TAILQ_INIT(&netrulist); | |
| 178 | ||
| 179 | /* | |
| 180 | * Create default per-cpu threads for generic protocol handling. | |
| 181 | */ | |
| 182 | for (i = 0; i < ncpus; ++i) { | |
| 183 | lwkt_create(netmsg_service_loop, NULL, NULL, | |
| 184 | &netisr_cpu[i], TDF_STOPREQ, i, | |
| 185 | "netisr_cpu %d", i); | |
| 186 | netmsg_service_port_init(&netisr_cpu[i].td_msgport); | |
| 187 | lwkt_schedule(&netisr_cpu[i]); | |
| 188 | } | |
| 189 | ||
| 190 | /* | |
| 191 | * The netisr_afree_rport is a special reply port which automatically | |
| 192 | * frees the replied message. The netisr_adone_rport simply marks | |
| 193 | * the message as being done. The netisr_apanic_rport panics if | |
| 194 | * the message is replied to. | |
| 195 | */ | |
| 196 | lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply); | |
| c3d495a1 MD |
197 | lwkt_initport_replyonly(&netisr_afree_free_so_rport, |
| 198 | netisr_autofree_free_so_reply); | |
| c3c96e44 MD |
199 | lwkt_initport_replyonly_null(&netisr_adone_rport); |
| 200 | lwkt_initport_panic(&netisr_apanic_rport); | |
| 201 | ||
| 202 | /* | |
| 203 | * The netisr_syncport is a special port which executes the message | |
| 204 | * synchronously and waits for it if EASYNC is returned. | |
| 205 | */ | |
| 206 | lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport); | |
| ef0fdad1 MD |
207 | } |
| 208 | ||
| b2632176 | 209 | SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL); |
| bf82f9b7 | 210 | |
| 5c703385 MD |
211 | /* |
| 212 | * Finish initializing the message port for a netmsg service. This also | |
| 213 | * registers the port for synchronous cleanup operations such as when an | |
| 214 | * ifnet is being destroyed. There is no deregistration API yet. | |
| 215 | */ | |
| 216 | void | |
| 217 | netmsg_service_port_init(lwkt_port_t port) | |
| 218 | { | |
| c3c96e44 MD |
219 | struct netmsg_port_registration *reg; |
| 220 | ||
| 221 | /* | |
| 222 | * Override the putport function. Our custom function checks for | |
| 223 | * self-references and executes such commands synchronously. | |
| 224 | */ | |
| 225 | if (netmsg_fwd_port_fn == NULL) | |
| 226 | netmsg_fwd_port_fn = port->mp_putport; | |
| 227 | KKASSERT(netmsg_fwd_port_fn == port->mp_putport); | |
| 228 | port->mp_putport = netmsg_put_port; | |
| 229 | ||
| 230 | /* | |
| 231 | * Keep track of ports using the netmsg API so we can synchronize | |
| 232 | * certain operations (such as freeing an ifnet structure) across all | |
| 233 | * consumers. | |
| 234 | */ | |
| 235 | reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO); | |
| 236 | reg->npr_port = port; | |
| 237 | TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry); | |
| 5c703385 MD |
238 | } |
| 239 | ||
| 240 | /* | |
| 241 | * This function synchronizes the caller with all netmsg services. For | |
| 242 | * example, if an interface is being removed we must make sure that all | |
| 243 | * packets related to that interface complete processing before the structure | |
| 244 | * can actually be freed. This sort of synchronization is an alternative to | |
| 245 | * ref-counting the netif, removing the ref counting overhead in favor of | |
| 246 | * placing additional overhead in the netif freeing sequence (where it is | |
| 247 | * inconsequential). | |
| 248 | */ | |
| 249 | void | |
| 250 | netmsg_service_sync(void) | |
| 251 | { | |
| c3c96e44 | 252 | struct netmsg_port_registration *reg; |
| 002c1265 | 253 | struct netmsg_base smsg; |
| 5c703385 | 254 | |
| c3c96e44 | 255 | netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_func); |
| 5c703385 | 256 | |
| c3c96e44 | 257 | TAILQ_FOREACH(reg, &netreglist, npr_entry) { |
| 002c1265 | 258 | lwkt_domsg(reg->npr_port, &smsg.lmsg, 0); |
| c3c96e44 | 259 | } |
| 5c703385 MD |
260 | } |
| 261 | ||
| 262 | /* | |
| 263 | * The netmsg function simply replies the message. API semantics require | |
| 264 | * EASYNC to be returned if the netmsg function disposes of the message. | |
| 265 | */ | |
| 4599cf19 | 266 | static void |
| 002c1265 | 267 | netmsg_sync_func(netmsg_t msg) |
| 5c703385 | 268 | { |
| 002c1265 | 269 | lwkt_replymsg(&msg->lmsg, 0); |
| bf82f9b7 | 270 | } |
| 8bde602d | 271 | |
| bf82f9b7 | 272 | /* |
| 92db3805 SZ |
273 | * Generic netmsg service loop. Some protocols may roll their own but all |
| 274 | * must do the basic command dispatch function call done here. | |
| 95f8b5ce | 275 | */ |
| c3c96e44 | 276 | static void |
| 92db3805 | 277 | netmsg_service_loop(void *arg) |
| 95f8b5ce | 278 | { |
| c3c96e44 | 279 | struct netmsg_rollup *ru; |
| 002c1265 | 280 | netmsg_base_t msg; |
| c3c96e44 MD |
281 | thread_t td = curthread;; |
| 282 | int limit; | |
| 283 | ||
| 284 | while ((msg = lwkt_waitport(&td->td_msgport, 0))) { | |
| 285 | /* | |
| 286 | * Run up to 512 pending netmsgs. | |
| 287 | */ | |
| 288 | limit = 512; | |
| 289 | do { | |
| 290 | KASSERT(msg->nm_dispatch != NULL, | |
| 291 | ("netmsg_service isr %d badmsg\n", | |
| 002c1265 | 292 | msg->lmsg.u.ms_result)); |
| 0ce0603e MD |
293 | if (msg->nm_so && |
| 294 | msg->nm_so->so_port != &td->td_msgport) { | |
| 295 | /* | |
| 296 | * Sockets undergoing connect or disconnect | |
| 297 | * ops can change ports on us. Chase the | |
| 298 | * port. | |
| 299 | */ | |
| 300 | kprintf("netmsg_service_loop: Warning, " | |
| 301 | "port changed so=%p\n", msg->nm_so); | |
| 302 | lwkt_forwardmsg(msg->nm_so->so_port, | |
| 002c1265 | 303 | &msg->lmsg); |
| 0ce0603e MD |
304 | } else { |
| 305 | /* | |
| 306 | * We are on the correct port, dispatch it. | |
| 307 | */ | |
| 002c1265 | 308 | msg->nm_dispatch((netmsg_t)msg); |
| 0ce0603e | 309 | } |
| c3c96e44 MD |
310 | if (--limit == 0) |
| 311 | break; | |
| 312 | } while ((msg = lwkt_getport(&td->td_msgport)) != NULL); | |
| 313 | ||
| 314 | /* | |
| 315 | * Run all registered rollup functions for this cpu | |
| 316 | * (e.g. tcp_willblock()). | |
| 317 | */ | |
| 318 | TAILQ_FOREACH(ru, &netrulist, ru_entry) | |
| 319 | ru->ru_func(); | |
| 320 | } | |
| 95f8b5ce SZ |
321 | } |
| 322 | ||
| 323 | /* | |
| c3c96e44 MD |
324 | * Forward a packet to a netisr service function. |
| 325 | * | |
| 326 | * If the packet has not been assigned to a protocol thread we call | |
| 327 | * the port characterization function to assign it. The caller must | |
| 328 | * clear M_HASH (or not have set it in the first place) if the caller | |
| 329 | * wishes the packet to be recharacterized. | |
| bf82f9b7 | 330 | */ |
| c3c96e44 MD |
331 | int |
| 332 | netisr_queue(int num, struct mbuf *m) | |
| bf82f9b7 | 333 | { |
| c3c96e44 MD |
334 | struct netisr *ni; |
| 335 | struct netmsg_packet *pmsg; | |
| 336 | lwkt_port_t port; | |
| 337 | ||
| c157ff7a | 338 | KASSERT((num > 0 && num <= NELEM(netisrs)), |
| c3c96e44 MD |
339 | ("Bad isr %d", num)); |
| 340 | ||
| 341 | ni = &netisrs[num]; | |
| 342 | if (ni->ni_handler == NULL) { | |
| 343 | kprintf("Unregistered isr %d\n", num); | |
| 344 | m_freem(m); | |
| 345 | return (EIO); | |
| 346 | } | |
| 347 | ||
| 348 | /* | |
| 349 | * Figure out which protocol thread to send to. This does not | |
| 350 | * have to be perfect but performance will be really good if it | |
| 351 | * is correct. Major protocol inputs such as ip_input() will | |
| 352 | * re-characterize the packet as necessary. | |
| 353 | */ | |
| 354 | if ((m->m_flags & M_HASH) == 0) { | |
| 355 | ni->ni_cpufn(&m, 0); | |
| 356 | if (m == NULL) { | |
| 357 | m_freem(m); | |
| 358 | return (EIO); | |
| 359 | } | |
| 360 | if ((m->m_flags & M_HASH) == 0) { | |
| 361 | kprintf("netisr_queue(%d): packet hash failed\n", num); | |
| 362 | m_freem(m); | |
| 363 | return (EIO); | |
| 364 | } | |
| 365 | } | |
| 366 | ||
| 367 | /* | |
| 368 | * Get the protocol port based on the packet hash, initialize | |
| 369 | * the netmsg, and send it off. | |
| 370 | */ | |
| 371 | port = cpu_portfn(m->m_pkthdr.hash); | |
| 372 | pmsg = &m->m_hdr.mh_netmsg; | |
| 002c1265 | 373 | netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, |
| c3c96e44 MD |
374 | 0, ni->ni_handler); |
| 375 | pmsg->nm_packet = m; | |
| 002c1265 MD |
376 | pmsg->base.lmsg.u.ms_result = num; |
| 377 | lwkt_sendmsg(port, &pmsg->base.lmsg); | |
| c3c96e44 MD |
378 | |
| 379 | return (0); | |
| 8bde602d JH |
380 | } |
| 381 | ||
| 382 | /* | |
| c3c96e44 MD |
383 | * Pre-characterization of a deeper portion of the packet for the |
| 384 | * requested isr. | |
| 385 | * | |
| 386 | * The base of the ISR type (e.g. IP) that we want to characterize is | |
| 387 | * at (hoff) relative to the beginning of the mbuf. This allows | |
| 388 | * e.g. ether_input_chain() to not have to adjust the m_data/m_len. | |
| 8bde602d | 389 | */ |
| c3c96e44 MD |
390 | void |
| 391 | netisr_characterize(int num, struct mbuf **mp, int hoff) | |
| ef0fdad1 | 392 | { |
| c3c96e44 MD |
393 | struct netisr *ni; |
| 394 | struct mbuf *m; | |
| 395 | ||
| 396 | /* | |
| 397 | * Validation | |
| 398 | */ | |
| c3c96e44 MD |
399 | m = *mp; |
| 400 | KKASSERT(m != NULL); | |
| 401 | ||
| e6318d16 MD |
402 | if (num < 0 || num >= NETISR_MAX) { |
| 403 | if (num == NETISR_MAX) { | |
| 404 | m->m_flags |= M_HASH; | |
| 405 | m->m_pkthdr.hash = 0; | |
| 406 | return; | |
| 407 | } | |
| 408 | panic("Bad isr %d", num); | |
| 409 | } | |
| 410 | ||
| c3c96e44 MD |
411 | /* |
| 412 | * Valid netisr? | |
| 413 | */ | |
| 414 | ni = &netisrs[num]; | |
| 415 | if (ni->ni_handler == NULL) { | |
| 416 | kprintf("Unregistered isr %d\n", num); | |
| 417 | m_freem(m); | |
| 418 | *mp = NULL; | |
| 419 | } | |
| 420 | ||
| 421 | /* | |
| 422 | * Characterize the packet | |
| 423 | */ | |
| 424 | if ((m->m_flags & M_HASH) == 0) { | |
| 425 | ni->ni_cpufn(mp, hoff); | |
| 426 | m = *mp; | |
| 427 | if (m && (m->m_flags & M_HASH) == 0) | |
| 428 | kprintf("netisr_queue(%d): packet hash failed\n", num); | |
| 429 | } | |
| 8bde602d JH |
430 | } |
| 431 | ||
| bf82f9b7 | 432 | void |
| c3c96e44 | 433 | netisr_register(int num, netisr_fn_t handler, netisr_cpufn_t cpufn) |
| 8bde602d | 434 | { |
| c3c96e44 | 435 | struct netisr *ni; |
| ef0fdad1 | 436 | |
| c157ff7a | 437 | KASSERT((num > 0 && num <= NELEM(netisrs)), |
| c3c96e44 MD |
438 | ("netisr_register: bad isr %d", num)); |
| 439 | KKASSERT(handler != NULL); | |
| 440 | ||
| 441 | if (cpufn == NULL) | |
| 442 | cpufn = cpu0_cpufn; | |
| 8bde602d | 443 | |
| c3c96e44 MD |
444 | ni = &netisrs[num]; |
| 445 | ||
| 446 | ni->ni_handler = handler; | |
| 447 | ni->ni_cpufn = cpufn; | |
| 448 | netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL); | |
| ef0fdad1 | 449 | } |
| bf82f9b7 | 450 | |
| c3c96e44 MD |
451 | void |
| 452 | netisr_register_rollup(netisr_ru_t ru_func) | |
| bf82f9b7 | 453 | { |
| c3c96e44 | 454 | struct netmsg_rollup *ru; |
| 97a43e72 | 455 | |
| c3c96e44 MD |
456 | ru = kmalloc(sizeof(*ru), M_TEMP, M_WAITOK|M_ZERO); |
| 457 | ru->ru_func = ru_func; | |
| 458 | TAILQ_INSERT_TAIL(&netrulist, ru, ru_entry); | |
| bf82f9b7 MD |
459 | } |
| 460 | ||
| c3c96e44 MD |
461 | /* |
| 462 | * Return the message port for the general protocol message servicing | |
| 463 | * thread for a particular cpu. | |
| 464 | */ | |
| ecdefdda MD |
465 | lwkt_port_t |
| 466 | cpu_portfn(int cpu) | |
| 467 | { | |
| c3c96e44 MD |
468 | KKASSERT(cpu >= 0 && cpu < ncpus); |
| 469 | return (&netisr_cpu[cpu].td_msgport); | |
| ecdefdda MD |
470 | } |
| 471 | ||
| c244d613 | 472 | /* |
| c3c96e44 | 473 | * Return the current cpu's network protocol thread. |
| c244d613 SZ |
474 | */ |
| 475 | lwkt_port_t | |
| 476 | cur_netport(void) | |
| 477 | { | |
| c3c96e44 | 478 | return(cpu_portfn(mycpu->gd_cpuid)); |
| c244d613 SZ |
479 | } |
| 480 | ||
| c3c96e44 | 481 | /* |
| c3c96e44 MD |
482 | * Return a default protocol control message processing thread port |
| 483 | */ | |
| 3efe7008 | 484 | lwkt_port_t |
| e3873585 SZ |
485 | cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused, |
| 486 | void *extra __unused) | |
| 487 | { | |
| c3c96e44 | 488 | return (&netisr_cpu[0].td_msgport); |
| e3873585 SZ |
489 | } |
| 490 | ||
| c3c96e44 | 491 | /* |
| c3c96e44 MD |
492 | * This is a default netisr packet characterization function which |
| 493 | * sets M_HASH. If a netisr is registered with a NULL cpufn function | |
| 494 | * this one is assigned. | |
| 495 | * | |
| 496 | * This function makes no attempt to validate the packet. | |
| 497 | */ | |
| 498 | static void | |
| 499 | cpu0_cpufn(struct mbuf **mp, int hoff __unused) | |
| 500 | { | |
| 501 | struct mbuf *m = *mp; | |
| 502 | ||
| 503 | m->m_flags |= M_HASH; | |
| 504 | m->m_pkthdr.hash = 0; | |
| 3efe7008 MD |
505 | } |
| 506 | ||
| bf82f9b7 | 507 | /* |
| a29576fc | 508 | * schednetisr() is used to call the netisr handler from the appropriate |
| 9eeaa8a9 | 509 | * netisr thread for polling and other purposes. |
| a29576fc MD |
510 | * |
| 511 | * This function may be called from a hard interrupt or IPI and must be | |
| 512 | * MP SAFE and non-blocking. We use a fixed per-cpu message instead of | |
| 513 | * trying to allocate one. We must get ourselves onto the target cpu | |
| 514 | * to safely check the MSGF_DONE bit on the message but since the message | |
| 515 | * will be sent to that cpu anyway this does not add any extra work beyond | |
| 516 | * what lwkt_sendmsg() would have already had to do to schedule the target | |
| 517 | * thread. | |
| bf82f9b7 | 518 | */ |
| a29576fc MD |
519 | static void |
| 520 | schednetisr_remote(void *data) | |
| bf82f9b7 | 521 | { |
| c3c96e44 MD |
522 | int num = (int)(intptr_t)data; |
| 523 | struct netisr *ni = &netisrs[num]; | |
| 524 | lwkt_port_t port = &netisr_cpu[0].td_msgport; | |
| 002c1265 | 525 | netmsg_base_t pmsg; |
| c3c96e44 MD |
526 | |
| 527 | pmsg = &netisrs[num].ni_netmsg; | |
| 002c1265 | 528 | if (pmsg->lmsg.ms_flags & MSGF_DONE) { |
| c3c96e44 | 529 | netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler); |
| 002c1265 MD |
530 | pmsg->lmsg.u.ms_result = num; |
| 531 | lwkt_sendmsg(port, &pmsg->lmsg); | |
| c3c96e44 | 532 | } |
| a29576fc MD |
533 | } |
| 534 | ||
| 535 | void | |
| 536 | schednetisr(int num) | |
| 537 | { | |
| c157ff7a | 538 | KASSERT((num > 0 && num <= NELEM(netisrs)), |
| c3c96e44 MD |
539 | ("schednetisr: bad isr %d", num)); |
| 540 | KKASSERT(netisrs[num].ni_handler != NULL); | |
| a29576fc | 541 | #ifdef SMP |
| c3c96e44 MD |
542 | if (mycpu->gd_cpuid != 0) { |
| 543 | lwkt_send_ipiq(globaldata_find(0), | |
| 544 | schednetisr_remote, (void *)(intptr_t)num); | |
| 545 | } else { | |
| 546 | crit_enter(); | |
| 547 | schednetisr_remote((void *)(intptr_t)num); | |
| 548 | crit_exit(); | |
| 549 | } | |
| a29576fc | 550 | #else |
| c3c96e44 MD |
551 | crit_enter(); |
| 552 | schednetisr_remote((void *)(intptr_t)num); | |
| 553 | crit_exit(); | |
| a29576fc | 554 | #endif |
| bf82f9b7 | 555 | } |
| a91f9815 SZ |
556 | |
| 557 | #ifdef SMP | |
| 558 | ||
| 559 | static void | |
| 560 | netisr_barrier_dispatch(netmsg_t nmsg) | |
| 561 | { | |
| 562 | struct netmsg_barrier *msg = (struct netmsg_barrier *)nmsg; | |
| 563 | ||
| 564 | atomic_clear_cpumask(msg->br_cpumask, mycpu->gd_cpumask); | |
| 565 | if (*msg->br_cpumask == 0) | |
| 566 | wakeup(msg->br_cpumask); | |
| 567 | ||
| ca3321f8 SZ |
568 | while (msg->br_done == NETISR_BR_NOTDONE) { |
| 569 | cpu_ccfence(); | |
| 570 | tsleep_interlock(&msg->br_done, 0); | |
| 571 | if (atomic_cmpset_int(&msg->br_done, | |
| 572 | NETISR_BR_NOTDONE, NETISR_BR_WAITDONE)) | |
| 573 | tsleep(&msg->br_done, PINTERLOCKED, "nbrdsp", 0); | |
| 574 | } | |
| a91f9815 SZ |
575 | |
| 576 | lwkt_replymsg(&nmsg->lmsg, 0); | |
| 577 | } | |
| 578 | ||
| 579 | #endif | |
| 580 | ||
| 581 | struct netisr_barrier * | |
| 582 | netisr_barrier_create(void) | |
| 583 | { | |
| 584 | struct netisr_barrier *br; | |
| 585 | ||
| 586 | br = kmalloc(sizeof(*br), M_LWKTMSG, M_WAITOK | M_ZERO); | |
| 587 | return br; | |
| 588 | } | |
| 589 | ||
| 590 | void | |
| 591 | netisr_barrier_set(struct netisr_barrier *br) | |
| 592 | { | |
| 593 | #ifdef SMP | |
| 0503d1d0 | 594 | volatile cpumask_t other_cpumask; |
| a91f9815 SZ |
595 | int i, cur_cpuid; |
| 596 | ||
| 597 | KKASSERT(&curthread->td_msgport == cpu_portfn(0)); | |
| 598 | KKASSERT(!br->br_isset); | |
| 599 | ||
| 600 | other_cpumask = mycpu->gd_other_cpus & smp_active_mask; | |
| 601 | cur_cpuid = mycpuid; | |
| 602 | ||
| 603 | for (i = 0; i < ncpus; ++i) { | |
| 604 | struct netmsg_barrier *msg; | |
| 605 | ||
| 606 | if (i == cur_cpuid) | |
| 607 | continue; | |
| 608 | ||
| 609 | msg = kmalloc(sizeof(struct netmsg_barrier), | |
| 610 | M_LWKTMSG, M_WAITOK); | |
| 611 | netmsg_init(&msg->base, NULL, &netisr_afree_rport, | |
| 612 | MSGF_PRIORITY, netisr_barrier_dispatch); | |
| 613 | msg->br_cpumask = &other_cpumask; | |
| ca3321f8 | 614 | msg->br_done = NETISR_BR_NOTDONE; |
| a91f9815 SZ |
615 | |
| 616 | KKASSERT(br->br_msgs[i] == NULL); | |
| 617 | br->br_msgs[i] = msg; | |
| 618 | } | |
| 619 | ||
| 620 | for (i = 0; i < ncpus; ++i) { | |
| 621 | if (i == cur_cpuid) | |
| 622 | continue; | |
| 623 | lwkt_sendmsg(cpu_portfn(i), &br->br_msgs[i]->base.lmsg); | |
| 624 | } | |
| 625 | ||
| 626 | while (other_cpumask != 0) { | |
| 627 | tsleep_interlock(&other_cpumask, 0); | |
| 628 | if (other_cpumask != 0) | |
| 629 | tsleep(&other_cpumask, PINTERLOCKED, "nbrset", 0); | |
| 630 | } | |
| 631 | #endif | |
| 632 | br->br_isset = 1; | |
| 633 | } | |
| 634 | ||
| 635 | void | |
| 636 | netisr_barrier_rem(struct netisr_barrier *br) | |
| 637 | { | |
| 638 | #ifdef SMP | |
| 639 | int i, cur_cpuid; | |
| 640 | ||
| 641 | KKASSERT(&curthread->td_msgport == cpu_portfn(0)); | |
| 642 | KKASSERT(br->br_isset); | |
| 643 | ||
| 644 | cur_cpuid = mycpuid; | |
| 645 | for (i = 0; i < ncpus; ++i) { | |
| 646 | struct netmsg_barrier *msg = br->br_msgs[i]; | |
| 647 | ||
| 648 | msg = br->br_msgs[i]; | |
| 649 | br->br_msgs[i] = NULL; | |
| 650 | ||
| 651 | if (i == cur_cpuid) | |
| 652 | continue; | |
| 653 | ||
| ca3321f8 SZ |
654 | for (;;) { |
| 655 | if (atomic_cmpset_int(&msg->br_done, | |
| 656 | NETISR_BR_WAITDONE, NETISR_BR_DONE)) { | |
| 657 | wakeup(&msg->br_done); | |
| 658 | break; | |
| 659 | } | |
| 660 | } | |
| a91f9815 SZ |
661 | } |
| 662 | #endif | |
| 663 | br->br_isset = 0; | |
| 664 | } |