From c3c96e4421a1087a390825eac6c01c9ed9182387 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 9 Sep 2010 15:13:36 -0700 Subject: [PATCH] network - Completely revamp the netisr / dispatch code * All netisrs are dispatched MPSAFE (followup in later commits) * Centralize the protocol threads. There is now just one thread per cpu managed by the netisr code. No more separate tcp/udp threads on each cpu. * Make the mbuf M_HASH/m_pkthdr.hash mechanic the central routing mechanic for netmsgs. * Remove the netisr ip_mport and pktinfo_portfn stuff and replace with a cpufn function which handles M_HASH/m_pkthdr.hash when M_HASH is not already set. * Seriously clean up the packet input paths. Adjust ether_input_chain() and friends to not have to adjust the mbuf forwards and backwards, instead pass a header offset to the ni_cpufn function. The ip pullup and other related code will use the offset to determine where the ip header is within the packet. --- sys/bus/usb/usb_ethersubr.c | 7 +- sys/dev/acpica5/acpi_cpu_pstate.c | 14 +- sys/dev/netif/ic/if_ic.c | 2 +- sys/kern/kern_poll.c | 18 +- sys/net/bpf.c | 2 +- sys/net/bridge/if_bridge.c | 2 +- sys/net/ef/if_ef.c | 8 +- sys/net/faith/if_faith.c | 3 +- sys/net/gif/if_gif.c | 3 +- sys/net/if.c | 22 +- sys/net/if_atmsubr.c | 3 +- sys/net/if_ethersubr.c | 146 ++---- sys/net/if_poll.c | 20 +- sys/net/if_var.h | 1 - sys/net/ipfw/ip_fw2.c | 4 +- sys/net/netisr.c | 667 ++++++++++++--------------- sys/net/netisr.h | 39 +- sys/net/netmsg.h | 174 +++---- sys/net/netmsg2.h | 12 +- sys/net/ppp/if_ppp.c | 7 +- sys/net/sppp/if_spppsubr.c | 13 +- sys/net/stf/if_stf.c | 2 +- sys/net/tun/if_tun.c | 2 +- sys/netbt/bt_input.c | 4 + sys/netbt/bt_proto.c | 3 +- sys/netgraph/iface/ng_iface.c | 2 +- sys/netgraph/netgraph/ng_base.c | 8 +- sys/netgraph7/ng_base.c | 4 +- sys/netgraph7/ng_iface.c | 2 +- sys/netgraph7/ng_ip_input.c | 2 +- sys/netinet/if_ether.c | 20 +- sys/netinet/in_proto.c | 7 +- sys/netinet/ip_demux.c | 178 +++---- 
sys/netinet/ip_divert.c | 15 +- sys/netinet/ip_flow.c | 6 +- sys/netinet/ip_gre.c | 4 +- sys/netinet/ip_input.c | 121 ++--- sys/netinet/ip_var.h | 9 +- sys/netinet/tcp_subr.c | 65 +-- sys/netinet/tcp_syncache.c | 4 +- sys/netinet/tcp_usrreq.c | 4 +- sys/netinet/tcp_var.h | 1 - sys/netinet/udp_usrreq.c | 16 +- sys/netinet6/ah_input.c | 2 +- sys/netinet6/esp_input.c | 2 +- sys/netinet6/frag6.c | 5 +- sys/netinet6/in6_proto.c | 7 +- sys/netinet6/ip6_input.c | 5 +- sys/netinet6/ip6_var.h | 2 +- sys/netproto/atalk/aarp.c | 5 +- sys/netproto/atalk/ddp_input.c | 7 +- sys/netproto/atalk/ddp_usrreq.c | 9 +- sys/netproto/atm/atm_subr.c | 5 +- sys/netproto/atm/ipatm/ipatm_input.c | 2 +- sys/netproto/ipx/ipx_input.c | 8 +- sys/netproto/ipx/ipx_ip.c | 2 +- sys/netproto/mpls/mpls_demux.c | 39 +- sys/netproto/mpls/mpls_input.c | 15 +- sys/netproto/mpls/mpls_var.h | 3 +- sys/netproto/natm/natm.c | 11 +- sys/netproto/ns/ns_input.c | 8 +- sys/netproto/ns/ns_ip.c | 2 +- sys/sys/mbuf.h | 3 +- sys/sys/thread.h | 4 +- 64 files changed, 783 insertions(+), 1009 deletions(-) diff --git a/sys/bus/usb/usb_ethersubr.c b/sys/bus/usb/usb_ethersubr.c index 0cda6b4ead..c284b98ced 100644 --- a/sys/bus/usb/usb_ethersubr.c +++ b/sys/bus/usb/usb_ethersubr.c @@ -59,6 +59,7 @@ #include #include +#include #include #include @@ -79,9 +80,12 @@ usbintr(struct netmsg *msg) struct mbuf *m = ((struct netmsg_packet *)msg)->nm_packet; struct ifnet *ifp; + /* not MPSAFE */ + get_mplock(); ifp = m->m_pkthdr.rcvif; (*ifp->if_input)(ifp, m); /* the msg is embedded in the mbuf, do not reply it */ + rel_mplock(); } void @@ -89,8 +93,7 @@ usb_register_netisr(void) { if (netisr_inited == 0) { netisr_inited = 1; - netisr_register(NETISR_USB, cpu0_portfn, pktinfo_portfn_notsupp, - usbintr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_USB, usbintr, NULL); } } diff --git a/sys/dev/acpica5/acpi_cpu_pstate.c b/sys/dev/acpica5/acpi_cpu_pstate.c index 1e59491fbf..97f2ccbcde 100644 --- a/sys/dev/acpica5/acpi_cpu_pstate.c 
+++ b/sys/dev/acpica5/acpi_cpu_pstate.c @@ -940,8 +940,7 @@ acpi_pst_check_csr(struct acpi_pst_softc *sc) return 0; netmsg_init(&msg.nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE | MSGF_PRIORITY, - acpi_pst_check_csr_handler); + MSGF_PRIORITY, acpi_pst_check_csr_handler); msg.ctrl = &sc->pst_creg; msg.status = &sc->pst_sreg; @@ -966,8 +965,7 @@ acpi_pst_check_pstates(struct acpi_pst_softc *sc) return 0; netmsg_init(&nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE | MSGF_PRIORITY, - acpi_pst_check_pstates_handler); + MSGF_PRIORITY, acpi_pst_check_pstates_handler); return lwkt_domsg(cpu_portfn(sc->pst_cpuid), &nmsg.nm_lmsg, 0); } @@ -991,7 +989,7 @@ acpi_pst_init(struct acpi_pst_softc *sc) return 0; netmsg_init(&msg.nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE | MSGF_PRIORITY, acpi_pst_init_handler); + MSGF_PRIORITY, acpi_pst_init_handler); msg.ctrl = &sc->pst_creg; msg.status = &sc->pst_sreg; @@ -1017,8 +1015,7 @@ acpi_pst_set_pstate(struct acpi_pst_softc *sc, const struct acpi_pstate *pstate) KKASSERT(acpi_pst_md != NULL); netmsg_init(&msg.nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE | MSGF_PRIORITY, - acpi_pst_set_pstate_handler); + MSGF_PRIORITY, acpi_pst_set_pstate_handler); msg.nmsg.nm_lmsg.u.ms_resultp = __DECONST(void *, pstate); msg.ctrl = &sc->pst_creg; msg.status = &sc->pst_sreg; @@ -1047,8 +1044,7 @@ acpi_pst_get_pstate(struct acpi_pst_softc *sc) return 0; netmsg_init(&msg.nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE | MSGF_PRIORITY, - acpi_pst_get_pstate_handler); + MSGF_PRIORITY, acpi_pst_get_pstate_handler); msg.status = &sc->pst_sreg; lwkt_domsg(cpu_portfn(sc->pst_cpuid), &msg.nmsg.nm_lmsg, 0); diff --git a/sys/dev/netif/ic/if_ic.c b/sys/dev/netif/ic/if_ic.c index 1be8d0f760..72620dc2d3 100644 --- a/sys/dev/netif/ic/if_ic.c +++ b/sys/dev/netif/ic/if_ic.c @@ -296,7 +296,7 @@ icintr (device_t dev, int event, char *ptr) top = m_devget(sc->ic_ifbuf + ICHDRLEN, len, 0, &sc->ic_if, 0); if (top) - netisr_dispatch(NETISR_IP, top); + 
netisr_queue(NETISR_IP, top); break; err: diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c index 1504b37c19..97a983eb4d 100644 --- a/sys/kern/kern_poll.c +++ b/sys/kern/kern_poll.c @@ -258,13 +258,13 @@ init_device_poll_pcpu(int cpuid) poll_reset_state(pctx); netmsg_init(&pctx->poll_netmsg, NULL, &netisr_adone_rport, - MSGF_MPSAFE, netisr_poll); + 0, netisr_poll); #ifdef INVARIANTS pctx->poll_netmsg.nm_lmsg.u.ms_resultp = pctx; #endif netmsg_init(&pctx->poll_more_netmsg, NULL, &netisr_adone_rport, - MSGF_MPSAFE, netisr_pollmore); + 0, netisr_pollmore); #ifdef INVARIANTS pctx->poll_more_netmsg.nm_lmsg.u.ms_resultp = pctx; #endif @@ -342,7 +342,7 @@ sysctl_pollhz(SYSCTL_HANDLER_ARGS) phz = DEVICE_POLLING_FREQ_MAX; netmsg_init(&msg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, poll_sysctl_pollhz); + 0, poll_sysctl_pollhz); msg.nm_lmsg.u.ms_result = phz; port = cpu_portfn(pctx->poll_cpuid); @@ -367,7 +367,7 @@ sysctl_polling(SYSCTL_HANDLER_ARGS) return error; netmsg_init(&msg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, poll_sysctl_polling); + 0, poll_sysctl_polling); msg.nm_lmsg.u.ms_result = enabled; port = cpu_portfn(pctx->poll_cpuid); @@ -390,7 +390,7 @@ sysctl_regfrac(SYSCTL_HANDLER_ARGS) return error; netmsg_init(&msg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, poll_sysctl_regfrac); + 0, poll_sysctl_regfrac); msg.nm_lmsg.u.ms_result = reg_frac; port = cpu_portfn(pctx->poll_cpuid); @@ -417,7 +417,7 @@ sysctl_burstmax(SYSCTL_HANDLER_ARGS) burst_max = MAX_POLL_BURST_MAX; netmsg_init(&msg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, poll_sysctl_burstmax); + 0, poll_sysctl_burstmax); msg.nm_lmsg.u.ms_result = burst_max; port = cpu_portfn(pctx->poll_cpuid); @@ -440,7 +440,7 @@ sysctl_eachburst(SYSCTL_HANDLER_ARGS) return error; netmsg_init(&msg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, poll_sysctl_eachburst); + 0, poll_sysctl_eachburst); msg.nm_lmsg.u.ms_result = each_burst; port = cpu_portfn(pctx->poll_cpuid); @@ -767,7 +767,7 @@ 
ether_pollcpu_register(struct ifnet *ifp, int cpuid) ifnet_deserialize_all(ifp); netmsg_init(&msg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, poll_register); + 0, poll_register); msg.nm_lmsg.u.ms_resultp = ifp; port = cpu_portfn(cpuid); @@ -861,7 +861,7 @@ ether_poll_deregister(struct ifnet *ifp) ifnet_deserialize_all(ifp); netmsg_init(&msg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, poll_deregister); + 0, poll_deregister); msg.nm_lmsg.u.ms_resultp = ifp; port = cpu_portfn(cpuid); diff --git a/sys/net/bpf.c b/sys/net/bpf.c index afc885ec95..d8bbda106e 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -587,7 +587,7 @@ bpfwrite(struct dev_write_args *ap) dst.sa_family = pseudo_AF_HDRCMPLT; netmsg_init(&bmsg.nm_netmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, bpf_output_dispatch); + 0, bpf_output_dispatch); bmsg.nm_mbuf = m; bmsg.nm_ifp = ifp; bmsg.nm_dst = &dst; diff --git a/sys/net/bridge/if_bridge.c b/sys/net/bridge/if_bridge.c index a3e4e7104b..58ca156646 100644 --- a/sys/net/bridge/if_bridge.c +++ b/sys/net/bridge/if_bridge.c @@ -1830,7 +1830,7 @@ bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m) nmp->nm_packet = m; nmp->nm_netmsg.nm_lmsg.u.ms_resultp = dst_ifp; - lwkt_sendmsg(curnetport, &nmp->nm_netmsg.nm_lmsg); + lwkt_sendmsg(ifnet_portfn(mycpu->gd_cpuid), &nmp->nm_netmsg.nm_lmsg); } /* diff --git a/sys/net/ef/if_ef.c b/sys/net/ef/if_ef.c index 3f6ead08d9..86f617093e 100644 --- a/sys/net/ef/if_ef.c +++ b/sys/net/ef/if_ef.c @@ -253,7 +253,7 @@ ef_inputEII(struct mbuf *m, struct llc* l, u_short ether_type) default: return (EPROTONOSUPPORT); } - netisr_dispatch(isr, m); + netisr_queue(isr, m); return (0); } @@ -272,7 +272,7 @@ ef_inputSNAP(struct mbuf *m, struct llc* l, u_short ether_type) default: return (EPROTONOSUPPORT); } - netisr_dispatch(isr, m); + netisr_queue(isr, m); return (0); } @@ -291,7 +291,7 @@ ef_input8022(struct mbuf *m, struct llc* l, u_short ether_type) default: return (EPROTONOSUPPORT); } - netisr_dispatch(isr, m); + 
netisr_queue(isr, m); return (0); } @@ -384,7 +384,7 @@ ef_input(struct ifnet *ifp, const struct ether_header *eh, struct mbuf *m) ft, ether_type); return (EPROTONOSUPPORT); } - netisr_dispatch(isr, m); + netisr_queue(isr, m); return (0); } diff --git a/sys/net/faith/if_faith.c b/sys/net/faith/if_faith.c index 8663d17cd3..d452c49230 100644 --- a/sys/net/faith/if_faith.c +++ b/sys/net/faith/if_faith.c @@ -245,9 +245,10 @@ faithoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, /* XXX do we need more sanity checks? */ m->m_pkthdr.rcvif = ifp; + m->m_flags &= ~M_HASH; ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; - netisr_dispatch(isr, m); + netisr_queue(isr, m); return (0); } diff --git a/sys/net/gif/if_gif.c b/sys/net/gif/if_gif.c index c21de80232..2c7fb3348b 100644 --- a/sys/net/gif/if_gif.c +++ b/sys/net/gif/if_gif.c @@ -427,7 +427,8 @@ gif_input(struct mbuf *m, int af, struct ifnet *ifp) ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; - netisr_dispatch(isr, m); + m->m_flags &= ~M_HASH; + netisr_queue(isr, m); return; } diff --git a/sys/net/if.c b/sys/net/if.c index 8fd4813c96..38664b70a7 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -171,7 +171,6 @@ struct callout if_slowtimo_timer; int if_index = 0; struct ifnet **ifindex2ifnet = NULL; static struct thread ifnet_threads[MAXCPU]; -static int ifnet_mpsafe_thread = NETMSG_SERVICE_MPSAFE; #define IFQ_KTR_STRING "ifq=%p" #define IFQ_KTR_ARG_SIZE (sizeof(void *)) @@ -2486,6 +2485,21 @@ ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu) lwkt_sendmsg(ifnet_portfn(cpu), lmsg); } +/* + * Generic netmsg service loop. Some protocols may roll their own but all + * must do the basic command dispatch function call done here. 
+ */ +static void +ifnet_service_loop(void *arg __unused) +{ + struct netmsg *msg; + + while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) { + KASSERT(msg->nm_dispatch, ("ifnet_service: badmsg")); + msg->nm_dispatch(msg); + } +} + static void ifnetinit(void *dummy __unused) { @@ -2494,10 +2508,10 @@ ifnetinit(void *dummy __unused) for (i = 0; i < ncpus; ++i) { struct thread *thr = &ifnet_threads[i]; - lwkt_create(netmsg_service_loop, &ifnet_mpsafe_thread, NULL, - thr, TDF_NETWORK, i, - "ifnet %d", i); + lwkt_create(ifnet_service_loop, NULL, NULL, + thr, TDF_STOPREQ, i, "ifnet %d", i); netmsg_service_port_init(&thr->td_msgport); + lwkt_schedule(thr); } } diff --git a/sys/net/if_atmsubr.c b/sys/net/if_atmsubr.c index ffe7c701d6..f1df178abc 100644 --- a/sys/net/if_atmsubr.c +++ b/sys/net/if_atmsubr.c @@ -297,7 +297,8 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m, } } - netisr_dispatch(isr, m); + m->m_flags &= ~M_HASH; + netisr_queue(isr, m); } /* diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 2b736bc715..259af0412f 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -358,7 +358,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, */ if (bcmp(edst, &ns_thishost, ETHER_ADDR_LEN) == 0) { m->m_pkthdr.rcvif = ifp; - netisr_dispatch(NETISR_NS, m); + netisr_queue(NETISR_NS, m); return (error); } if (bcmp(edst, &ns_broadhost, ETHER_ADDR_LEN) == 0) @@ -1035,19 +1035,13 @@ static void ether_input_ipifunc(void *arg) { struct mbuf *m, *next; - lwkt_port_t port; + lwkt_port_t port = cpu_portfn(mycpu->gd_cpuid); m = arg; do { next = m->m_nextpkt; m->m_nextpkt = NULL; - - port = m->m_pkthdr.header; - m->m_pkthdr.header = NULL; - - lwkt_sendmsg(port, - &m->m_hdr.mh_netmsg.nm_netmsg.nm_lmsg); - + lwkt_sendmsg(port, &m->m_hdr.mh_netmsg.nm_netmsg.nm_lmsg); m = next; } while (m != NULL); } @@ -1062,7 +1056,7 @@ ether_input_dispatch(struct mbuf_chain *chain) for (i = 0; i < ncpus; ++i) { if 
(chain[i].mc_head != NULL) { lwkt_send_ipiq(globaldata_find(i), - ether_input_ipifunc, chain[i].mc_head); + ether_input_ipifunc, chain[i].mc_head); } } #else @@ -1222,7 +1216,7 @@ post_stats: #ifdef INET case ETHERTYPE_IP: if ((m->m_flags & M_LENCHECKED) == 0) { - if (!ip_lengthcheck(&m)) + if (!ip_lengthcheck(&m, 0)) return; } if (ipflow_fastforward(m)) @@ -1361,17 +1355,14 @@ dropanyway: return; } - if (!redispatch) - netisr_run(isr, m); - else - netisr_dispatch(isr, m); + netisr_queue(isr, m); } /* * First we perform any link layer operations, then continue to the * upper layers with ether_demux_oncpu(). */ -void +static void ether_input_oncpu(struct ifnet *ifp, struct mbuf *m) { if ((ifp->if_flags & (IFF_UP | IFF_MONITOR)) != IFF_UP) { @@ -1532,48 +1523,24 @@ ether_input_handler(struct netmsg *nmsg) ether_input_oncpu(ifp, m); } -static __inline void -ether_init_netpacket(int num, struct mbuf *m) +/* + * Send the packet to the target msgport or queue it into 'chain'. + */ +static void +ether_dispatch(int isr, struct mbuf *m, struct mbuf_chain *chain) { struct netmsg_packet *pmsg; + KKASSERT(m->m_flags & M_HASH); pmsg = &m->m_hdr.mh_netmsg; netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, - MSGF_MPSAFE, ether_input_handler); + 0, ether_input_handler); pmsg->nm_packet = m; - pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; -} - -static __inline struct lwkt_port * -ether_mport(int num, struct mbuf **m) -{ - if (num == NETISR_MAX) { - /* - * All packets whose target msgports can't be - * determined here are dispatched to netisr0, - * where further dispatching may happen. - */ - return cpu_portfn(0); - } - return netisr_find_port(num, m); -} - -/* - * Send the packet to the target msgport or - * queue it into 'chain'. 
- */ -static void -ether_dispatch(int isr, struct lwkt_port *port, struct mbuf *m, - struct mbuf_chain *chain) -{ - ether_init_netpacket(isr, m); + pmsg->nm_netmsg.nm_lmsg.u.ms_result = isr; if (chain != NULL) { + int cpuid = m->m_pkthdr.hash; struct mbuf_chain *c; - int cpuid; - - m->m_pkthdr.header = port; /* XXX */ - cpuid = port->mpu_td->td_gd->gd_cpuid; c = &chain[cpuid]; if (c->mc_head == NULL) { @@ -1584,7 +1551,8 @@ ether_dispatch(int isr, struct lwkt_port *port, struct mbuf *m, } m->m_nextpkt = NULL; } else { - lwkt_sendmsg(port, &m->m_hdr.mh_netmsg.nm_netmsg.nm_lmsg); + lwkt_sendmsg(cpu_portfn(m->m_pkthdr.hash), + &pmsg->nm_netmsg.nm_lmsg); } } @@ -1595,13 +1563,10 @@ ether_dispatch(int isr, struct lwkt_port *port, struct mbuf *m, * MUST MAKE SURE that there are at least sizeof(struct ether_header) * bytes in the first mbuf. * - * We first try to find the target msgport for this ether frame, if - * there is no target msgport for it, this ether frame is discarded, - * else we do following processing according to whether 'chain' is - * NULL or not: * - If 'chain' is NULL, this ether frame is sent to the target msgport * immediately. This situation happens when ether_input_chain is * accessed through ifnet.if_input. + * * - If 'chain' is not NULL, this ether frame is queued to the 'chain' * bucket indexed by the target msgport's cpuid and the target msgport * is saved in mbuf's m_pkthdr.m_head. Caller of ether_input_chain @@ -1613,8 +1578,7 @@ void ether_input_chain(struct ifnet *ifp, struct mbuf *m, const struct pktinfo *pi, struct mbuf_chain *chain) { - struct ether_header *eh, *save_eh, save_eh0; - struct lwkt_port *port; + struct ether_header *eh; uint16_t ether_type; int isr; @@ -1654,27 +1618,21 @@ ether_input_chain(struct ifnet *ifp, struct mbuf *m, const struct pktinfo *pi, return; } + /* + * If the packet has been characterized (pi->pi_netisr / M_HASH) + * we can dispatch it immediately without further inspection. 
+ */ if (pi != NULL && (m->m_flags & M_HASH)) { #ifdef RSS_DEBUG ether_pktinfo_try++; #endif - /* Try finding the port using the packet info */ - port = netisr_find_pktinfo_port(pi, m); - if (port != NULL) { + ether_dispatch(pi->pi_netisr, m, chain); + #ifdef RSS_DEBUG - ether_pktinfo_hit++; + ether_pktinfo_hit++; #endif - ether_dispatch(pi->pi_netisr, port, m, chain); - - logether(chain_end, ifp); - return; - } - - /* - * The packet info does not contain enough - * information, we will have to check the - * packet content. - */ + logether(chain_end, ifp); + return; } #ifdef RSS_DEBUG else if (ifp->if_capenable & IFCAP_RSS) { @@ -1753,58 +1711,26 @@ ether_input_chain(struct ifnet *ifp, struct mbuf *m, const struct pktinfo *pi, default: /* * NETISR_MAX is an invalid value; it is chosen to let - * ether_mport() know that we are not able to decide - * this packet's msgport here. */ isr = NETISR_MAX; break; } /* - * If the packet is in contiguous memory, following - * m_adj() could ensure that the hidden ether header - * will not be destroyed, else we will have to save - * the ether header for the later restoration. - */ - if (m->m_pkthdr.len != m->m_len) { - save_eh0 = *eh; - save_eh = &save_eh0; - } else { - save_eh = NULL; - } - - /* - * Temporarily remove ether header; ether_mport() - * expects a packet without ether header. + * Ask the isr to characterize the packet since we couldn't. + * This is an attempt to optimally get us onto the correct protocol + * thread. */ - m_adj(m, sizeof(struct ether_header)); - - /* - * Find the packet's target msgport. - */ - port = ether_mport(isr, &m); - if (port == NULL) { - KKASSERT(m == NULL); + netisr_characterize(isr, &m, sizeof(struct ether_header)); + if (m == NULL) { logether(chain_end, ifp); return; } /* - * Restore ether header. 
+ * Finally dispatch it */ - if (save_eh != NULL) { - ether_restore_header(&m, eh, save_eh); - if (m == NULL) { - logether(chain_end, ifp); - return; - } - } else { - m->m_data -= ETHER_HDR_LEN; - m->m_len += ETHER_HDR_LEN; - m->m_pkthdr.len += ETHER_HDR_LEN; - } - - ether_dispatch(isr, port, m, chain); + ether_dispatch(isr, m, chain); logether(chain_end, ifp); } diff --git a/sys/net/if_poll.c b/sys/net/if_poll.c index 541aa17ff2..3a11cb0ffd 100644 --- a/sys/net/if_poll.c +++ b/sys/net/if_poll.c @@ -375,7 +375,7 @@ ifpoll_register(struct ifnet *ifp) ifnet_deserialize_all(ifp); netmsg_init(&nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, ifpoll_register_handler); + 0, ifpoll_register_handler); nmsg.nm_lmsg.u.ms_resultp = &info; error = ifnet_domsg(&nmsg.nm_lmsg, 0); @@ -408,7 +408,7 @@ ifpoll_deregister(struct ifnet *ifp) ifnet_deserialize_all(ifp); netmsg_init(&nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, ifpoll_deregister_handler); + 0, ifpoll_deregister_handler); nmsg.nm_lmsg.u.ms_resultp = ifp; error = ifnet_domsg(&nmsg.nm_lmsg, 0); @@ -504,7 +504,7 @@ stpoll_init(void) "Number of registered status poll handlers"); netmsg_init(&st_ctx->poll_netmsg, NULL, &netisr_adone_rport, - MSGF_MPSAFE, stpoll_handler); + 0, stpoll_handler); } /* @@ -691,11 +691,11 @@ iopoll_ctx_create(int cpuid, int poll_type) iopoll_reset_state(io_ctx); netmsg_init(&io_ctx->poll_netmsg, NULL, &netisr_adone_rport, - MSGF_MPSAFE, iopoll_handler); + 0, iopoll_handler); io_ctx->poll_netmsg.nm_lmsg.u.ms_resultp = io_ctx; netmsg_init(&io_ctx->poll_more_netmsg, NULL, &netisr_adone_rport, - MSGF_MPSAFE, iopollmore_handler); + 0, iopollmore_handler); io_ctx->poll_more_netmsg.nm_lmsg.u.ms_resultp = io_ctx; /* @@ -1006,7 +1006,7 @@ sysctl_burstmax(SYSCTL_HANDLER_ARGS) nmsg = &msg.nmsg; netmsg_init(nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, sysctl_burstmax_handler); + 0, sysctl_burstmax_handler); nmsg->nm_lmsg.u.ms_result = burst_max; msg.ctx = io_ctx; @@ -1049,7 +1049,7 @@ 
sysctl_eachburst(SYSCTL_HANDLER_ARGS) nmsg = &msg.nmsg; netmsg_init(nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, sysctl_eachburst_handler); + 0, sysctl_eachburst_handler); nmsg->nm_lmsg.u.ms_result = each_burst; msg.ctx = io_ctx; @@ -1270,7 +1270,7 @@ sysctl_pollhz(SYSCTL_HANDLER_ARGS) phz = IFPOLL_FREQ_MAX; netmsg_init(&nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, sysctl_pollhz_handler); + 0, sysctl_pollhz_handler); nmsg.nm_lmsg.u.ms_result = phz; return ifnet_domsg(&nmsg.nm_lmsg, comm->poll_cpuid); @@ -1320,7 +1320,7 @@ sysctl_stfrac(SYSCTL_HANDLER_ARGS) return EINVAL; netmsg_init(&nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, sysctl_stfrac_handler); + 0, sysctl_stfrac_handler); nmsg.nm_lmsg.u.ms_result = stfrac; return ifnet_domsg(&nmsg.nm_lmsg, comm->poll_cpuid); @@ -1358,7 +1358,7 @@ sysctl_txfrac(SYSCTL_HANDLER_ARGS) return EINVAL; netmsg_init(&nmsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, sysctl_txfrac_handler); + 0, sysctl_txfrac_handler); nmsg.nm_lmsg.u.ms_result = txfrac; return ifnet_domsg(&nmsg.nm_lmsg, comm->poll_cpuid); diff --git a/sys/net/if_var.h b/sys/net/if_var.h index c016aa8e7b..b1fe38a40f 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -721,7 +721,6 @@ void ether_ifattach_bpf(struct ifnet *, uint8_t *, u_int, u_int, struct lwkt_serialize *); void ether_ifdetach(struct ifnet *); void ether_demux_oncpu(struct ifnet *, struct mbuf *); -void ether_input_oncpu(struct ifnet *, struct mbuf *); void ether_reinput_oncpu(struct ifnet *, struct mbuf *, int); void ether_input_chain(struct ifnet *, struct mbuf *, const struct pktinfo *, struct mbuf_chain *); diff --git a/sys/net/ipfw/ip_fw2.c b/sys/net/ipfw/ip_fw2.c index 20bf240707..f6d3090dc3 100644 --- a/sys/net/ipfw/ip_fw2.c +++ b/sys/net/ipfw/ip_fw2.c @@ -2484,7 +2484,7 @@ ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) pkt->pipe_nr = pipe_nr; pkt->cpuid = mycpuid; - pkt->msgport = curnetport; + pkt->msgport = cur_netport(); id = &fwa->f_id; 
fid = &pkt->id; @@ -4440,7 +4440,7 @@ ipfw_init_dispatch(struct netmsg *nmsg) callout_init_mp(&ipfw_timeout_h); netmsg_init(&ipfw_timeout_netmsg, NULL, &netisr_adone_rport, - MSGF_MPSAFE | MSGF_DROPABLE | MSGF_PRIORITY, + MSGF_DROPABLE | MSGF_PRIORITY, ipfw_tick_dispatch); lockinit(&dyn_lock, "ipfw_dyn", 0, 0); diff --git a/sys/net/netisr.c b/sys/net/netisr.c index f28626e2e3..baf005ed57 100644 --- a/sys/net/netisr.c +++ b/sys/net/netisr.c @@ -58,30 +58,26 @@ #include #include -#define NETISR_GET_MPLOCK(ni) \ -do { \ - if (((ni)->ni_flags & NETISR_FLAG_MPSAFE) == 0) \ - get_mplock(); \ -} while (0) - -#define NETISR_REL_MPLOCK(ni) \ -do { \ - if (((ni)->ni_flags & NETISR_FLAG_MPSAFE) == 0) \ - rel_mplock(); \ -} while (0) - static void netmsg_sync_func(struct netmsg *msg); +static void netmsg_service_loop(void *arg); +static void cpu0_cpufn(struct mbuf **mp, int hoff); struct netmsg_port_registration { - TAILQ_ENTRY(netmsg_port_registration) npr_entry; - lwkt_port_t npr_port; + TAILQ_ENTRY(netmsg_port_registration) npr_entry; + lwkt_port_t npr_port; +}; + +struct netmsg_rollup { + TAILQ_ENTRY(netmsg_rollup) ru_entry; + netisr_ru_t ru_func; }; static struct netisr netisrs[NETISR_MAX]; static TAILQ_HEAD(,netmsg_port_registration) netreglist; +static TAILQ_HEAD(,netmsg_rollup) netrulist; /* Per-CPU thread to handle any protocol. 
*/ -struct thread netisr_cpu[MAXCPU]; +static struct thread netisr_cpu[MAXCPU]; lwkt_port netisr_afree_rport; lwkt_port netisr_adone_rport; lwkt_port netisr_apanic_rport; @@ -89,23 +85,7 @@ lwkt_port netisr_sync_port; static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t); -static int netisr_mpsafe_thread = NETMSG_SERVICE_ADAPTIVE; -TUNABLE_INT("net.netisr.mpsafe_thread", &netisr_mpsafe_thread); - SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr"); -SYSCTL_INT(_net_netisr, OID_AUTO, mpsafe_thread, CTLFLAG_RW, - &netisr_mpsafe_thread, 0, - "0:BGL, 1:Adaptive BGL, 2:No BGL(experimental)"); - -static __inline int -NETISR_TO_MSGF(const struct netisr *ni) -{ - int msg_flags = 0; - - if (ni->ni_flags & NETISR_FLAG_MPSAFE) - msg_flags |= MSGF_MPSAFE; - return msg_flags; -} /* * netisr_afree_rport replymsg function, only used to handle async @@ -114,7 +94,7 @@ NETISR_TO_MSGF(const struct netisr *ni) static void netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) { - kfree(msg, M_LWKTMSG); + kfree(msg, M_LWKTMSG); } /* @@ -131,16 +111,18 @@ netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) static int netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg) { - netmsg_t netmsg = (void *)lmsg; - - if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) { - netmsg->nm_dispatch(netmsg); - if ((lmsg->ms_flags & MSGF_DONE) == 0) - panic("netmsg_put_port: self-referential deadlock on netport"); - return(EASYNC); - } else { - return(netmsg_fwd_port_fn(port, lmsg)); - } + netmsg_t netmsg = (void *)lmsg; + + if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) { + netmsg->nm_dispatch(netmsg); + if ((lmsg->ms_flags & MSGF_DONE) == 0) { + panic("netmsg_put_port: self-referential " + "deadlock on netport"); + } + return(EASYNC); + } else { + return(netmsg_fwd_port_fn(port, lmsg)); + } } /* @@ -156,47 +138,49 @@ netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg) static int netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg) { - netmsg_t 
netmsg = (void *)lmsg; + netmsg_t netmsg = (void *)lmsg; - KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0); + KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0); - lmsg->ms_target_port = port; /* required for abort */ - netmsg->nm_dispatch(netmsg); - return(EASYNC); + lmsg->ms_target_port = port; /* required for abort */ + netmsg->nm_dispatch(netmsg); + return(EASYNC); } static void netisr_init(void) { - int i; - - TAILQ_INIT(&netreglist); - - /* - * Create default per-cpu threads for generic protocol handling. - */ - for (i = 0; i < ncpus; ++i) { - lwkt_create(netmsg_service_loop, &netisr_mpsafe_thread, NULL, - &netisr_cpu[i], TDF_NETWORK, i, - "netisr_cpu %d", i); - netmsg_service_port_init(&netisr_cpu[i].td_msgport); - } - - /* - * The netisr_afree_rport is a special reply port which automatically - * frees the replied message. The netisr_adone_rport simply marks - * the message as being done. The netisr_apanic_rport panics if - * the message is replied to. - */ - lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply); - lwkt_initport_replyonly_null(&netisr_adone_rport); - lwkt_initport_panic(&netisr_apanic_rport); - - /* - * The netisr_syncport is a special port which executes the message - * synchronously and waits for it if EASYNC is returned. - */ - lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport); + int i; + + TAILQ_INIT(&netreglist); + TAILQ_INIT(&netrulist); + + /* + * Create default per-cpu threads for generic protocol handling. + */ + for (i = 0; i < ncpus; ++i) { + lwkt_create(netmsg_service_loop, NULL, NULL, + &netisr_cpu[i], TDF_STOPREQ, i, + "netisr_cpu %d", i); + netmsg_service_port_init(&netisr_cpu[i].td_msgport); + lwkt_schedule(&netisr_cpu[i]); + } + + /* + * The netisr_afree_rport is a special reply port which automatically + * frees the replied message. The netisr_adone_rport simply marks + * the message as being done. The netisr_apanic_rport panics if + * the message is replied to. 
+ */ + lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply); + lwkt_initport_replyonly_null(&netisr_adone_rport); + lwkt_initport_panic(&netisr_apanic_rport); + + /* + * The netisr_syncport is a special port which executes the message + * synchronously and waits for it if EASYNC is returned. + */ + lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport); } SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL); @@ -209,25 +193,25 @@ SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL); void netmsg_service_port_init(lwkt_port_t port) { - struct netmsg_port_registration *reg; - - /* - * Override the putport function. Our custom function checks for - * self-references and executes such commands synchronously. - */ - if (netmsg_fwd_port_fn == NULL) - netmsg_fwd_port_fn = port->mp_putport; - KKASSERT(netmsg_fwd_port_fn == port->mp_putport); - port->mp_putport = netmsg_put_port; - - /* - * Keep track of ports using the netmsg API so we can synchronize - * certain operations (such as freeing an ifnet structure) across all - * consumers. - */ - reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO); - reg->npr_port = port; - TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry); + struct netmsg_port_registration *reg; + + /* + * Override the putport function. Our custom function checks for + * self-references and executes such commands synchronously. + */ + if (netmsg_fwd_port_fn == NULL) + netmsg_fwd_port_fn = port->mp_putport; + KKASSERT(netmsg_fwd_port_fn == port->mp_putport); + port->mp_putport = netmsg_put_port; + + /* + * Keep track of ports using the netmsg API so we can synchronize + * certain operations (such as freeing an ifnet structure) across all + * consumers. 
+ */ + reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO); + reg->npr_port = port; + TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry); } /* @@ -242,15 +226,14 @@ netmsg_service_port_init(lwkt_port_t port) void netmsg_service_sync(void) { - struct netmsg_port_registration *reg; - struct netmsg smsg; + struct netmsg_port_registration *reg; + struct netmsg smsg; - netmsg_init(&smsg, NULL, &curthread->td_msgport, - MSGF_MPSAFE, netmsg_sync_func); + netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_func); - TAILQ_FOREACH(reg, &netreglist, npr_entry) { - lwkt_domsg(reg->npr_port, &smsg.nm_lmsg, 0); - } + TAILQ_FOREACH(reg, &netreglist, npr_entry) { + lwkt_domsg(reg->npr_port, &smsg.nm_lmsg, 0); + } } /* @@ -260,233 +243,240 @@ netmsg_service_sync(void) static void netmsg_sync_func(struct netmsg *msg) { - lwkt_replymsg(&msg->nm_lmsg, 0); -} - -/* - * Service a netmsg request and modify the BGL lock state if appropriate. - * The new BGL lock state is returned (1:locked, 0:unlocked). - */ -int -netmsg_service(struct netmsg *msg, int mpsafe_mode, int mplocked) -{ - /* - * If nm_so is non-NULL the message is related to a socket. Sockets - * can migrate between protocol processing threads when they connect, - * due to an implied connect during a sendmsg(), or when a connection - * is accepted. - * - * If this occurs any messages already queued to the original thread - * or which race the change must be forwarded to the new protocol - * processing port. - * - * MPSAFE - socket changes are synchronous to the current protocol port - * so if the port can only change out from under us if it is - * already different from the current port anyway so we forward - * it. It is possible to chase a changing port, which is fine. - */ - if (msg->nm_so && msg->nm_so->so_port != &curthread->td_msgport) { - lwkt_forwardmsg(msg->nm_so->so_port, &msg->nm_lmsg); - return(mplocked); - } - - /* - * Adjust the mplock dynamically. 
- */ - switch (mpsafe_mode) { - case NETMSG_SERVICE_ADAPTIVE: /* Adaptive BGL */ - if (msg->nm_lmsg.ms_flags & MSGF_MPSAFE) { - if (mplocked) { - rel_mplock(); - mplocked = 0; - } - msg->nm_dispatch(msg); - /* Leave mpunlocked */ - } else { - if (!mplocked) { - get_mplock(); - /* mplocked = 1; not needed */ - } - msg->nm_dispatch(msg); - rel_mplock(); - mplocked = 0; - /* Leave mpunlocked, next msg might be mpsafe */ - } - break; - - case NETMSG_SERVICE_MPSAFE: /* No BGL */ - if (mplocked) { - rel_mplock(); - mplocked = 0; - } - msg->nm_dispatch(msg); - /* Leave mpunlocked */ - break; - - default: /* BGL */ - if (!mplocked) { - get_mplock(); - mplocked = 1; - } - msg->nm_dispatch(msg); - /* Leave mplocked */ - break; - } - return mplocked; + lwkt_replymsg(&msg->nm_lmsg, 0); } /* * Generic netmsg service loop. Some protocols may roll their own but all * must do the basic command dispatch function call done here. */ -void +static void netmsg_service_loop(void *arg) { - struct netmsg *msg; - int mplocked, *mpsafe_mode = arg; - - /* - * Threads always start mpsafe. - */ - mplocked = 0; - - /* - * Loop on netmsgs - */ - while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) { - mplocked = netmsg_service(msg, *mpsafe_mode, mplocked); - } + struct netmsg_rollup *ru; + struct netmsg *msg; + thread_t td = curthread;; + int limit; + + while ((msg = lwkt_waitport(&td->td_msgport, 0))) { + /* + * Run up to 512 pending netmsgs. + */ + limit = 512; + do { + KASSERT(msg->nm_dispatch != NULL, + ("netmsg_service isr %d badmsg\n", + msg->nm_lmsg.u.ms_result)); + msg->nm_dispatch(msg); + if (--limit == 0) + break; + } while ((msg = lwkt_getport(&td->td_msgport)) != NULL); + + /* + * Run all registered rollup functions for this cpu + * (e.g. tcp_willblock()). + */ + TAILQ_FOREACH(ru, &netrulist, ru_entry) + ru->ru_func(); + } } /* - * Call the netisr directly. - * Queueing may be done in the msg port layer at its discretion. + * Forward a packet to a netisr service function. 
+ * + * If the packet has not been assigned to a protocol thread we call + * the port characterization function to assign it. The caller must + * clear M_HASH (or not have set it in the first place) if the caller + * wishes the packet to be recharacterized. */ -void -netisr_dispatch(int num, struct mbuf *m) +int +netisr_queue(int num, struct mbuf *m) { - /* just queue it for now XXX JH */ - netisr_queue(num, m); + struct netisr *ni; + struct netmsg_packet *pmsg; + lwkt_port_t port; + + KASSERT((num > 0 && num < (sizeof(netisrs)/sizeof(netisrs[0]))), + ("Bad isr %d", num)); + + ni = &netisrs[num]; + if (ni->ni_handler == NULL) { + kprintf("Unregistered isr %d\n", num); + m_freem(m); + return (EIO); + } + + /* + * Figure out which protocol thread to send to. This does not + * have to be perfect but performance will be really good if it + * is correct. Major protocol inputs such as ip_input() will + * re-characterize the packet as necessary. + */ + if ((m->m_flags & M_HASH) == 0) { + ni->ni_cpufn(&m, 0); + if (m == NULL) { + /* ni_cpufn left no mbuf behind, nothing to free */ + return (EIO); + } + if ((m->m_flags & M_HASH) == 0) { + kprintf("netisr_queue(%d): packet hash failed\n", num); + m_freem(m); + return (EIO); + } + } + + /* + * Get the protocol port based on the packet hash, initialize + * the netmsg, and send it off. + */ + port = cpu_portfn(m->m_pkthdr.hash); + pmsg = &m->m_hdr.mh_netmsg; + netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, + 0, ni->ni_handler); + pmsg->nm_packet = m; + pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; + lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg); + + return (0); } /* - * Same as netisr_dispatch(), but always queue. - * This is either used in places where we are not confident that - * direct dispatch is possible, or where queueing is required. + * Pre-characterization of a deeper portion of the packet for the + * requested isr. + * + * The base of the ISR type (e.g. IP) that we want to characterize is + * at (hoff) relative to the beginning of the mbuf.
This allows + * e.g. ether_input_chain() to not have to adjust the m_data/m_len. */ -int -netisr_queue(int num, struct mbuf *m) +void +netisr_characterize(int num, struct mbuf **mp, int hoff) { - struct netisr *ni; - struct netmsg_packet *pmsg; - lwkt_port_t port; - - KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), - ("%s: bad isr %d", __func__, num)); - - ni = &netisrs[num]; - if (ni->ni_handler == NULL) { - kprintf("%s: unregistered isr %d\n", __func__, num); - m_freem(m); - return (EIO); - } - - if ((port = ni->ni_mport(&m)) == NULL) - return (EIO); - - pmsg = &m->m_hdr.mh_netmsg; - - netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, - NETISR_TO_MSGF(ni), ni->ni_handler); - pmsg->nm_packet = m; - pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; - lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg); - return (0); + struct netisr *ni; + struct mbuf *m; + + /* + * Validation + */ + KASSERT((num > 0 && num < (sizeof(netisrs)/sizeof(netisrs[0]))), + ("Bad isr %d", num)); + m = *mp; + KKASSERT(m != NULL); + + /* + * Valid netisr? 
+ */ + ni = &netisrs[num]; + if (ni->ni_handler == NULL) { + kprintf("Unregistered isr %d\n", num); + m_freem(m); + *mp = NULL; + } + + /* + * Characterize the packet (skipped if it was freed above) + */ + if (*mp != NULL && (m->m_flags & M_HASH) == 0) { + ni->ni_cpufn(mp, hoff); + m = *mp; + if (m && (m->m_flags & M_HASH) == 0) + kprintf("netisr_characterize(%d): packet hash failed\n", num); + } } void -netisr_register(int num, pkt_portfn_t mportfn, - pktinfo_portfn_t mportfn_pktinfo, netisr_fn_t handler, - uint32_t flags) +netisr_register(int num, netisr_fn_t handler, netisr_cpufn_t cpufn) { - struct netisr *ni; - - KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), - ("netisr_register: bad isr %d", num)); - ni = &netisrs[num]; - - ni->ni_mport = mportfn; - ni->ni_mport_pktinfo = mportfn_pktinfo; - ni->ni_handler = handler; - ni->ni_flags = flags; - netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, - NETISR_TO_MSGF(ni), NULL); -} + struct netisr *ni; -int -netisr_unregister(int num) -{ - KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), - ("unregister_netisr: bad isr number: %d\n", num)); + KASSERT((num > 0 && num < (sizeof(netisrs)/sizeof(netisrs[0]))), + ("netisr_register: bad isr %d", num)); + KKASSERT(handler != NULL); + + if (cpufn == NULL) + cpufn = cpu0_cpufn; - /* XXX JH */ - return (0); + ni = &netisrs[num]; + + ni->ni_handler = handler; + ni->ni_cpufn = cpufn; + netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL); } -/* - * Return message port for default handler thread on CPU 0. 
- */ -lwkt_port_t -cpu0_portfn(struct mbuf **mptr) +void +netisr_register_rollup(netisr_ru_t ru_func) { - struct mbuf *m = *mptr; - int cpu = 0; + struct netmsg_rollup *ru; - m->m_pkthdr.hash = cpu; - m->m_flags |= M_HASH; - return (&netisr_cpu[cpu].td_msgport); + ru = kmalloc(sizeof(*ru), M_TEMP, M_WAITOK|M_ZERO); + ru->ru_func = ru_func; + TAILQ_INSERT_TAIL(&netrulist, ru, ru_entry); } +/* + * Return the message port for the general protocol message servicing + * thread for a particular cpu. + */ lwkt_port_t cpu_portfn(int cpu) { - return (&netisr_cpu[cpu].td_msgport); + KKASSERT(cpu >= 0 && cpu < ncpus); + return (&netisr_cpu[cpu].td_msgport); } /* - * If the current thread is a network protocol thread (TDF_NETWORK), - * then return the current thread's message port. - * XXX Else, return the current CPU's netisr message port. + * Return the current cpu's network protocol thread. */ lwkt_port_t cur_netport(void) { - if (curthread->td_flags & TDF_NETWORK) - return &curthread->td_msgport; - else - return cpu_portfn(mycpuid); + return(cpu_portfn(mycpu->gd_cpuid)); } -/* ARGSUSED */ +/* + * Return a default protocol mbuf processing thread port + */ lwkt_port_t cpu0_soport(struct socket *so __unused, struct sockaddr *nam __unused, struct mbuf **dummy __unused) { - return (&netisr_cpu[0].td_msgport); + return (&netisr_cpu[0].td_msgport); } +/* + * Return a default protocol control message processing thread port + */ lwkt_port_t cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused, void *extra __unused) { - return (&netisr_cpu[0].td_msgport); + return (&netisr_cpu[0].td_msgport); } +/* + * This is a dummy port that causes a message to be executed synchronously + * instead of being queued to a port. 
+ */ lwkt_port_t sync_soport(struct socket *so __unused, struct sockaddr *nam __unused, struct mbuf **dummy __unused) { - return (&netisr_sync_port); + return (&netisr_sync_port); +} + +/* + * This is a default netisr packet characterization function which + * sets M_HASH. If a netisr is registered with a NULL cpufn function + * this one is assigned. + * + * This function makes no attempt to validate the packet. + */ +static void +cpu0_cpufn(struct mbuf **mp, int hoff __unused) +{ + struct mbuf *m = *mp; + + m->m_flags |= M_HASH; + m->m_pkthdr.hash = 0; } /* @@ -504,122 +494,37 @@ sync_soport(struct socket *so __unused, struct sockaddr *nam __unused, static void schednetisr_remote(void *data) { - int num = (int)(intptr_t)data; - struct netisr *ni = &netisrs[num]; - lwkt_port_t port = &netisr_cpu[0].td_msgport; - struct netmsg *pmsg; - - pmsg = &netisrs[num].ni_netmsg; - crit_enter(); - if (pmsg->nm_lmsg.ms_flags & MSGF_DONE) { - netmsg_init(pmsg, NULL, &netisr_adone_rport, - NETISR_TO_MSGF(ni), ni->ni_handler); - pmsg->nm_lmsg.u.ms_result = num; - lwkt_sendmsg(port, &pmsg->nm_lmsg); - } - crit_exit(); + int num = (int)(intptr_t)data; + struct netisr *ni = &netisrs[num]; + lwkt_port_t port = &netisr_cpu[0].td_msgport; + struct netmsg *pmsg; + + pmsg = &netisrs[num].ni_netmsg; + if (pmsg->nm_lmsg.ms_flags & MSGF_DONE) { + netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler); + pmsg->nm_lmsg.u.ms_result = num; + lwkt_sendmsg(port, &pmsg->nm_lmsg); + } } void schednetisr(int num) { - KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), - ("schednetisr: bad isr %d", num)); + KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), + ("schednetisr: bad isr %d", num)); + KKASSERT(netisrs[num].ni_handler != NULL); #ifdef SMP - if (mycpu->gd_cpuid != 0) { - lwkt_send_ipiq(globaldata_find(0), - schednetisr_remote, (void *)(intptr_t)num); - } else { - schednetisr_remote((void *)(intptr_t)num); - } + if (mycpu->gd_cpuid != 0) { + 
lwkt_send_ipiq(globaldata_find(0), + schednetisr_remote, (void *)(intptr_t)num); + } else { + crit_enter(); + schednetisr_remote((void *)(intptr_t)num); + crit_exit(); + } #else - schednetisr_remote((void *)(intptr_t)num); + crit_enter(); + schednetisr_remote((void *)(intptr_t)num); + crit_exit(); #endif } - -lwkt_port_t -netisr_find_port(int num, struct mbuf **m0) -{ - struct netisr *ni; - lwkt_port_t port; - struct mbuf *m = *m0; - - *m0 = NULL; - - KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), - ("%s: bad isr %d", __func__, num)); - - ni = &netisrs[num]; - if (ni->ni_mport == NULL) { - kprintf("%s: unregistered isr %d\n", __func__, num); - m_freem(m); - return NULL; - } - - if ((port = ni->ni_mport(&m)) == NULL) - return NULL; - - *m0 = m; - return port; -} - -void -netisr_run(int num, struct mbuf *m) -{ - struct netisr *ni; - struct netmsg_packet *pmsg; - - KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), - ("%s: bad isr %d", __func__, num)); - - ni = &netisrs[num]; - if (ni->ni_handler == NULL) { - kprintf("%s: unregistered isr %d\n", __func__, num); - m_freem(m); - return; - } - - pmsg = &m->m_hdr.mh_netmsg; - - netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, - 0, ni->ni_handler); - pmsg->nm_packet = m; - pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; - - NETISR_GET_MPLOCK(ni); - ni->ni_handler(&pmsg->nm_netmsg); - NETISR_REL_MPLOCK(ni); -} - -lwkt_port_t -pktinfo_portfn_cpu0(const struct pktinfo *dummy __unused, - struct mbuf *m) -{ - m->m_pkthdr.hash = 0; - return &netisr_cpu[0].td_msgport; -} - -lwkt_port_t -pktinfo_portfn_notsupp(const struct pktinfo *dummy __unused, - struct mbuf *m __unused) -{ - return NULL; -} - -lwkt_port_t -netisr_find_pktinfo_port(const struct pktinfo *pi, struct mbuf *m) -{ - struct netisr *ni; - int num = pi->pi_netisr; - - KASSERT(m->m_flags & M_HASH, ("packet does not contain hash\n")); - KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), - ("%s: bad isr %d", __func__, 
num)); - - ni = &netisrs[num]; - if (ni->ni_mport_pktinfo == NULL) { - kprintf("%s: unregistered isr %d\n", __func__, num); - return NULL; - } - return ni->ni_mport_pktinfo(pi, m); -} diff --git a/sys/net/netisr.h b/sys/net/netisr.h index 74c79a2278..9a90419fee 100644 --- a/sys/net/netisr.h +++ b/sys/net/netisr.h @@ -197,29 +197,28 @@ void netmsg_so_notify_doabort(lwkt_msg_t); #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) +/* + * Temporary pktinfo structure passed directly from the driver to + * ether_input_chain(), allows us to bypass numerous checks. + */ struct pktinfo { int pi_netisr; /* netisr index, e.g. NETISR_IP */ uint32_t pi_flags; /* PKTINFO_FLAG_ */ int pi_l3proto; /* layer3 protocol number */ }; -#define PKTINFO_FLAG_FRAG 0x1 - -typedef lwkt_port_t (*pkt_portfn_t)(struct mbuf **); -typedef lwkt_port_t (*pktinfo_portfn_t)(const struct pktinfo *, struct mbuf *); +#define PKTINFO_FLAG_FRAG 0x1 +/* + * NETISR_xxx registrations + */ struct netisr { - lwkt_port ni_port; /* must be first */ - pkt_portfn_t ni_mport; - pktinfo_portfn_t ni_mport_pktinfo; - netisr_fn_t ni_handler; + netisr_fn_t ni_handler; /* packet handler function */ + netisr_ru_t ni_rufunc; /* rollup function */ + netisr_cpufn_t ni_cpufn; /* characterize pkt return cpu */ struct netmsg ni_netmsg; /* for sched_netisr() (no-data) */ - uint32_t ni_flags; /* NETISR_FLAG_ */ }; -#define NETISR_FLAG_NOTMPSAFE 0x0 /* ni_handler is not MPSAFE */ -#define NETISR_FLAG_MPSAFE 0x1 /* ni_handler is MPSAFE */ - #endif #ifdef _KERNEL @@ -231,24 +230,16 @@ extern lwkt_port netisr_adone_rport; extern lwkt_port netisr_afree_rport; extern lwkt_port netisr_apanic_rport; -lwkt_port_t cpu0_portfn(struct mbuf **mptr); lwkt_port_t cpu_portfn(int cpu); -lwkt_port_t pktinfo_portfn_cpu0(const struct pktinfo *, struct mbuf *); -lwkt_port_t pktinfo_portfn_notsupp(const struct pktinfo *, struct mbuf *); lwkt_port_t cur_netport(void); -lwkt_port_t netisr_find_port(int, struct mbuf **); -lwkt_port_t 
netisr_find_pktinfo_port(const struct pktinfo *, struct mbuf *); -void netisr_dispatch(int, struct mbuf *); -void netisr_run(int, struct mbuf *); +void netisr_register(int, netisr_fn_t, netisr_cpufn_t); +void netisr_register_rollup(netisr_ru_t ru_func); + +void netisr_characterize(int num, struct mbuf **mp, int hoff); int netisr_queue(int, struct mbuf *); -void netisr_register(int, pkt_portfn_t, pktinfo_portfn_t, - netisr_fn_t, uint32_t); -int netisr_unregister(int); void netmsg_service_port_init(lwkt_port_t); -void netmsg_service_loop(void *arg); -int netmsg_service(struct netmsg *, int, int); void netmsg_service_sync(void); void schednetisr(int); diff --git a/sys/net/netmsg.h b/sys/net/netmsg.h index bf642de0b0..328f2d0ff7 100644 --- a/sys/net/netmsg.h +++ b/sys/net/netmsg.h @@ -43,166 +43,166 @@ struct netmsg; typedef void (*netisr_fn_t)(struct netmsg *); +typedef void (*netisr_ru_t)(void); +typedef void (*netisr_cpufn_t)(struct mbuf **, int); /* * Base netmsg */ typedef struct netmsg { - struct lwkt_msg nm_lmsg; - netisr_fn_t nm_dispatch; - struct socket *nm_so; + struct lwkt_msg nm_lmsg; + netisr_fn_t nm_dispatch; + struct socket *nm_so; } *netmsg_t; -#define MSGF_MPSAFE MSGF_USER0 - #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) /* * User protocol requests messages. 
*/ struct netmsg_pru_abort { - struct netmsg nm_netmsg; - pru_abort_fn_t nm_prufn; + struct netmsg nm_netmsg; + pru_abort_fn_t nm_prufn; }; struct netmsg_pru_accept { - struct netmsg nm_netmsg; - pru_accept_fn_t nm_prufn; - struct sockaddr **nm_nam; + struct netmsg nm_netmsg; + pru_accept_fn_t nm_prufn; + struct sockaddr **nm_nam; }; struct netmsg_pru_attach { - struct netmsg nm_netmsg; - pru_attach_fn_t nm_prufn; - int nm_proto; - struct pru_attach_info *nm_ai; + struct netmsg nm_netmsg; + pru_attach_fn_t nm_prufn; + int nm_proto; + struct pru_attach_info *nm_ai; }; struct netmsg_pru_bind { - struct netmsg nm_netmsg; - pru_bind_fn_t nm_prufn; - struct sockaddr *nm_nam; - struct thread *nm_td; + struct netmsg nm_netmsg; + pru_bind_fn_t nm_prufn; + struct sockaddr *nm_nam; + struct thread *nm_td; }; struct netmsg_pru_connect { - struct netmsg nm_netmsg; - pru_connect_fn_t nm_prufn; - struct sockaddr *nm_nam; - struct thread *nm_td; + struct netmsg nm_netmsg; + pru_connect_fn_t nm_prufn; + struct sockaddr *nm_nam; + struct thread *nm_td; }; struct netmsg_pru_connect2 { - struct netmsg nm_netmsg; - pru_connect2_fn_t nm_prufn; - struct socket *nm_so1; - struct socket *nm_so2; + struct netmsg nm_netmsg; + pru_connect2_fn_t nm_prufn; + struct socket *nm_so1; + struct socket *nm_so2; }; struct netmsg_pru_control { - struct netmsg nm_netmsg; - pru_control_fn_t nm_prufn; - u_long nm_cmd; - caddr_t nm_data; - struct ifnet *nm_ifp; - struct thread *nm_td; + struct netmsg nm_netmsg; + pru_control_fn_t nm_prufn; + u_long nm_cmd; + caddr_t nm_data; + struct ifnet *nm_ifp; + struct thread *nm_td; }; struct netmsg_pru_detach { - struct netmsg nm_netmsg; - pru_detach_fn_t nm_prufn; + struct netmsg nm_netmsg; + pru_detach_fn_t nm_prufn; }; struct netmsg_pru_disconnect { - struct netmsg nm_netmsg; - pru_disconnect_fn_t nm_prufn; + struct netmsg nm_netmsg; + pru_disconnect_fn_t nm_prufn; }; struct netmsg_pru_listen { - struct netmsg nm_netmsg; - pru_listen_fn_t nm_prufn; - struct 
thread *nm_td; + struct netmsg nm_netmsg; + pru_listen_fn_t nm_prufn; + struct thread *nm_td; }; struct netmsg_pru_peeraddr { - struct netmsg nm_netmsg; - pru_peeraddr_fn_t nm_prufn; - struct sockaddr **nm_nam; + struct netmsg nm_netmsg; + pru_peeraddr_fn_t nm_prufn; + struct sockaddr **nm_nam; }; struct netmsg_pru_rcvd { - struct netmsg nm_netmsg; - pru_rcvd_fn_t nm_prufn; - int nm_flags; + struct netmsg nm_netmsg; + pru_rcvd_fn_t nm_prufn; + int nm_flags; }; struct netmsg_pru_rcvoob { - struct netmsg nm_netmsg; - pru_rcvoob_fn_t nm_prufn; - struct mbuf *nm_m; - int nm_flags; + struct netmsg nm_netmsg; + pru_rcvoob_fn_t nm_prufn; + struct mbuf *nm_m; + int nm_flags; }; struct netmsg_pru_send { - struct netmsg nm_netmsg; - pru_send_fn_t nm_prufn; - int nm_flags; - struct mbuf *nm_m; - struct sockaddr *nm_addr; - struct mbuf *nm_control; - struct thread *nm_td; + struct netmsg nm_netmsg; + pru_send_fn_t nm_prufn; + int nm_flags; + struct mbuf *nm_m; + struct sockaddr *nm_addr; + struct mbuf *nm_control; + struct thread *nm_td; }; struct netmsg_pru_sense { - struct netmsg nm_netmsg; - pru_sense_fn_t nm_prufn; - struct stat *nm_stat; + struct netmsg nm_netmsg; + pru_sense_fn_t nm_prufn; + struct stat *nm_stat; }; struct netmsg_pru_shutdown { - struct netmsg nm_netmsg; - pru_shutdown_fn_t nm_prufn; + struct netmsg nm_netmsg; + pru_shutdown_fn_t nm_prufn; }; struct netmsg_pru_sockaddr { - struct netmsg nm_netmsg; - pru_sockaddr_fn_t nm_prufn; - struct sockaddr **nm_nam; + struct netmsg nm_netmsg; + pru_sockaddr_fn_t nm_prufn; + struct sockaddr **nm_nam; }; struct netmsg_pru_sosend { - struct netmsg nm_netmsg; - pru_sosend_fn_t nm_prufn; - struct sockaddr *nm_addr; - struct uio *nm_uio; - struct mbuf *nm_top; - struct mbuf *nm_control; - int nm_flags; - struct thread *nm_td; + struct netmsg nm_netmsg; + pru_sosend_fn_t nm_prufn; + struct sockaddr *nm_addr; + struct uio *nm_uio; + struct mbuf *nm_top; + struct mbuf *nm_control; + int nm_flags; + struct thread *nm_td; }; 
struct netmsg_pru_soreceive { - struct netmsg nm_netmsg; - struct sockaddr *nm_addr; - struct sockaddr **nm_paddr; - struct uio *nm_uio; - struct sockbuf *nm_sio; - struct mbuf **nm_controlp; - int *nm_flagsp; + struct netmsg nm_netmsg; + struct sockaddr *nm_addr; + struct sockaddr **nm_paddr; + struct uio *nm_uio; + struct sockbuf *nm_sio; + struct mbuf **nm_controlp; + int *nm_flagsp; }; struct netmsg_pru_ctloutput { - struct netmsg nm_netmsg; - pru_ctloutput_fn_t nm_prufn; - struct sockopt *nm_sopt; + struct netmsg nm_netmsg; + pru_ctloutput_fn_t nm_prufn; + struct sockopt *nm_sopt; }; struct netmsg_pru_ctlinput { - struct netmsg nm_netmsg; - pru_ctlinput_fn_t nm_prufn; - int nm_cmd; - struct sockaddr *nm_arg; - void *nm_extra; + struct netmsg nm_netmsg; + pru_ctlinput_fn_t nm_prufn; + int nm_cmd; + struct sockaddr *nm_arg; + void *nm_extra; }; #endif /* _KERNEL || _KERNEL_STRUCTURES */ diff --git a/sys/net/netmsg2.h b/sys/net/netmsg2.h index b55e9bb7d0..da5299b1fe 100644 --- a/sys/net/netmsg2.h +++ b/sys/net/netmsg2.h @@ -49,9 +49,9 @@ static __inline void netmsg_init(netmsg_t msg, struct socket *so, lwkt_port_t rport, int flags, netisr_fn_t dispatch) { - lwkt_initmsg(&msg->nm_lmsg, rport, flags); - msg->nm_dispatch = dispatch; - msg->nm_so = so; + lwkt_initmsg(&msg->nm_lmsg, rport, flags); + msg->nm_dispatch = dispatch; + msg->nm_so = so; } static __inline void @@ -59,9 +59,9 @@ netmsg_init_abortable(netmsg_t msg, struct socket *so, lwkt_port_t rport, int flags, netisr_fn_t dispatch, void (*abortfn)(lwkt_msg_t)) { - lwkt_initmsg_abortable(&msg->nm_lmsg, rport, flags, abortfn); - msg->nm_dispatch = dispatch; - msg->nm_so = so; + lwkt_initmsg_abortable(&msg->nm_lmsg, rport, flags, abortfn); + msg->nm_dispatch = dispatch; + msg->nm_so = so; } #endif /* _NET_NETMSG2_H_ */ diff --git a/sys/net/ppp/if_ppp.c b/sys/net/ppp/if_ppp.c index 23c42fa295..94fc7512f9 100644 --- a/sys/net/ppp/if_ppp.c +++ b/sys/net/ppp/if_ppp.c @@ -98,6 +98,7 @@ #include #include +#include 
#include #include @@ -211,6 +212,8 @@ pppintr(struct netmsg *msg) */ lwkt_replymsg(&msg->nm_lmsg, 0); + get_mplock(); + sc = ppp_softc; for (i = 0; i < NPPP; ++i, ++sc) { ifnet_serialize_all(&sc->sc_if); @@ -227,6 +230,7 @@ pppintr(struct netmsg *msg) } ifnet_deserialize_all(&sc->sc_if); } + rel_mplock(); } /* @@ -257,8 +261,7 @@ pppattach(void *dummy) if_attach(&sc->sc_if, NULL); bpfattach(&sc->sc_if, DLT_PPP, PPP_HDRLEN); } - netisr_register(NETISR_PPP, cpu0_portfn, pktinfo_portfn_cpu0, - pppintr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_PPP, pppintr, NULL); /* * XXX layering violation - if_ppp can work over any lower level * transport that cares to attach to it. diff --git a/sys/net/sppp/if_spppsubr.c b/sys/net/sppp/if_spppsubr.c index 90f2c02c12..b28a3427f8 100644 --- a/sys/net/sppp/if_spppsubr.c +++ b/sys/net/sppp/if_spppsubr.c @@ -728,13 +728,14 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) /* Check queue. */ - netisr_dispatch(isr, m); + netisr_queue(isr, m); + + /* + * Do only account for network packets, not for control + * packets. This is used by some subsystems to detect + * idle lines. + */ if (do_account) - /* - * Do only account for network packets, not for control - * packets. This is used by some subsystems to detect - * idle lines. - */ sp->pp_last_recv = time_second; } diff --git a/sys/net/stf/if_stf.c b/sys/net/stf/if_stf.c index 9a886574d7..5d6d2b2c7c 100644 --- a/sys/net/stf/if_stf.c +++ b/sys/net/stf/if_stf.c @@ -587,7 +587,7 @@ in_stf_input(struct mbuf *m, ...) 
*/ ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; - netisr_dispatch(NETISR_IPV6, m); + netisr_queue(NETISR_IPV6, m); } /* ARGSUSED */ diff --git a/sys/net/tun/if_tun.c b/sys/net/tun/if_tun.c index 6d20a0519a..6c851c5276 100644 --- a/sys/net/tun/if_tun.c +++ b/sys/net/tun/if_tun.c @@ -697,7 +697,7 @@ tunwrite(struct dev_write_args *ap) return (EAFNOSUPPORT); } - netisr_dispatch(isr, top); + netisr_queue(isr, top); return (0); } diff --git a/sys/netbt/bt_input.c b/sys/netbt/bt_input.c index 8f8931051a..92ce6f51dc 100644 --- a/sys/netbt/bt_input.c +++ b/sys/netbt/bt_input.c @@ -24,6 +24,8 @@ #include +#include + #include void @@ -31,7 +33,9 @@ btintr(struct netmsg *msg) { struct hci_unit *unit; + get_mplock(); TAILQ_FOREACH(unit, &hci_unit_list, hci_next) { hci_intr(unit); } + rel_mplock(); } diff --git a/sys/netbt/bt_proto.c b/sys/netbt/bt_proto.c index 67afacdff1..2ccfca52cf 100644 --- a/sys/netbt/bt_proto.c +++ b/sys/netbt/bt_proto.c @@ -239,8 +239,7 @@ SYSCTL_INT(_net_bluetooth_sco, OID_AUTO, recvspace, CTLFLAG_RW, &sco_recvspace, static void netisr_netbt_setup(void *dummy __unused) { - netisr_register(NETISR_BLUETOOTH, cpu0_portfn, pktinfo_portfn_cpu0, - btintr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_BLUETOOTH, btintr, NULL); } SYSINIT(netbt_setup, SI_BOOT2_KLD, SI_ORDER_ANY, netisr_netbt_setup, NULL); diff --git a/sys/netgraph/iface/ng_iface.c b/sys/netgraph/iface/ng_iface.c index 943df9b419..8f971c25f8 100644 --- a/sys/netgraph/iface/ng_iface.c +++ b/sys/netgraph/iface/ng_iface.c @@ -792,7 +792,7 @@ ng_iface_rcvdata(hook_p hook, struct mbuf *m, meta_p meta) m_freem(m); return (EAFNOSUPPORT); } - netisr_dispatch(isr, m); + netisr_queue(isr, m); return (0); } diff --git a/sys/netgraph/netgraph/ng_base.c b/sys/netgraph/netgraph/ng_base.c index c83f335f48..dad04926cf 100644 --- a/sys/netgraph/netgraph/ng_base.c +++ b/sys/netgraph/netgraph/ng_base.c @@ -1842,9 +1842,7 @@ ngb_mod_event(module_t mod, int event, void *data) crit_exit(); break; } - 
netisr_register(NETISR_NETGRAPH, cpu0_portfn, - pktinfo_portfn_notsupp, ngintr, - NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_NETGRAPH, ngintr, NULL); error = 0; crit_exit(); break; @@ -2058,6 +2056,8 @@ ngintr(struct netmsg *pmsg) */ lwkt_replymsg(&pmsg->nm_lmsg, 0); + get_mplock(); + while (1) { crit_enter(); if ((ngq = ngqbase)) { @@ -2096,7 +2096,7 @@ ngintr(struct netmsg *pmsg) } } out: - ; + rel_mplock(); } diff --git a/sys/netgraph7/ng_base.c b/sys/netgraph7/ng_base.c index 204fff5ac0..3b19fae545 100644 --- a/sys/netgraph7/ng_base.c +++ b/sys/netgraph7/ng_base.c @@ -3068,8 +3068,7 @@ ngb_mod_event(module_t mod, int event, void *data) ng_qdzone = uma_zcreate("NetGraph data items", sizeof(struct ng_item), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(ng_qdzone, maxdata); - netisr_register(NETISR_NETGRAPH, (netisr_t *)ngintr, NULL, - NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_NETGRAPH, (netisr_t *)ngintr, NULL); break; case MOD_UNLOAD: /* You can't unload it because an interface may be using it. 
*/ @@ -3233,6 +3232,7 @@ SYSCTL_PROC(_debug, OID_AUTO, ng_dump_items, CTLTYPE_INT | CTLFLAG_RW, static void ngintr(void) { + XXX replymsg XXX for (;;) { node_p node; diff --git a/sys/netgraph7/ng_iface.c b/sys/netgraph7/ng_iface.c index 7162da3448..0982beed3d 100644 --- a/sys/netgraph7/ng_iface.c +++ b/sys/netgraph7/ng_iface.c @@ -756,7 +756,7 @@ ng_iface_rcvdata(hook_p hook, item_p item) /* First chunk of an mbuf contains good junk */ if (harvest.point_to_point) random_harvest(m, 16, 3, 0, RANDOM_NET); - netisr_dispatch(isr, m); + netisr_queue(isr, m); return (0); } diff --git a/sys/netgraph7/ng_ip_input.c b/sys/netgraph7/ng_ip_input.c index ae1adc11b5..df5b9874d7 100644 --- a/sys/netgraph7/ng_ip_input.c +++ b/sys/netgraph7/ng_ip_input.c @@ -121,7 +121,7 @@ ngipi_rcvdata(hook_p hook, item_p item) NGI_GET_M(item, m); NG_FREE_ITEM(item); - netisr_dispatch(NETISR_IP, m); + netisr_queue(NETISR_IP, m); return 0; } diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 11b86f6ad2..02a4702b17 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -149,9 +149,6 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, &arp_proxyall, 0, "Enable proxy ARP for all suitable requests"); -static int arp_mpsafe = 1; -TUNABLE_INT("net.link.ether.inet.arp_mpsafe", &arp_mpsafe); - static void arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static void arprequest(struct ifnet *, const struct in_addr *, const struct in_addr *, const u_char *); @@ -417,7 +414,7 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip, /* * Same as arprequest(), except: * - Caller is allowed to hold ifp's serializer - * - Network output is done in TDF_NETWORK kernel thread + * - Network output is done in protocol thead */ static void arprequest_async(struct ifnet *ifp, const struct in_addr *sip, @@ -527,7 +524,7 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, if 
(la->la_hold != NULL) m_freem(la->la_hold); la->la_hold = m; - la->la_msgport = curnetport; + la->la_msgport = cur_netport(); if (rt->rt_expire || ((rt->rt_flags & RTF_STATIC) && !sdl->sdl_alen)) { rt->rt_flags &= ~RTF_REJECT; if (la->la_asked == 0 || rt->rt_expire != time_second) { @@ -747,8 +744,7 @@ arp_update_oncpu(struct mbuf *m, in_addr_t saddr, boolean_t create, pmsg = &m->m_hdr.mh_netmsg; netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, - MSGF_MPSAFE | MSGF_PRIORITY, - arp_hold_output); + MSGF_PRIORITY, arp_hold_output); pmsg->nm_packet = m; /* Record necessary information */ @@ -1118,20 +1114,12 @@ arp_iainit(struct ifnet *ifp, const struct in_addr *addr, const u_char *enaddr) static void arp_init(void) { - uint32_t flags; int cpu; for (cpu = 0; cpu < ncpus2; cpu++) LIST_INIT(&llinfo_arp_list[cpu]); - if (arp_mpsafe) { - flags = NETISR_FLAG_MPSAFE; - kprintf("arp: MPSAFE\n"); - } else { - flags = NETISR_FLAG_NOTMPSAFE; - } - netisr_register(NETISR_ARP, cpu0_portfn, pktinfo_portfn_cpu0, - arpintr, flags); + netisr_register(NETISR_ARP, arpintr, NULL); } SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 76d0719568..ece006f8c9 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -122,14 +122,15 @@ struct protosw inetsw[] = { ip_init, 0, ip_slowtimo, ip_drain, &nousrreqs }, -{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR, +{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR|PR_MPSAFE, udp_input, 0, udp_ctlinput, ip_ctloutput, udp_soport, udp_ctlport, udp_init, 0, 0, 0, &udp_usrreqs }, -{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, - PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD, +{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED | + PR_IMPLOPCL | PR_WANTRCVD | + PR_MPSAFE, tcp_input, 0, tcp_ctlinput, tcp_ctloutput, tcp_soport, tcp_ctlport, tcp_init, 0, tcp_slowtimo, tcp_drain, diff --git a/sys/netinet/ip_demux.c b/sys/netinet/ip_demux.c index 
faf2cc9064..e3e17d52c7 100644 --- a/sys/netinet/ip_demux.c +++ b/sys/netinet/ip_demux.c @@ -61,12 +61,11 @@ #include #include -extern struct thread netisr_cpu[]; extern int udp_mpsafe_thread; -static struct thread tcp_thread[MAXCPU]; -static struct thread udp_thread[MAXCPU]; - +/* + * Toeplitz hash functions - the idea is to match the hardware. + */ static __inline int INP_MPORT_HASH_UDP(in_addr_t faddr, in_addr_t laddr, in_port_t fport, in_port_t lport) @@ -82,6 +81,21 @@ INP_MPORT_HASH_TCP(in_addr_t faddr, in_addr_t laddr, toeplitz_rawhash_addrport(faddr, laddr, fport, lport)); } +/* + * Map a network address to a processor. + */ +int +tcp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) +{ + return (INP_MPORT_HASH_TCP(faddr, laddr, fport, lport)); +} + +int +udp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) +{ + return (INP_MPORT_HASH_UDP(faddr, laddr, fport, lport)); +} + /* * If the packet is a valid IP datagram, upon returning of this function * following things are promised: @@ -104,30 +118,35 @@ INP_MPORT_HASH_TCP(in_addr_t faddr, in_addr_t laddr, * o IP total length is not less than (IP header length + TCP header length). */ boolean_t -ip_lengthcheck(struct mbuf **mp) +ip_lengthcheck(struct mbuf **mp, int hoff) { struct mbuf *m = *mp; struct ip *ip; - int iphlen, iplen; + int len, iphlen, iplen; struct tcphdr *th; int thoff; /* TCP data offset */ + len = hoff + sizeof(struct ip); + /* The packet must be at least the size of an IP header. */ - if (m->m_pkthdr.len < sizeof(struct ip)) { + if (m->m_pkthdr.len < len) { + kprintf("pkthdr %d %d < %d\n", (m->m_flags & M_PKTHDR), + m->m_pkthdr.len, len); ipstat.ips_tooshort++; goto fail; } /* The fixed IP header must reside completely in the first mbuf. 
*/ - if (m->m_len < sizeof(struct ip)) { - m = m_pullup(m, sizeof(struct ip)); + if (m->m_len < len) { + m = m_pullup(m, len); if (m == NULL) { + kprintf("can't pullup %d\n", len); ipstat.ips_toosmall++; goto fail; } } - ip = mtod(m, struct ip *); + ip = mtodoff(m, struct ip *, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; @@ -137,13 +156,13 @@ ip_lengthcheck(struct mbuf **mp) } /* The full IP header must reside completely in the one mbuf. */ - if (m->m_len < iphlen) { - m = m_pullup(m, iphlen); + if (m->m_len < hoff + iphlen) { + m = m_pullup(m, hoff + iphlen); if (m == NULL) { ipstat.ips_badhlen++; goto fail; } - ip = mtod(m, struct ip *); + ip = mtodoff(m, struct ip *, hoff); } iplen = ntohs(ip->ip_len); @@ -152,7 +171,10 @@ ip_lengthcheck(struct mbuf **mp) * Check that the amount of data in the buffers is as * at least much as the IP header would have us expect. */ - if (m->m_pkthdr.len < iplen) { + if (m->m_pkthdr.len < hoff + iplen) { + kprintf("data in buffer not enough %d - %d vs %d+%d\n", + (m->m_flags & M_PKTHDR), + m->m_pkthdr.len, hoff, iplen); ipstat.ips_tooshort++; goto fail; } @@ -179,13 +201,13 @@ ip_lengthcheck(struct mbuf **mp) ++tcpstat.tcps_rcvshort; goto fail; } - if (m->m_len < iphlen + sizeof(struct tcphdr)) { - m = m_pullup(m, iphlen + sizeof(struct tcphdr)); + if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) { + m = m_pullup(m, hoff + iphlen + sizeof(struct tcphdr)); if (m == NULL) { tcpstat.tcps_rcvshort++; goto fail; } - ip = mtod(m, struct ip *); + ip = mtodoff(m, struct ip *, hoff); } th = (struct tcphdr *)((caddr_t)ip + iphlen); thoff = th->th_off << 2; @@ -194,8 +216,8 @@ ip_lengthcheck(struct mbuf **mp) tcpstat.tcps_rcvbadoff++; goto fail; } - if (m->m_len < iphlen + thoff) { - m = m_pullup(m, iphlen + thoff); + if (m->m_len < hoff + iphlen + thoff) { + m = m_pullup(m, hoff + iphlen + thoff); if (m == NULL) { tcpstat.tcps_rcvshort++; goto fail; @@ -207,8 +229,8 @@ ip_lengthcheck(struct 
mbuf **mp) ++udpstat.udps_hdrops; goto fail; } - if (m->m_len < iphlen + sizeof(struct udphdr)) { - m = m_pullup(m, iphlen + sizeof(struct udphdr)); + if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) { + m = m_pullup(m, hoff + iphlen + sizeof(struct udphdr)); if (m == NULL) { udpstat.udps_hdrops++; goto fail; @@ -236,13 +258,13 @@ fail: } /* - * Map a packet to a protocol processing thread and return the thread's port. - * If an error occurs, the passed mbuf will be freed, *mptr will be set - * to NULL, and NULL will be returned. If no error occurs, the passed mbuf - * may be modified and a port pointer will be returned. + * Assign a protocol processing thread to a packet. The IP header is at + * offset (hoff) in the packet (i.e. the mac header might still be intact). + * + * This function can blow away the mbuf if the packet is malformed. */ -lwkt_port_t -ip_mport(struct mbuf **mptr, int dir) +void +ip_cpufn(struct mbuf **mptr, int hoff, int dir) { struct ip *ip; int iphlen; @@ -250,14 +272,13 @@ ip_mport(struct mbuf **mptr, int dir) struct udphdr *uh; struct mbuf *m; int thoff; /* TCP data offset */ - lwkt_port_t port; int cpu; - if (!ip_lengthcheck(mptr)) - return (NULL); + if (!ip_lengthcheck(mptr, hoff)) + return; m = *mptr; - ip = mtod(m, struct ip *); + ip = mtodoff(m, struct ip *, hoff); iphlen = ip->ip_hl << 2; /* @@ -265,7 +286,6 @@ ip_mport(struct mbuf **mptr, int dir) */ if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) { cpu = 0; - port = &netisr_cpu[cpu].td_msgport; goto back; } @@ -277,7 +297,6 @@ ip_mport(struct mbuf **mptr, int dir) ip->ip_dst.s_addr, th->th_sport, th->th_dport); - port = &tcp_thread[cpu].td_msgport; break; case IPPROTO_UDP: @@ -287,29 +306,28 @@ ip_mport(struct mbuf **mptr, int dir) ip->ip_dst.s_addr, uh->uh_sport, uh->uh_dport); - port = &udp_thread[cpu].td_msgport; break; default: cpu = 0; - port = &netisr_cpu[cpu].td_msgport; break; } back: m->m_flags |= M_HASH; m->m_pkthdr.hash = cpu; - return (port); } -lwkt_port_t 
-ip_mport_in(struct mbuf **mptr) +void +ip_cpufn_in(struct mbuf **mptr, int hoff) { - return ip_mport(mptr, IP_MPORT_IN); + ip_cpufn(mptr, hoff, IP_MPORT_IN); } +#if 0 + /* * Map a packet to a protocol processing thread and return the thread's port. - * Unlike ip_mport(), the packet content is not accessed. The packet info + * Unlike ip_cpufn(), the packet content is not accessed. The packet info * (pi) and the hash of the packet (m_pkthdr.hash) is used instead. NULL is * returned if the packet info does not contain enough information. * @@ -329,16 +347,16 @@ ip_mport_pktinfo(const struct pktinfo *pi, struct mbuf *m) */ if (pi->pi_flags & PKTINFO_FLAG_FRAG) { m->m_pkthdr.hash = 0; - return &netisr_cpu[0].td_msgport; + return cpu_portfn(0); } switch (pi->pi_l3proto) { case IPPROTO_TCP: - port = &tcp_thread[m->m_pkthdr.hash].td_msgport; + port = cpu_portfn(m->m_pkthdr.hash); break; case IPPROTO_UDP: - port = &udp_thread[m->m_pkthdr.hash].td_msgport; + port = cpu_portfn(m->m_pkthdr.hash); break; default: @@ -348,6 +366,8 @@ ip_mport_pktinfo(const struct pktinfo *pi, struct mbuf *m) return port; } +#endif + /* * Initital port when creating the socket, generally before * binding or connect. 
@@ -355,7 +375,7 @@ ip_mport_pktinfo(const struct pktinfo *pi, struct mbuf *m) lwkt_port_t tcp_soport_attach(struct socket *so) { - return(&tcp_thread[0].td_msgport); + return(cpu_portfn(0)); } /* @@ -406,27 +426,25 @@ tcp_ctlport(int cmd, struct sockaddr *sa, void *vip) cpu = tcp_addrcpu(faddr.s_addr, th->th_dport, ip->ip_src.s_addr, th->th_sport); } - return(&tcp_thread[cpu].td_msgport); + return(cpu_portfn(cpu)); } lwkt_port_t tcp_addrport(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) { - return (&tcp_thread[tcp_addrcpu(faddr, fport, - laddr, lport)].td_msgport); + return(cpu_portfn(tcp_addrcpu(faddr, fport, laddr, lport))); } lwkt_port_t tcp_addrport0(void) { - return (&tcp_thread[0].td_msgport); + return(cpu_portfn(0)); } lwkt_port_t udp_addrport(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) { - return (&udp_thread[udp_addrcpu(faddr, fport, - laddr, lport)].td_msgport); + return(cpu_portfn(udp_addrcpu(faddr, fport, laddr, lport))); } /* @@ -436,7 +454,7 @@ udp_addrport(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) lwkt_port_t udp_soport_attach(struct socket *so) { - return(&udp_thread[0].td_msgport); + return(cpu_portfn(0)); } /* @@ -487,61 +505,5 @@ udp_ctlport(int cmd, struct sockaddr *sa, void *vip) cpu = INP_MPORT_HASH_UDP(faddr.s_addr, ip->ip_src.s_addr, uh->uh_dport, uh->uh_sport); } - return (&udp_thread[cpu].td_msgport); -} - -/* - * Map a network address to a processor. - */ -int -tcp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) -{ - return (INP_MPORT_HASH_TCP(faddr, laddr, fport, lport)); -} - -int -udp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) -{ - return (INP_MPORT_HASH_UDP(faddr, laddr, fport, lport)); -} - -/* - * Return LWKT port for cpu. 
- */ -lwkt_port_t -tcp_cport(int cpu) -{ - return (&tcp_thread[cpu].td_msgport); -} - -lwkt_port_t -udp_cport(int cpu) -{ - return (&udp_thread[cpu].td_msgport); -} - -void -tcp_thread_init(void) -{ - int cpu; - - for (cpu = 0; cpu < ncpus2; cpu++) { - lwkt_create(tcpmsg_service_loop, NULL, NULL, - &tcp_thread[cpu], TDF_NETWORK, cpu, - "tcp_thread %d", cpu); - netmsg_service_port_init(&tcp_thread[cpu].td_msgport); - } -} - -void -udp_thread_init(void) -{ - int cpu; - - for (cpu = 0; cpu < ncpus2; cpu++) { - lwkt_create(netmsg_service_loop, &udp_mpsafe_thread, NULL, - &udp_thread[cpu], TDF_NETWORK, cpu, - "udp_thread %d", cpu); - netmsg_service_port_init(&udp_thread[cpu].td_msgport); - } + return (cpu_portfn(cpu)); } diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 07b2c473be..bec5dc2f3a 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -215,7 +215,16 @@ div_soport(struct socket *so, struct sockaddr *nam, struct mbuf **mptr) m->m_pkthdr.rcvif = ifa->ifa_ifp; } - return ip_mport(mptr, dir); + /* + * Recalculate the protocol thread. 
+ */ + ip_cpufn(mptr, 0, dir); + m = *mptr; + if (m) { + KKASSERT(m->m_flags & M_HASH); + return(cpu_portfn(m->m_pkthdr.hash)); + } + return(NULL); } /* @@ -369,7 +378,7 @@ divert_packet(struct mbuf *m, int incoming) nmp = &m->m_hdr.mh_netmsg; netmsg_init(&nmp->nm_netmsg, NULL, &netisr_apanic_rport, - MSGF_MPSAFE, div_packet_handler); + 0, div_packet_handler); nmp->nm_packet = m; msg = &nmp->nm_netmsg.nm_lmsg; @@ -543,7 +552,7 @@ static int div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { - /* Length check already done in ip_mport() */ + /* Length check already done in ip_cpufn() */ KASSERT(m->m_len >= sizeof(struct ip), ("IP header not in one mbuf")); /* Send packet */ diff --git a/sys/netinet/ip_flow.c b/sys/netinet/ip_flow.c index 7603027eb7..5c8f19b645 100644 --- a/sys/netinet/ip_flow.c +++ b/sys/netinet/ip_flow.c @@ -204,7 +204,7 @@ ipflow_fastforward(struct mbuf *m) if (m->m_flags & (M_BCAST | M_MCAST)) return 0; - /* length checks already done in ip_mport() */ + /* length checks already done in ip_cpufn() */ KASSERT(m->m_len >= sizeof(struct ip), ("IP header not in one mbuf")); ip = mtod(m, struct ip *); @@ -215,7 +215,7 @@ ipflow_fastforward(struct mbuf *m) return 0; iplen = ntohs(ip->ip_len); - /* length checks already done in ip_mport() */ + /* length checks already done in ip_cpufn() */ KASSERT(iplen >= sizeof(struct ip), ("total length less then header length")); KASSERT(m->m_pkthdr.len >= iplen, ("mbuf too short")); @@ -573,7 +573,7 @@ ipflow_init(void) for (i = 0; i < ncpus; ++i) { netmsg_init(&ipflow_timo_netmsgs[i], NULL, &netisr_adone_rport, - MSGF_MPSAFE, ipflow_timo_dispatch); + 0, ipflow_timo_dispatch); ksnprintf(oid_name, sizeof(oid_name), "inuse%d", i); diff --git a/sys/netinet/ip_gre.c b/sys/netinet/ip_gre.c index 4eba6de670..7ecf5cbb84 100644 --- a/sys/netinet/ip_gre.c +++ b/sys/netinet/ip_gre.c @@ -206,7 +206,7 @@ gre_input2(struct mbuf *m ,int hlen, u_char proto)
bpf_ptap(sc->sc_if.if_bpf, m, &af, sizeof(af)); m->m_pkthdr.rcvif = &sc->sc_if; - netisr_dispatch(isr, m); + netisr_queue(isr, m); return(1); /* packet is done, no further processing needed */ } @@ -278,7 +278,7 @@ gre_mobile_input(struct mbuf *m, ...) m->m_pkthdr.rcvif = &sc->sc_if; - netisr_dispatch(NETISR_IP, m); + netisr_queue(NETISR_IP, m); } /* diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 428f217796..171a39f0d7 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -140,9 +140,6 @@ int rsvp_on = 0; static int ip_rsvp_on; struct socket *ip_rsvpd; -int ip_mpsafe = 1; -TUNABLE_INT("net.inet.ip.mpsafe", &ip_mpsafe); - int ipforwarding = 0; SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, &ipforwarding, 0, "Enable IP forwarding between interfaces"); @@ -206,15 +203,25 @@ static int ip_checkinterface = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, &ip_checkinterface, 0, "Verify packet arrives on correct interface"); +static int ip_dispatch_fast = 0; +static int ip_dispatch_slow = 0; +static int ip_dispatch_recheck = 0; +static int ip_dispatch_software = 0; +SYSCTL_INT(_net_inet_ip, OID_AUTO, dispatch_fast_count, CTLFLAG_RW, + &ip_dispatch_fast, 0, ""); +SYSCTL_INT(_net_inet_ip, OID_AUTO, dispatch_slow_count, CTLFLAG_RW, + &ip_dispatch_slow, 0, ""); +SYSCTL_INT(_net_inet_ip, OID_AUTO, dispatch_software_count, CTLFLAG_RW, + &ip_dispatch_software, 0, ""); +SYSCTL_INT(_net_inet_ip, OID_AUTO, dispatch_recheck_count, CTLFLAG_RW, + &ip_dispatch_recheck, 0, ""); + static struct lwkt_token ipq_token = LWKT_TOKEN_MP_INITIALIZER(ipq_token); #ifdef DIAGNOSTIC static int ipprintfs = 0; #endif -extern int udp_mpsafe_proto; -extern int tcp_mpsafe_proto; - extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; @@ -315,7 +322,6 @@ void ip_init(void) { struct protosw *pr; - uint32_t flags; int i; #ifdef SMP int cpu; @@ -342,19 +348,6 @@ ip_init(void) if 
(pr->pr_domain->dom_family == PF_INET && pr->pr_protocol) { if (pr->pr_protocol != IPPROTO_RAW) ip_protox[pr->pr_protocol] = pr - inetsw; - - /* XXX */ - switch (pr->pr_protocol) { - case IPPROTO_TCP: - if (tcp_mpsafe_proto) - pr->pr_flags |= PR_MPSAFE; - break; - - case IPPROTO_UDP: - if (udp_mpsafe_proto) - pr->pr_flags |= PR_MPSAFE; - break; - } } } @@ -385,19 +378,7 @@ ip_init(void) bzero(&ipstat, sizeof(struct ip_stats)); #endif -#if defined(IPSEC) || defined(FAST_IPSEC) - /* XXX IPSEC is not MPSAFE yet */ - flags = NETISR_FLAG_NOTMPSAFE; -#else - if (ip_mpsafe) { - kprintf("ip: MPSAFE\n"); - flags = NETISR_FLAG_MPSAFE; - } else { - flags = NETISR_FLAG_NOTMPSAFE; - } -#endif - netisr_register(NETISR_IP, ip_mport_in, ip_mport_pktinfo, - ip_input_handler, flags); + netisr_register(NETISR_IP, ip_input_handler, ip_cpufn_in); } /* Do transport protocol processing. */ @@ -464,16 +445,16 @@ ip_input(struct mbuf *m) M_ASSERTPKTHDR(m); /* - * This does necessary pullups and figures out the protocol - * port. If the packet is really badly formed it will blow - * it away and return NULL. - * - * We do not necessarily make use of the port (forwarding, - * defragmentation, etc). + * This routine is called from numerous places which may not have + * characterized the packet. 
*/ - port = ip_mport(&m, IP_MPORT_IN); - if (port == NULL) - return; + if ((m->m_flags & M_HASH) == 0) { + ++ip_dispatch_software; + ip_cpufn(&m, 0, IP_MPORT_IN); + if (m == NULL) + return; + KKASSERT(m->m_flags & M_HASH); + } ip = mtod(m, struct ip *); /* @@ -495,7 +476,7 @@ ip_input(struct mbuf *m) ipstat.ips_total++; - /* length checks already done in ip_mport() */ + /* length checks already done in ip_cpufn() */ KASSERT(m->m_len >= sizeof(struct ip), ("IP header not in one mbuf")); if (IP_VHL_V(ip->ip_vhl) != IPVERSION) { @@ -504,7 +485,7 @@ ip_input(struct mbuf *m) } hlen = IP_VHL_HL(ip->ip_vhl) << 2; - /* length checks already done in ip_mport() */ + /* length checks already done in ip_cpufn() */ KASSERT(hlen >= sizeof(struct ip), ("IP header len too small")); KASSERT(m->m_len >= hlen, ("complete IP header not in one mbuf")); @@ -542,7 +523,7 @@ ip_input(struct mbuf *m) ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); - /* length checks already done in ip_mport() */ + /* length checks already done in ip_cpufn() */ KASSERT(ip->ip_len >= hlen, ("total length less then header length")); KASSERT(m->m_pkthdr.len >= ip->ip_len, ("mbuf too short")); @@ -602,9 +583,8 @@ iphack: /* * Run through list of hooks for input packets. * - * NB: Beware of the destination address changing (e.g. - * by NAT rewriting). When this happens, tell - * ip_forward to do the right thing. + * NOTE! If the packet is rewritten pf/ipfw/whoever must + * clear M_HASH. */ odst = ip->ip_dst; if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN)) @@ -870,8 +850,11 @@ ours: */ if (ip->ip_off & (IP_MF | IP_OFFMASK)) { /* - * Attempt reassembly; if it succeeds, proceed. - * ip_reass() will return a different mbuf. + * Attempt reassembly; if it succeeds, proceed. ip_reass() + * will return a different mbuf. + * + * NOTE: ip_reass() returns m with M_HASH cleared to force + * us to recharacterize the packet. 
*/ m = ip_reass(m); if (m == NULL) @@ -937,25 +920,45 @@ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ #endif /* FAST_IPSEC */ /* - * NOTE: ip_len is now in host form and adjusted down by hlen for - * protocol processing. - * * We must forward the packet to the correct protocol thread if * we are not already in it. + * + * NOTE: ip_len is now in host form. ip_len is not adjusted + * further for protocol processing, instead we pass hlen + * to the protosw and let it deal with it. */ ipstat.ips_delivered++; + if ((m->m_flags & M_HASH) == 0) { + ++ip_dispatch_recheck; + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); + + ip_cpufn(&m, 0, IP_MPORT_IN); + if (m == NULL) + return; + + ip = mtod(m, struct ip *); + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + KKASSERT(m->m_flags & M_HASH); + } + port = cpu_portfn(m->m_pkthdr.hash); + if (port != &curthread->td_msgport) { struct netmsg_packet *pmsg; + ++ip_dispatch_slow; + pmsg = &m->m_hdr.mh_netmsg; netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, - MSGF_MPSAFE, transport_processing_handler); + 0, transport_processing_handler); pmsg->nm_packet = m; pmsg->nm_netmsg.nm_lmsg.u.ms_result = hlen; lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg); } else { + ++ip_dispatch_fast; transport_processing_oncpu(m, hlen, ip); } return; @@ -1242,6 +1245,16 @@ inserted: m->m_pkthdr.len = plen; } + /* + * Reassembly complete, return the next protocol. + * + * Be sure to clear M_HASH to force the packet + * to be re-characterized. + * + * Clear M_FRAG, we are no longer a fragment. 
+ */ + m->m_flags &= ~(M_HASH | M_FRAG); + ipstat.ips_reassembled++; lwkt_reltoken(&ipq_token); return (m); diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 8be331bfa8..140ccbaff4 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -169,7 +169,7 @@ extern struct ip_stats ipstats_percpu[MAXCPU]; #define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */ #define IP_DEBUGROUTE 0x10000 /* debug route */ -/* direction passed to ip_mport as last parameter */ +/* direction passed to ip_cpufn as last parameter */ #define IP_MPORT_IN 0 /* Find lwkt port for incoming packets */ #define IP_MPORT_OUT 1 /* Find lwkt port for outgoing packets */ @@ -202,12 +202,11 @@ void ip_init(void); extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); -struct lwkt_port *ip_mport(struct mbuf **, int); -struct lwkt_port *ip_mport_in(struct mbuf **); -struct lwkt_port *ip_mport_pktinfo(const struct pktinfo *, struct mbuf *); +void ip_cpufn(struct mbuf **, int, int); +void ip_cpufn_in(struct mbuf **, int); boolean_t - ip_lengthcheck(struct mbuf **); + ip_lengthcheck(struct mbuf **, int); int ip_output(struct mbuf *, struct mbuf *, struct route *, int, struct ip_moptions *, struct inpcb *); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 7f07a7d70d..15cf85aa03 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -152,23 +152,16 @@ #define KTR_TCP KTR_ALL #endif KTR_INFO_MASTER(tcp); +/* KTR_INFO(KTR_TCP, tcp, rxmsg, 0, "tcp getmsg", 0); KTR_INFO(KTR_TCP, tcp, wait, 1, "tcp waitmsg", 0); KTR_INFO(KTR_TCP, tcp, delayed, 2, "tcp execute delayed ops", 0); +*/ #define logtcp(name) KTR_LOG(tcp_ ## name) struct inpcbinfo tcbinfo[MAXCPU]; struct tcpcbackqhead tcpcbackq[MAXCPU]; -int tcp_mpsafe_proto = 0; -TUNABLE_INT("net.inet.tcp.mpsafe_proto", &tcp_mpsafe_proto); - -static int tcp_mpsafe_thread = NETMSG_SERVICE_ADAPTIVE; -TUNABLE_INT("net.inet.tcp.mpsafe_thread", &tcp_mpsafe_thread); 
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, mpsafe_thread, CTLFLAG_RW, - &tcp_mpsafe_thread, 0, - "0:BGL, 1:Adaptive BGL, 2:No BGL(experimental)"); - int tcp_mssdflt = TCP_MSS; SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, &tcp_mssdflt, 0, "Default TCP Maximum Segment Size"); @@ -254,7 +247,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW, static MALLOC_DEFINE(M_TCPTEMP, "tcptemp", "TCP Templates for Keepalives"); static struct malloc_pipe tcptemp_mpipe; -static void tcp_willblock(int); +static void tcp_willblock(void); static void tcp_notify (struct inpcb *, int); struct tcp_stats tcpstats_percpu[MAXCPU]; @@ -392,48 +385,14 @@ tcp_init(void) #endif syncache_init(); - tcp_thread_init(); -} - -void -tcpmsg_service_loop(void *dummy) -{ - struct netmsg *msg; - int mplocked; - - /* - * Threads always start mpsafe. - */ - mplocked = 0; - - while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) { - do { - logtcp(rxmsg); - mplocked = netmsg_service(msg, tcp_mpsafe_thread, - mplocked); - } while ((msg = lwkt_getport(&curthread->td_msgport)) != NULL); - - logtcp(delayed); - tcp_willblock(mplocked); - logtcp(wait); - } + netisr_register_rollup(tcp_willblock); } static void -tcp_willblock(int mplocked) +tcp_willblock(void) { struct tcpcb *tp; int cpu = mycpu->gd_cpuid; - int unlock = 0; - - if (!mplocked && !tcp_mpsafe_proto) { - if (TAILQ_EMPTY(&tcpcbackq[cpu])) - return; - - get_mplock(); - mplocked = 1; - unlock = 1; - } while ((tp = TAILQ_FIRST(&tcpcbackq[cpu])) != NULL) { KKASSERT(tp->t_flags & TF_ONOUTPUTQ); @@ -441,12 +400,8 @@ tcp_willblock(int mplocked) TAILQ_REMOVE(&tcpcbackq[cpu], tp, t_outputq); tcp_output(tp); } - - if (unlock) - rel_mplock(); } - /* * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. 
* tcp_template used to store this data in mbufs, but we now recopy it out @@ -819,7 +774,7 @@ in_pcbremwildcardhash_handler(struct netmsg *msg0) in_pcbremwildcardhash_oncpu(msg->nm_inp, msg->nm_pcbinfo); cpu = (cpu + 1) % ncpus2; msg->nm_pcbinfo = &tcbinfo[cpu]; - lwkt_forwardmsg(tcp_cport(cpu), &msg->nm_netmsg.nm_lmsg); + lwkt_forwardmsg(cpu_portfn(cpu), &msg->nm_netmsg.nm_lmsg); } } @@ -1021,7 +976,7 @@ no_valid_rt: #endif msg->nm_inp = inp; msg->nm_pcbinfo = &tcbinfo[cpu]; - lwkt_sendmsg(tcp_cport(cpu), &msg->nm_netmsg.nm_lmsg); + lwkt_sendmsg(cpu_portfn(cpu), &msg->nm_netmsg.nm_lmsg); } else #endif { @@ -1118,7 +1073,7 @@ tcp_drain(void) netmsg_init(&msg->nm_netmsg, NULL, &netisr_afree_rport, 0, tcp_drain_handler); msg->nm_head = &tcbinfo[cpu].pcblisthead; - lwkt_sendmsg(tcp_cport(cpu), &msg->nm_netmsg.nm_lmsg); + lwkt_sendmsg(cpu_portfn(cpu), &msg->nm_netmsg.nm_lmsg); } } #else @@ -1374,7 +1329,7 @@ tcp_notifyall_oncpu(struct netmsg *netmsg) nextcpu = mycpuid + 1; if (nextcpu < ncpus2) - lwkt_forwardmsg(tcp_cport(nextcpu), &netmsg->nm_lmsg); + lwkt_forwardmsg(cpu_portfn(nextcpu), &netmsg->nm_lmsg); else lwkt_replymsg(&netmsg->nm_lmsg, 0); } @@ -1458,7 +1413,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) nmsg.nm_arg = arg; nmsg.nm_notify = notify; - lwkt_domsg(tcp_cport(0), &nmsg.nm_nmsg.nm_lmsg, 0); + lwkt_domsg(cpu_portfn(0), &nmsg.nm_nmsg.nm_lmsg, 0); } } diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index bc5fc02946..88bb81b5ad 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -332,9 +332,9 @@ syncache_init(void) callout_init(&syncache_percpu->tt_timerq[i]); syncache_percpu->mrec[i].slot = i; - syncache_percpu->mrec[i].port = tcp_cport(cpu); + syncache_percpu->mrec[i].port = cpu_portfn(cpu); syncache_percpu->mrec[i].msg.nm_mrec = - &syncache_percpu->mrec[i]; + &syncache_percpu->mrec[i]; netmsg_init(&syncache_percpu->mrec[i].msg.nm_netmsg, NULL, &syncache_null_rport, 0, syncache_timer_handler); 
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 2493b258ec..7f46ae01de 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -387,7 +387,7 @@ tcp_usr_listen(struct socket *so, struct thread *td) 0, in_pcbinswildcardhash_handler); msg->nm_inp = inp; msg->nm_pcbinfo = &tcbinfo[cpu]; - lwkt_sendmsg(tcp_cport(cpu), &msg->nm_netmsg.nm_lmsg); + lwkt_sendmsg(cpu_portfn(cpu), &msg->nm_netmsg.nm_lmsg); } #else in_pcbinswildcardhash(inp); @@ -436,7 +436,7 @@ tcp6_usr_listen(struct socket *so, struct thread *td) 0, in_pcbinswildcardhash_handler); msg->nm_inp = inp; msg->nm_pcbinfo = &tcbinfo[cpu]; - lwkt_sendmsg(tcp_cport(cpu), &msg->nm_netmsg.nm_lmsg); + lwkt_sendmsg(cpu_portfn(cpu), &msg->nm_netmsg.nm_lmsg); } #else in_pcbinswildcardhash(inp); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 5b10d87255..447e8e493e 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -574,7 +574,6 @@ struct lwkt_port *tcp_addrport0(void); void tcp_canceltimers (struct tcpcb *); struct tcpcb * tcp_close (struct tcpcb *); -void tcpmsg_service_loop (void *); void tcp_ctlinput (int, struct sockaddr *, void *); int tcp_ctloutput (struct socket *, struct sockopt *); struct lwkt_port * diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 351965cb54..bce1ecfe46 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -118,15 +118,6 @@ #include #endif -int udp_mpsafe_proto = 0; -TUNABLE_INT("net.inet.udp.mpsafe_proto", &udp_mpsafe_proto); - -int udp_mpsafe_thread = NETMSG_SERVICE_ADAPTIVE; -TUNABLE_INT("net.inet.udp.mpsafe_thread", &udp_mpsafe_thread); -SYSCTL_INT(_net_inet_udp, OID_AUTO, mpsafe_thread, CTLFLAG_RW, - &udp_mpsafe_thread, 0, - "0:BGL, 1:Adaptive BGL, 2:No BGL(experimental)"); - /* * UDP protocol implementation. * Per RFC 768, August, 1980. 
@@ -198,7 +189,6 @@ udp_init(void) udbinfo.wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.wildcardhashmask); udbinfo.ipi_size = sizeof(struct inpcb); - udp_thread_init(); } /* @@ -627,7 +617,7 @@ udp_notifyall_oncpu(struct netmsg *netmsg) nextcpu = mycpuid + 1; if (nextcpu < ncpus2) - lwkt_forwardmsg(udp_cport(nextcpu), &netmsg->nm_lmsg); + lwkt_forwardmsg(cpu_portfn(nextcpu), &netmsg->nm_lmsg); else lwkt_replymsg(&netmsg->nm_lmsg, 0); } @@ -688,13 +678,13 @@ udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) nmsg.nm_arg = inetctlerrmap[cmd]; nmsg.nm_notify = notify; - lwkt_domsg(udp_cport(0), &nmsg.nm_nmsg.nm_lmsg, 0); + lwkt_domsg(cpu_portfn(0), &nmsg.nm_nmsg.nm_lmsg, 0); } else { /* * XXX We should forward msg upon PRC_HOSTHEAD and ip == NULL, * once UDP inpcbs are CPU localized */ - KKASSERT(&curthread->td_msgport == udp_cport(0)); + KKASSERT(&curthread->td_msgport == cpu_portfn(0)); in_pcbnotifyall(&udbinfo.pcblisthead, faddr, inetctlerrmap[cmd], notify); } diff --git a/sys/netinet6/ah_input.c b/sys/netinet6/ah_input.c index a1cddae9e4..b81e32c8d7 100644 --- a/sys/netinet6/ah_input.c +++ b/sys/netinet6/ah_input.c @@ -536,7 +536,7 @@ ah4_input(struct mbuf *m, ...) ipsecstat.in_polvio++; goto fail; } - if (!ip_lengthcheck(&m)) { + if (!ip_lengthcheck(&m, 0)) { /* freed in ip_lengthcheck() */ goto fail; } diff --git a/sys/netinet6/esp_input.c b/sys/netinet6/esp_input.c index ae21ed2f72..fa4900c634 100644 --- a/sys/netinet6/esp_input.c +++ b/sys/netinet6/esp_input.c @@ -434,7 +434,7 @@ noreplaycheck: ipsecstat.in_polvio++; goto bad; } - if (!ip_lengthcheck(&m)) { + if (!ip_lengthcheck(&m, 0)) { /* freed in ip_lengthcheck() */ goto bad; } diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index a73a8a69e6..614eab2f65 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -520,8 +520,11 @@ insert: in6_ifstat_inc(dstifp, ifs6_reass_ok); /* - * Tell launch routine the next header + * Reassembly complete, return the next protocol. 
+ * Be sure to clear M_HASH to force the packet + * to be re-characterized. */ + m->m_flags &= ~M_HASH; *mp = m; *offp = offset; diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index aeb636177a..badf0f7f87 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -163,13 +163,16 @@ struct ip6protosw inet6sw[] = { ip6_init, 0, frag6_slowtimo, frag6_drain, &nousrreqs, }, -{ SOCK_DGRAM, &inet6domain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, +{ SOCK_DGRAM, &inet6domain, IPPROTO_UDP, PR_ATOMIC | PR_ADDR | + PR_MPSAFE | PR_LASTHDR, udp6_input, 0, udp6_ctlinput, ip6_ctloutput, cpu0_soport, cpu0_ctlport, 0, 0, 0, 0, &udp6_usrreqs, }, -{ SOCK_STREAM, &inet6domain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_LASTHDR, +{ SOCK_STREAM, &inet6domain, IPPROTO_TCP, PR_CONNREQUIRED | + PR_WANTRCVD | PR_LISTEN | + PR_MPSAFE | PR_LASTHDR, tcp6_input, 0, tcp6_ctlinput, tcp_ctloutput, tcp6_soport, cpu0_ctlport, #ifdef INET /* don't call initialization and timeout routines twice */ diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index fa325f5649..d240870fb6 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -200,8 +200,7 @@ ip6_init(void) "error %d\n", __func__, i); } - netisr_register(NETISR_IPV6, cpu0_portfn, pktinfo_portfn_cpu0, - ip6_input, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_IPV6, ip6_input, NULL); /* XXX cpufn */ scope6_init(); addrsel_policy_init(); nd6_init(); @@ -866,7 +865,7 @@ hbhcheck: pmsg = &m->m_hdr.mh_netmsg; netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, - MSGF_MPSAFE, transport6_processing_handler); + 0, transport6_processing_handler); pmsg->nm_packet = m; pmsg->nm_nxt = nxt; pmsg->nm_netmsg.nm_lmsg.u.ms_result = off; diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index dad11950b4..adb2e71473 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -290,7 +290,7 @@ struct ip6aux { #define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */ 
#define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */ -/* direction passed to ip_mport as last parameter */ +/* direction passed to ip_cpufn as last parameter */ #define IP6_MPORT_IN 0 /* Find lwkt port for incoming packets */ #define IP6_MPORT_OUT 1 /* Find lwkt port for outgoing packets */ diff --git a/sys/netproto/atalk/aarp.c b/sys/netproto/atalk/aarp.c index 86f3632492..79f75f381a 100644 --- a/sys/netproto/atalk/aarp.c +++ b/sys/netproto/atalk/aarp.c @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -250,6 +251,8 @@ aarpintr(struct netmsg *msg) struct arphdr *ar; struct arpcom *ac; + get_mplock(); + ac = (struct arpcom *)m->m_pkthdr.rcvif; if ( ac->ac_if.if_flags & IFF_NOARP ) goto out; @@ -280,7 +283,7 @@ aarpintr(struct netmsg *msg) out: m_freem(m); out2: - ; + rel_mplock(); /* msg was embedded in the mbuf, do not reply! */ } diff --git a/sys/netproto/atalk/ddp_input.c b/sys/netproto/atalk/ddp_input.c index 926cb9d613..1302048b30 100644 --- a/sys/netproto/atalk/ddp_input.c +++ b/sys/netproto/atalk/ddp_input.c @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -46,7 +47,9 @@ at2intr(struct netmsg *msg) /* * Phase 2 packet handling */ + get_mplock(); ddp_input(m, m->m_pkthdr.rcvif, NULL, 2); + rel_mplock(); /* msg was embedded in the mbuf, do not reply! */ } @@ -56,6 +59,8 @@ at1intr(struct netmsg *msg) struct mbuf *m = ((struct netmsg_packet *)msg)->nm_packet; struct elaphdr *elhp, elh; + get_mplock(); + /* * Phase 1 packet handling */ @@ -77,7 +82,7 @@ at1intr(struct netmsg *msg) ddp_input(m, m->m_pkthdr.rcvif, &elh, 1); } out: - ; + rel_mplock(); /* msg was embedded in the mbuf, do not reply! 
*/ } diff --git a/sys/netproto/atalk/ddp_usrreq.c b/sys/netproto/atalk/ddp_usrreq.c index b07142c409..ae6c86a5c2 100644 --- a/sys/netproto/atalk/ddp_usrreq.c +++ b/sys/netproto/atalk/ddp_usrreq.c @@ -541,12 +541,9 @@ at_setsockaddr(struct socket *so, struct sockaddr **nam) void ddp_init(void) { - netisr_register(NETISR_ATALK1, cpu0_portfn, pktinfo_portfn_cpu0, - at1intr, NETISR_FLAG_NOTMPSAFE); - netisr_register(NETISR_ATALK2, cpu0_portfn, pktinfo_portfn_cpu0, - at2intr, NETISR_FLAG_NOTMPSAFE); - netisr_register(NETISR_AARP, cpu0_portfn, pktinfo_portfn_cpu0, - aarpintr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_ATALK1, at1intr, NULL); + netisr_register(NETISR_ATALK2, at2intr, NULL); + netisr_register(NETISR_AARP, aarpintr, NULL); } #if 0 diff --git a/sys/netproto/atm/atm_subr.c b/sys/netproto/atm/atm_subr.c index 0283da2764..fc20ab7631 100644 --- a/sys/netproto/atm/atm_subr.c +++ b/sys/netproto/atm/atm_subr.c @@ -112,8 +112,7 @@ atm_initialize(void) atm_init = 1; atm_intrq.ifq_maxlen = ATM_INTRQ_MAX; - netisr_register(NETISR_ATM, cpu0_portfn, pktinfo_portfn_cpu0, - atm_intr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_ATM, atm_intr, NULL); /* * Initialize subsystems @@ -859,6 +858,7 @@ atm_intr(struct netmsg *msg) /* * Get function to call and token value */ + get_mplock(); KB_DATASTART(m, cp, caddr_t); func = *(atm_intr_func_t *)cp; cp += sizeof(func); @@ -879,6 +879,7 @@ atm_intr(struct netmsg *msg) * Drain any deferred calls */ STACK_DRAIN(); + rel_mplock(); /* msg was embedded in the mbuf, do not reply! */ } diff --git a/sys/netproto/atm/ipatm/ipatm_input.c b/sys/netproto/atm/ipatm/ipatm_input.c index 68bbbf826f..3f3f95e4bb 100644 --- a/sys/netproto/atm/ipatm/ipatm_input.c +++ b/sys/netproto/atm/ipatm/ipatm_input.c @@ -135,6 +135,6 @@ ipatm_ipinput(struct ip_nif *inp, KBuffer *m) * just call IP directly to avoid the extra unnecessary * kernel scheduling. 
*/ - netisr_dispatch(NETISR_IP, m); + netisr_queue(NETISR_IP, m); return (0); } diff --git a/sys/netproto/ipx/ipx_input.c b/sys/netproto/ipx/ipx_input.c index 2cefe0d7f2..e25999fe4b 100644 --- a/sys/netproto/ipx/ipx_input.c +++ b/sys/netproto/ipx/ipx_input.c @@ -48,6 +48,7 @@ #include #include +#include #include #include @@ -117,8 +118,7 @@ ipx_init(void) ipx_hostmask.sipx_addr.x_net = ipx_broadnet; ipx_hostmask.sipx_addr.x_host = ipx_broadhost; - netisr_register(NETISR_IPX, cpu0_portfn, pktinfo_portfn_cpu0, - ipxintr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_IPX, ipxintr, NULL); } /* @@ -133,6 +133,8 @@ ipxintr(struct netmsg *msg) struct ipx_ifaddr *ia; int len; + get_mplock(); + /* * If no IPX addresses have been set yet but the interfaces * are receiving, can't do anything with incoming packets yet. @@ -269,7 +271,7 @@ ours: bad: m_freem(m); out: - ; + rel_mplock(); /* msg was embedded in the mbuf, do not reply! */ } diff --git a/sys/netproto/ipx/ipx_ip.c b/sys/netproto/ipx/ipx_ip.c index e999c763ab..5602ddffba 100644 --- a/sys/netproto/ipx/ipx_ip.c +++ b/sys/netproto/ipx/ipx_ip.c @@ -218,7 +218,7 @@ ipxip_input(struct mbuf *m, ...) /* * Deliver to IPX */ - netisr_dispatch(NETISR_IPX, m); + netisr_queue(NETISR_IPX, m); } static int diff --git a/sys/netproto/mpls/mpls_demux.c b/sys/netproto/mpls/mpls_demux.c index 447dcb0302..dc2b80c59d 100644 --- a/sys/netproto/mpls/mpls_demux.c +++ b/sys/netproto/mpls/mpls_demux.c @@ -56,53 +56,50 @@ MPLSP_MPORT_HASH(mpls_label_t label, u_short if_index) return ((label ^ if_index) & ncpus2_mask); } -boolean_t -mpls_lengthcheck(struct mbuf **mp) +static void +mpls_lengthcheck(struct mbuf **mp, int hoff) { struct mbuf *m = *mp; + int hlen = hoff + sizeof(struct mpls); /* The packet must be at least the size of an MPLS header. 
*/ - if (m->m_pkthdr.len < sizeof(struct mpls)) { + if (m->m_pkthdr.len < hlen) { mplsstat.mplss_tooshort++; m_free(m); - return FALSE; + *mp = NULL; + return; } /* The MPLS header must reside completely in the first mbuf. */ - if (m->m_len < sizeof(struct mpls)) { - m = m_pullup(m, sizeof(struct mpls)); + if (m->m_len < hlen) { + m = m_pullup(m, hlen); if (m == NULL) { mplsstat.mplss_toosmall++; - return FALSE; + *mp = NULL; + return; } } - *mp = m; - return TRUE; } -struct lwkt_port * -mpls_mport(struct mbuf **mp) +void +mpls_cpufn(struct mbuf **mp, int hoff) { struct mbuf *m = *mp; struct mpls *mpls; mpls_label_t label; struct ifnet *ifp; - int cpu; lwkt_port_t port; - if (!mpls_lengthcheck(mp)) { - *mp = NULL; - return (NULL); - } + mpls_lengthcheck(mp, hoff); + if ((m = *mp) == NULL) + return; - mpls = mtod(m, struct mpls *); + mpls = mtodoff(m, struct mpls *, hoff); label = MPLS_LABEL(ntohl(mpls->mpls_shim)); ifp = m->m_pkthdr.rcvif; - cpu = MPLSP_MPORT_HASH(label, ifp->if_index); - port = &netisr_cpu[cpu].td_msgport; - - return (port); + m->m_pkthdr.hash = MPLSP_MPORT_HASH(label, ifp->if_index); + m->m_flags |= M_HASH; } diff --git a/sys/netproto/mpls/mpls_input.c b/sys/netproto/mpls/mpls_input.c index 4abc650892..5216d064d6 100644 --- a/sys/netproto/mpls/mpls_input.c +++ b/sys/netproto/mpls/mpls_input.c @@ -42,6 +42,9 @@ #include #include +#include +#include + #include #include @@ -76,8 +79,7 @@ mpls_init(void) bzero(&mplsstat, sizeof(struct mpls_stats)); #endif - netisr_register(NETISR_MPLS, mpls_mport, pktinfo_portfn_notsupp, - mpls_input_handler, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_MPLS, mpls_input_handler, mpls_cpufn); } static void @@ -85,7 +87,9 @@ mpls_input_handler(struct netmsg *msg0) { struct mbuf *m = ((struct netmsg_packet *)msg0)->nm_packet; + get_mplock(); mpls_input(m); + rel_mplock(); } void @@ -121,10 +125,7 @@ again: if (MPLS_STACK(ntohl(mpls->mpls_shim))) { /* Decapsulate the ip datagram from the mpls frame. 
*/ m_adj(m, sizeof(struct mpls)); -/* - ip_input(m); -*/ - netisr_dispatch(NETISR_IP, m); + netisr_queue(NETISR_IP, m); return; } goto again; /* If not the bottom label, per RFC4182. */ @@ -143,7 +144,7 @@ again: if (MPLS_STACK(ntohl(mpls->mpls_shim))) { /* Decapsulate the ip datagram from the mpls frame. */ m_adj(m, sizeof(struct mpls)); - netisr_dispatch(NETISR_IPV6, m); + netisr_queue(NETISR_IPV6, m); return; } goto again; /* If not the bottom label, per RFC4182. */ diff --git a/sys/netproto/mpls/mpls_var.h b/sys/netproto/mpls/mpls_var.h index 5f3142d249..46f2a420e4 100644 --- a/sys/netproto/mpls/mpls_var.h +++ b/sys/netproto/mpls/mpls_var.h @@ -60,8 +60,7 @@ struct mpls_stats { extern struct mpls_stats mplsstats_percpu[MAXCPU]; void mpls_init(void); -boolean_t mpls_lengthcheck(struct mbuf **); -struct lwkt_port * mpls_mport(struct mbuf **); +void mpls_cpufn(struct mbuf **, int); void mpls_input(struct mbuf *); int mpls_output(struct mbuf *, struct rtentry *); boolean_t mpls_output_process(struct mbuf *, struct rtentry *); diff --git a/sys/netproto/natm/natm.c b/sys/netproto/natm/natm.c index af9ea7365d..9756b5766b 100644 --- a/sys/netproto/natm/natm.c +++ b/sys/netproto/natm/natm.c @@ -51,6 +51,7 @@ #include #include +#include #include #include @@ -746,8 +747,7 @@ static void natmintr(struct netmsg *); static void netisr_natm_setup(void *dummy __unused) { - netisr_register(NETISR_NATM, cpu0_portfn, pktinfo_portfn_cpu0, - natmintr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_NATM, natmintr, NULL); } SYSINIT(natm_setup, SI_BOOT2_KLD, SI_ORDER_ANY, netisr_natm_setup, NULL); #endif @@ -757,8 +757,7 @@ natm_init(void) { LIST_INIT(&natm_pcbs); - netisr_register(NETISR_NATM, cpu0_portfn, pktinfo_portfn_cpu0, - natmintr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_NATM, natmintr, NULL); } /* @@ -780,6 +779,8 @@ natmintr(struct netmsg *msg) panic("natmintr no HDR"); #endif + get_mplock(); + npcb = (struct natmpcb *) m->m_pkthdr.rcvif; /* XXX: overloaded */ so 
= npcb->npcb_socket; @@ -823,7 +824,7 @@ m->m_pkthdr.rcvif = NULL; /* null it out to be safe */ m_freem(m); } out: - ; + rel_mplock(); /* msg was embedded in the mbuf, do not reply! */ } diff --git a/sys/netproto/ns/ns_input.c b/sys/netproto/ns/ns_input.c index 5ea43733a0..f1866d6329 100644 --- a/sys/netproto/ns/ns_input.c +++ b/sys/netproto/ns/ns_input.c @@ -49,6 +49,7 @@ #include #include +#include #include #include @@ -96,8 +97,7 @@ ns_init(void) ns_hostmask.sns_len = 12; ns_hostmask.sns_addr.x_net = ns_broadnet; ns_hostmask.sns_addr.x_host = ns_broadhost; - netisr_register(NETISR_NS, cpu0_portfn, pktinfo_portfn_cpu0, - nsintr, NETISR_FLAG_NOTMPSAFE); + netisr_register(NETISR_NS, nsintr, NULL); } /* @@ -116,6 +116,8 @@ nsintr(struct netmsg *msg) int len, error; char oddpacketp; + get_mplock(); + /* * Get IDP header in first mbuf. */ @@ -233,7 +235,7 @@ nsintr(struct netmsg *msg) bad: m_freem(m); out: - ; + rel_mplock(); /* msg was embedded in the mbuf, do not reply! */ } diff --git a/sys/netproto/ns/ns_ip.c b/sys/netproto/ns/ns_ip.c index dcbb64b9ae..4e22f5da4d 100644 --- a/sys/netproto/ns/ns_ip.c +++ b/sys/netproto/ns/ns_ip.c @@ -227,7 +227,7 @@ idpip_input(struct mbuf *m, ...) /* * Deliver to NS */ - netisr_dispatch(NETISR_NS, m); + netisr_queue(NETISR_NS, m); } /* ARGSUSED */ diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 0e14fe06ca..135674f58e 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -73,7 +73,8 @@ * mtocl(x) - convert pointer within cluster to cluster index # * cltom(x) - convert cluster # to ptr to beginning of cluster */ -#define mtod(m, t) ((t)((m)->m_data)) +#define mtod(m, t) ((t)((m)->m_data)) +#define mtodoff(m, t, off) ((t)((m)->m_data + (off))) /* * Header present at the beginning of every mbuf. 
diff --git a/sys/sys/thread.h b/sys/sys/thread.h index e853167dd2..4ec8a9adc6 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -347,11 +347,11 @@ struct thread { #define TDF_BLOCKED 0x00040000 /* Thread is blocked */ #define TDF_PANICWARN 0x00080000 /* panic warning in switch */ #define TDF_BLOCKQ 0x00100000 /* on block queue */ -#define TDF_UNUSED200000 0x00200000 +#define TDF_UNUSED00200000 0x00200000 #define TDF_EXITING 0x00400000 /* thread exiting */ #define TDF_USINGFP 0x00800000 /* thread using fp coproc */ #define TDF_KERNELFP 0x01000000 /* kernel using fp coproc */ -#define TDF_NETWORK 0x02000000 /* network proto thread */ +#define TDF_UNUSED02000000 0x02000000 #define TDF_CRYPTO 0x04000000 /* crypto thread */ #define TDF_MARKER 0x80000000 /* fairq marker thread */ -- 2.41.0