From: Sepherosa Ziehau Date: Tue, 23 Sep 2008 11:28:50 +0000 (+0000) Subject: Add following three network protocol threads running mode: X-Git-Url: https://gitweb.dragonflybsd.org/~lentferj/dragonfly.git/commitdiff_plain/92db3805bd7a1f247cdbdcfd50157ca600010b24 Add following three network protocol threads running mode: 1) BGL (default) 2) Adaptive BGL. Protocol threads run without BGL by default. BGL will be held if the received msg does not have MSGF_MPSAFE turned on in the ms_flags field 3) No BGL (experimental) The code on the main path is done by dillon@ Following three sysctls and tunables are added to adjust the "mode": net.netisr.mpsafe_thread net.inet.tcp.mpsafe_thread net.inet.udp.mpsafe_thread They have the same set of values, 0 (default) -- BGL 1 -- Adaptive BGL 2 -- No BGL NETISR_FLAG_MPSAFE is added (netisr.ni_flags), so that: - netisr_queue() and schednetisr() could set MSGF_MPSAFE during msg initialization - netisr_run() (called by ether_input_oncpu()) could hold BGL based on this flag before calling netisr's handler PR_MPSAFE is added (protosw.pr_flags), so that transport_processing_oncpu() could hold BGL before calling protocol's input handler Kernel API changes: - The thread parameter to netmsg_service_loop() must be supplied (running mode) and it must have the type of "int *" - netisr_register() takes an additional flags parameter to indicate whether its handler is MPSAFE (NETISR_FLAG_MPSAFE) or not Reviewed-by: dillon@ --- diff --git a/sys/bus/usb/usb_ethersubr.c b/sys/bus/usb/usb_ethersubr.c index ca993ad780..6216b4aac0 100644 --- a/sys/bus/usb/usb_ethersubr.c +++ b/sys/bus/usb/usb_ethersubr.c @@ -31,7 +31,7 @@ * * * $FreeBSD: src/sys/dev/usb/usb_ethersubr.c,v 1.17 2003/11/14 11:09:45 johan Exp $ - * $DragonFly: src/sys/bus/usb/usb_ethersubr.c,v 1.19 2007/06/28 06:32:31 hasso Exp $ + * $DragonFly: src/sys/bus/usb/usb_ethersubr.c,v 1.20 2008/09/23 11:28:49 sephe Exp $ */ /* @@ -91,7 +91,7 @@ usb_register_netisr(void) { if (netisr_inited == 0) { 
netisr_inited = 1; - netisr_register(NETISR_USB, cpu0_portfn, usbintr); + netisr_register(NETISR_USB, cpu0_portfn, usbintr, 0); } } diff --git a/sys/net/bpf.c b/sys/net/bpf.c index a23e2d000c..9ac8291def 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -38,7 +38,7 @@ * @(#)bpf.c 8.2 (Berkeley) 3/28/94 * * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.12 2002/04/14 21:41:48 luigi Exp $ - * $DragonFly: src/sys/net/bpf.c,v 1.49 2008/09/17 13:38:28 sephe Exp $ + * $DragonFly: src/sys/net/bpf.c,v 1.50 2008/09/23 11:28:49 sephe Exp $ */ #include "use_bpf.h" @@ -552,7 +552,7 @@ bpfwrite(struct dev_write_args *ap) if (d->bd_hdrcmplt) dst.sa_family = pseudo_AF_HDRCMPLT; - netmsg_init(&bmsg.nm_netmsg, &curthread->td_msgport, 0, + netmsg_init(&bmsg.nm_netmsg, &curthread->td_msgport, MSGF_MPSAFE, bpf_output_dispatch); bmsg.nm_mbuf = m; bmsg.nm_ifp = ifp; diff --git a/sys/net/if.c b/sys/net/if.c index 7114db8d86..502d3475db 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -32,7 +32,7 @@ * * @(#)if.c 8.3 (Berkeley) 1/4/94 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $ - * $DragonFly: src/sys/net/if.c,v 1.79 2008/09/20 04:31:02 sephe Exp $ + * $DragonFly: src/sys/net/if.c,v 1.80 2008/09/23 11:28:49 sephe Exp $ */ #include "opt_compat.h" @@ -163,6 +163,7 @@ struct callout if_slowtimo_timer; int if_index = 0; struct ifnet **ifindex2ifnet = NULL; static struct thread ifnet_threads[MAXCPU]; +static int ifnet_mpsafe_thread = NETMSG_SERVICE_MPSAFE; #define IFQ_KTR_STRING "ifq=%p" #define IFQ_KTR_ARG_SIZE (sizeof(void *)) @@ -2360,8 +2361,8 @@ ifnetinit(void *dummy __unused) for (i = 0; i < ncpus; ++i) { struct thread *thr = &ifnet_threads[i]; - lwkt_create(netmsg_service_loop, NULL, NULL, thr, - TDF_NETWORK | TDF_MPSAFE, i, "ifnet %d", i); + lwkt_create(netmsg_service_loop, &ifnet_mpsafe_thread, NULL, + thr, TDF_NETWORK | TDF_MPSAFE, i, "ifnet %d", i); netmsg_service_port_init(&thr->td_msgport); } } diff --git a/sys/net/netisr.c b/sys/net/netisr.c index 
4ff5155de8..5d216b9f74 100644 --- a/sys/net/netisr.c +++ b/sys/net/netisr.c @@ -35,7 +35,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/net/netisr.c,v 1.45 2008/09/20 04:31:02 sephe Exp $ + * $DragonFly: src/sys/net/netisr.c,v 1.46 2008/09/23 11:28:49 sephe Exp $ */ #include @@ -56,6 +56,18 @@ #include #include +#define NETISR_GET_MPLOCK(ni) \ +do { \ + if (((ni)->ni_flags & NETISR_FLAG_MPSAFE) == 0) \ + get_mplock(); \ +} while (0) + +#define NETISR_REL_MPLOCK(ni) \ +do { \ + if (((ni)->ni_flags & NETISR_FLAG_MPSAFE) == 0) \ + rel_mplock(); \ +} while (0) + static void netmsg_sync_func(struct netmsg *msg); struct netmsg_port_registration { @@ -76,7 +88,12 @@ lwkt_port netisr_sync_port; static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t); static int netisr_mpsafe_thread = 0; -TUNABLE_INT("netisr.mpsafe_thread", &netisr_mpsafe_thread); +TUNABLE_INT("net.netisr.mpsafe_thread", &netisr_mpsafe_thread); + +SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr"); +SYSCTL_INT(_net_netisr, OID_AUTO, mpsafe_thread, CTLFLAG_RW, + &netisr_mpsafe_thread, 0, + "0:BGL, 1:Adaptive BGL, 2:No BGL(experimental)"); /* * netisr_afree_rport replymsg function, only used to handle async @@ -147,10 +164,9 @@ netisr_init(void) * Create default per-cpu threads for generic protocol handling. */ for (i = 0; i < ncpus; ++i) { - lwkt_create(netisr_mpsafe_thread ? - netmsg_service_loop_mpsafe : netmsg_service_loop, - NULL, NULL, &netisr_cpu[i], - TDF_NETWORK, i, "netisr_cpu %d", i); + lwkt_create(netmsg_service_loop, &netisr_mpsafe_thread, NULL, + &netisr_cpu[i], TDF_NETWORK | TDF_MPSAFE, i, + "netisr_cpu %d", i); netmsg_service_port_init(&netisr_cpu[i].td_msgport); } @@ -235,27 +251,77 @@ netmsg_sync_func(struct netmsg *msg) } /* - * Generic netmsg service loop. Some protocols may roll their own but all - * must do the basic command dispatch function call done here. 
+ * Return current BGL lock state (1:locked, 0: unlocked) */ -void -netmsg_service_loop(void *arg) +int +netmsg_service(struct netmsg *msg, int mpsafe_mode, int mplocked) { - struct netmsg *msg; - - while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) { + /* + * Adjust the mplock dynamically. + */ + switch (mpsafe_mode) { + case NETMSG_SERVICE_ADAPTIVE: /* Adaptive BGL */ + if (msg->nm_lmsg.ms_flags & MSGF_MPSAFE) { + if (mplocked) { + rel_mplock(); + mplocked = 0; + } + msg->nm_dispatch(msg); + /* Leave mpunlocked */ + } else { + if (!mplocked) { + get_mplock(); + /* mplocked = 1; not needed */ + } + msg->nm_dispatch(msg); + rel_mplock(); + mplocked = 0; + /* Leave mpunlocked, next msg might be mpsafe */ + } + break; + + case NETMSG_SERVICE_MPSAFE: /* No BGL */ + if (mplocked) { + rel_mplock(); + mplocked = 0; + } + msg->nm_dispatch(msg); + /* Leave mpunlocked */ + break; + + default: /* BGL */ + if (!mplocked) { + get_mplock(); + mplocked = 1; + } msg->nm_dispatch(msg); + /* Leave mplocked */ + break; } + return mplocked; } /* - * MPSAFE version of netmsg_service_loop() + * Generic netmsg service loop. Some protocols may roll their own but all + * must do the basic command dispatch function call done here. */ void -netmsg_service_loop_mpsafe(void *arg) +netmsg_service_loop(void *arg) { - rel_mplock(); - netmsg_service_loop(arg); + struct netmsg *msg; + int mplocked, *mpsafe_mode = arg; + + /* + * Thread was started with TDF_MPSAFE + */ + mplocked = 0; + + /* + * Loop on netmsgs + */ + while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) { + mplocked = netmsg_service(msg, *mpsafe_mode, mplocked); + } } /* @@ -296,7 +362,9 @@ netisr_queue(int num, struct mbuf *m) pmsg = &m->m_hdr.mh_netmsg; - netmsg_init(&pmsg->nm_netmsg, &netisr_apanic_rport, 0, ni->ni_handler); + netmsg_init(&pmsg->nm_netmsg, &netisr_apanic_rport, + (ni->ni_flags & NETISR_FLAG_MPSAFE) ? 
MSGF_MPSAFE : 0, + ni->ni_handler); pmsg->nm_packet = m; pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg); @@ -304,13 +372,16 @@ netisr_queue(int num, struct mbuf *m) } void -netisr_register(int num, lwkt_portfn_t mportfn, netisr_fn_t handler) +netisr_register(int num, lwkt_portfn_t mportfn, netisr_fn_t handler, + uint32_t flags) { KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), ("netisr_register: bad isr %d", num)); - netmsg_init(&netisrs[num].ni_netmsg, &netisr_adone_rport, 0, NULL); + netmsg_init(&netisrs[num].ni_netmsg, &netisr_adone_rport, + (flags & NETISR_FLAG_MPSAFE) ? MSGF_MPSAFE : 0, NULL); netisrs[num].ni_mport = mportfn; netisrs[num].ni_handler = handler; + netisrs[num].ni_flags = flags; } int @@ -446,5 +517,7 @@ netisr_run(int num, struct mbuf *m) pmsg->nm_packet = m; pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; + NETISR_GET_MPLOCK(ni); ni->ni_handler(&pmsg->nm_netmsg); + NETISR_REL_MPLOCK(ni); } diff --git a/sys/net/netisr.h b/sys/net/netisr.h index 65fe8816d8..1fb3d3be09 100644 --- a/sys/net/netisr.h +++ b/sys/net/netisr.h @@ -65,7 +65,7 @@ * * @(#)netisr.h 8.1 (Berkeley) 6/10/93 * $FreeBSD: src/sys/net/netisr.h,v 1.21.2.5 2002/02/09 23:02:39 luigi Exp $ - * $DragonFly: src/sys/net/netisr.h,v 1.35 2008/09/17 07:24:18 sephe Exp $ + * $DragonFly: src/sys/net/netisr.h,v 1.36 2008/09/23 11:28:49 sephe Exp $ */ #ifndef _NET_NETISR_H_ @@ -203,12 +203,18 @@ struct netisr { lwkt_portfn_t ni_mport; netisr_fn_t ni_handler; struct netmsg ni_netmsg; /* for sched_netisr() (no-data) */ + uint32_t ni_flags; /* NETISR_FLAG_ */ }; +#define NETISR_FLAG_MPSAFE 0x1 + #endif #ifdef _KERNEL +#define NETMSG_SERVICE_ADAPTIVE 1 +#define NETMSG_SERVICE_MPSAFE 2 + extern lwkt_port netisr_adone_rport; extern lwkt_port netisr_afree_rport; extern lwkt_port netisr_apanic_rport; @@ -219,11 +225,11 @@ lwkt_port_t netisr_find_port(int, struct mbuf **); void netisr_dispatch(int, struct mbuf *); void netisr_run(int, struct mbuf *); 
int netisr_queue(int, struct mbuf *); -void netisr_register(int, lwkt_portfn_t, netisr_fn_t); +void netisr_register(int, lwkt_portfn_t, netisr_fn_t, uint32_t); int netisr_unregister(int); void netmsg_service_port_init(lwkt_port_t); void netmsg_service_loop(void *arg); -void netmsg_service_loop_mpsafe(void *arg); +int netmsg_service(struct netmsg *, int, int); void netmsg_service_sync(void); void schednetisr(int); diff --git a/sys/net/netmsg.h b/sys/net/netmsg.h index e279feb50f..684570c905 100644 --- a/sys/net/netmsg.h +++ b/sys/net/netmsg.h @@ -27,7 +27,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $DragonFly: src/sys/net/netmsg.h,v 1.8 2008/09/17 11:22:13 sephe Exp $ + * $DragonFly: src/sys/net/netmsg.h,v 1.9 2008/09/23 11:28:49 sephe Exp $ */ #ifndef _NET_NETMSG_H_ @@ -52,6 +52,8 @@ typedef struct netmsg { netisr_fn_t nm_dispatch; } *netmsg_t; +#define MSGF_MPSAFE MSGF_USER0 + #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) /* diff --git a/sys/net/ppp/if_ppp.c b/sys/net/ppp/if_ppp.c index d22d9a41b9..48777f7a8f 100644 --- a/sys/net/ppp/if_ppp.c +++ b/sys/net/ppp/if_ppp.c @@ -70,7 +70,7 @@ */ /* $FreeBSD: src/sys/net/if_ppp.c,v 1.67.2.4 2002/04/14 21:41:48 luigi Exp $ */ -/* $DragonFly: src/sys/net/ppp/if_ppp.c,v 1.39 2008/07/27 10:06:57 sephe Exp $ */ +/* $DragonFly: src/sys/net/ppp/if_ppp.c,v 1.40 2008/09/23 11:28:49 sephe Exp $ */ /* from if_sl.c,v 1.11 84/10/04 12:54:47 rick Exp */ /* from NetBSD: if_ppp.c,v 1.15.2.2 1994/07/28 05:17:58 cgd Exp */ @@ -256,7 +256,7 @@ pppattach(void *dummy) if_attach(&sc->sc_if, NULL); bpfattach(&sc->sc_if, DLT_PPP, PPP_HDRLEN); } - netisr_register(NETISR_PPP, cpu0_portfn, pppintr); + netisr_register(NETISR_PPP, cpu0_portfn, pppintr, 0); /* * XXX layering violation - if_ppp can work over any lower level * transport that cares to attach to it. 
diff --git a/sys/netbt/bt_proto.c b/sys/netbt/bt_proto.c index cfebb7cbef..c6ae1d6147 100644 --- a/sys/netbt/bt_proto.c +++ b/sys/netbt/bt_proto.c @@ -1,4 +1,4 @@ -/* $DragonFly: src/sys/netbt/bt_proto.c,v 1.4 2008/04/20 13:44:25 swildner Exp $ */ +/* $DragonFly: src/sys/netbt/bt_proto.c,v 1.5 2008/09/23 11:28:49 sephe Exp $ */ /* $OpenBSD: bt_proto.c,v 1.4 2007/06/24 20:55:27 uwe Exp $ */ /* @@ -234,7 +234,7 @@ SYSCTL_INT(_net_bluetooth_sco, OID_AUTO, recvspace, CTLFLAG_RW, &sco_recvspace, static void netisr_netbt_setup(void *dummy __unused) { - netisr_register(NETISR_BLUETOOTH, cpu0_portfn, btintr); + netisr_register(NETISR_BLUETOOTH, cpu0_portfn, btintr, 0); } SYSINIT(netbt_setup, SI_BOOT2_KLD, SI_ORDER_ANY, netisr_netbt_setup, NULL); diff --git a/sys/netgraph/netgraph/ng_base.c b/sys/netgraph/netgraph/ng_base.c index d0acda554c..e1ed515748 100644 --- a/sys/netgraph/netgraph/ng_base.c +++ b/sys/netgraph/netgraph/ng_base.c @@ -38,7 +38,7 @@ * Archie Cobbs * * $FreeBSD: src/sys/netgraph/ng_base.c,v 1.11.2.17 2002/07/02 23:44:02 archie Exp $ - * $DragonFly: src/sys/netgraph/netgraph/ng_base.c,v 1.26 2008/01/05 14:02:39 swildner Exp $ + * $DragonFly: src/sys/netgraph/netgraph/ng_base.c,v 1.27 2008/09/23 11:28:49 sephe Exp $ * $Whistle: ng_base.c,v 1.39 1999/01/28 23:54:53 julian Exp $ */ @@ -1839,7 +1839,7 @@ ngb_mod_event(module_t mod, int event, void *data) crit_exit(); break; } - netisr_register(NETISR_NETGRAPH, cpu0_portfn, ngintr); + netisr_register(NETISR_NETGRAPH, cpu0_portfn, ngintr, 0); error = 0; crit_exit(); break; diff --git a/sys/netgraph7/ng_base.c b/sys/netgraph7/ng_base.c index 7d1b4faa37..167b91a3a9 100644 --- a/sys/netgraph7/ng_base.c +++ b/sys/netgraph7/ng_base.c @@ -39,7 +39,7 @@ * Archie Cobbs * * $FreeBSD: src/sys/netgraph/ng_base.c,v 1.159 2008/04/19 05:30:49 mav Exp $ - * $DragonFly: src/sys/netgraph7/ng_base.c,v 1.2 2008/06/26 23:05:35 dillon Exp $ + * $DragonFly: src/sys/netgraph7/ng_base.c,v 1.3 2008/09/23 11:28:49 sephe Exp $ * $Whistle: 
ng_base.c,v 1.39 1999/01/28 23:54:53 julian Exp $ */ @@ -3068,8 +3068,7 @@ ngb_mod_event(module_t mod, int event, void *data) ng_qdzone = uma_zcreate("NetGraph data items", sizeof(struct ng_item), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(ng_qdzone, maxdata); - netisr_register(NETISR_NETGRAPH, (netisr_t *)ngintr, NULL, - NETISR_MPSAFE); + netisr_register(NETISR_NETGRAPH, (netisr_t *)ngintr, NULL, 0); break; case MOD_UNLOAD: /* You can't unload it because an interface may be using it. */ diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 16e22e90f1..a9593c8ac2 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -64,7 +64,7 @@ * * @(#)if_ether.c 8.1 (Berkeley) 6/10/93 * $FreeBSD: src/sys/netinet/if_ether.c,v 1.64.2.23 2003/04/11 07:23:15 fjoe Exp $ - * $DragonFly: src/sys/netinet/if_ether.c,v 1.49 2008/06/09 11:24:24 sephe Exp $ + * $DragonFly: src/sys/netinet/if_ether.c,v 1.50 2008/09/23 11:28:49 sephe Exp $ */ /* @@ -987,7 +987,7 @@ arp_init(void) for (cpu = 0; cpu < ncpus2; cpu++) LIST_INIT(&llinfo_arp_list[cpu]); - netisr_register(NETISR_ARP, cpu0_portfn, arpintr); + netisr_register(NETISR_ARP, cpu0_portfn, arpintr, 0); } SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); diff --git a/sys/netinet/ip_demux.c b/sys/netinet/ip_demux.c index f6a68a0850..5d2b41b593 100644 --- a/sys/netinet/ip_demux.c +++ b/sys/netinet/ip_demux.c @@ -30,7 +30,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/netinet/ip_demux.c,v 1.40 2008/09/20 04:31:02 sephe Exp $ + * $DragonFly: src/sys/netinet/ip_demux.c,v 1.41 2008/09/23 11:28:49 sephe Exp $ */ #include "opt_inet.h" @@ -60,13 +60,11 @@ #include extern struct thread netisr_cpu[]; +extern int udp_mpsafe_thread; static struct thread tcp_thread[MAXCPU]; static struct thread udp_thread[MAXCPU]; -static int udp_mpsafe_thread = 0; -TUNABLE_INT("net.inet.udp.mpsafe_thread", &udp_mpsafe_thread); - static __inline int INP_MPORT_HASH(in_addr_t faddr, in_addr_t laddr, in_port_t fport, in_port_t lport) @@ -377,7 +375,7 @@ tcp_thread_init(void) for (cpu = 0; cpu < ncpus2; cpu++) { lwkt_create(tcpmsg_service_loop, NULL, NULL, - &tcp_thread[cpu], TDF_NETWORK, cpu, + &tcp_thread[cpu], TDF_NETWORK | TDF_MPSAFE, cpu, "tcp_thread %d", cpu); netmsg_service_port_init(&tcp_thread[cpu].td_msgport); } @@ -389,9 +387,8 @@ udp_thread_init(void) int cpu; for (cpu = 0; cpu < ncpus2; cpu++) { - lwkt_create(udp_mpsafe_thread ? - netmsg_service_loop_mpsafe : netmsg_service_loop, - NULL, NULL, &udp_thread[cpu], TDF_NETWORK, cpu, + lwkt_create(netmsg_service_loop, &udp_mpsafe_thread, NULL, + &udp_thread[cpu], TDF_NETWORK | TDF_MPSAFE, cpu, "udp_thread %d", cpu); netmsg_service_port_init(&udp_thread[cpu].td_msgport); } diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 42eee23c66..c43a94930b 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -65,7 +65,7 @@ * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 * $FreeBSD: src/sys/netinet/ip_input.c,v 1.130.2.52 2003/03/07 07:01:28 silby Exp $ - * $DragonFly: src/sys/netinet/ip_input.c,v 1.107 2008/09/18 11:19:42 sephe Exp $ + * $DragonFly: src/sys/netinet/ip_input.c,v 1.108 2008/09/23 11:28:49 sephe Exp $ */ #define _IP_VHL @@ -203,6 +203,9 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, static int ipprintfs = 0; #endif +extern int udp_mpsafe_proto; +extern int tcp_mpsafe_proto; + extern struct domain inetdomain; extern struct protosw 
inetsw[]; u_char ip_protox[IPPROTO_MAX]; @@ -325,10 +328,25 @@ ip_init(void) for (i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr - inetsw; for (pr = inetdomain.dom_protosw; - pr < inetdomain.dom_protoswNPROTOSW; pr++) - if (pr->pr_domain->dom_family == PF_INET && - pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) - ip_protox[pr->pr_protocol] = pr - inetsw; + pr < inetdomain.dom_protoswNPROTOSW; pr++) { + if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol) { + if (pr->pr_protocol != IPPROTO_RAW) + ip_protox[pr->pr_protocol] = pr - inetsw; + + /* XXX */ + switch (pr->pr_protocol) { + case IPPROTO_TCP: + if (tcp_mpsafe_proto) + pr->pr_flags |= PR_MPSAFE; + break; + + case IPPROTO_UDP: + if (udp_mpsafe_proto) + pr->pr_flags |= PR_MPSAFE; + break; + } + } + } inet_pfil_hook.ph_type = PFIL_TYPE_AF; inet_pfil_hook.ph_af = AF_INET; @@ -357,7 +375,7 @@ ip_init(void) bzero(&ipstat, sizeof(struct ip_stats)); #endif - netisr_register(NETISR_IP, ip_mport_in, ip_input_handler); + netisr_register(NETISR_IP, ip_mport_in, ip_input_handler, 0); } /* @@ -371,10 +389,14 @@ struct route ipforward_rt[MAXCPU]; static void transport_processing_oncpu(struct mbuf *m, int hlen, struct ip *ip) { + const struct protosw *pr = &inetsw[ip_protox[ip->ip_p]]; + /* * Switch out to protocol's input routine. 
*/ - (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen, ip->ip_p); + PR_GET_MPLOCK(pr); + pr->pr_input(m, hlen, ip->ip_p); + PR_REL_MPLOCK(pr); } static void @@ -898,7 +920,7 @@ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ return; pmsg = &m->m_hdr.mh_netmsg; - netmsg_init(&pmsg->nm_netmsg, &netisr_apanic_rport, 0, + netmsg_init(&pmsg->nm_netmsg, &netisr_apanic_rport, MSGF_MPSAFE, transport_processing_handler); pmsg->nm_packet = m; pmsg->nm_netmsg.nm_lmsg.u.ms_result = hlen; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index a178b8babf..fc3e52cda4 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -65,7 +65,7 @@ * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.73.2.31 2003/01/24 05:11:34 sam Exp $ - * $DragonFly: src/sys/netinet/tcp_subr.c,v 1.60 2008/08/15 21:37:16 nth Exp $ + * $DragonFly: src/sys/netinet/tcp_subr.c,v 1.61 2008/09/23 11:28:49 sephe Exp $ */ #include "opt_compat.h" @@ -157,6 +157,15 @@ KTR_INFO(KTR_TCP, tcp, delayed, 2, "tcp execute delayed ops", 0); struct inpcbinfo tcbinfo[MAXCPU]; struct tcpcbackqhead tcpcbackq[MAXCPU]; +int tcp_mpsafe_proto = 0; +TUNABLE_INT("net.inet.tcp.mpsafe_proto", &tcp_mpsafe_proto); + +static int tcp_mpsafe_thread = 0; +TUNABLE_INT("net.inet.tcp.mpsafe_thread", &tcp_mpsafe_thread); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, mpsafe_thread, CTLFLAG_RW, + &tcp_mpsafe_thread, 0, + "0:BGL, 1:Adaptive BGL, 2:No BGL(experimental)"); + int tcp_mssdflt = TCP_MSS; SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, &tcp_mssdflt, 0, "Default TCP Maximum Segment Size"); @@ -229,7 +238,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW, static MALLOC_DEFINE(M_TCPTEMP, "tcptemp", "TCP Templates for Keepalives"); static struct malloc_pipe tcptemp_mpipe; -static void tcp_willblock(void); +static void tcp_willblock(int); static void tcp_cleartaocache (void); static void tcp_notify (struct inpcb *, int); @@ -377,23 +386,41 @@ void 
tcpmsg_service_loop(void *dummy) { struct netmsg *msg; + int mplocked; + + /* + * Thread was started with TDF_MPSAFE + */ + mplocked = 0; while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) { do { logtcp(rxmsg); - msg->nm_dispatch(msg); + mplocked = netmsg_service(msg, tcp_mpsafe_thread, + mplocked); } while ((msg = lwkt_getport(&curthread->td_msgport)) != NULL); + logtcp(delayed); - tcp_willblock(); + tcp_willblock(mplocked); logtcp(wait); } } static void -tcp_willblock(void) +tcp_willblock(int mplocked) { struct tcpcb *tp; int cpu = mycpu->gd_cpuid; + int unlock = 0; + + if (!mplocked && !tcp_mpsafe_proto) { + if (TAILQ_EMPTY(&tcpcbackq[cpu])) + return; + + get_mplock(); + mplocked = 1; + unlock = 1; + } while ((tp = TAILQ_FIRST(&tcpcbackq[cpu])) != NULL) { KKASSERT(tp->t_flags & TF_ONOUTPUTQ); @@ -401,6 +428,9 @@ tcp_willblock(void) TAILQ_REMOVE(&tcpcbackq[cpu], tp, t_outputq); tcp_output(tp); } + + if (unlock) + rel_mplock(); } diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index c9c777578b..907a38521a 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -65,7 +65,7 @@ * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ - * $DragonFly: src/sys/netinet/udp_usrreq.c,v 1.45 2008/09/12 11:37:41 sephe Exp $ + * $DragonFly: src/sys/netinet/udp_usrreq.c,v 1.46 2008/09/23 11:28:49 sephe Exp $ */ #include "opt_ipsec.h" @@ -118,6 +118,15 @@ #include #endif +int udp_mpsafe_proto = 0; +TUNABLE_INT("net.inet.udp.mpsafe_proto", &udp_mpsafe_proto); + +int udp_mpsafe_thread = 0; +TUNABLE_INT("net.inet.udp.mpsafe_thread", &udp_mpsafe_thread); +SYSCTL_INT(_net_inet_udp, OID_AUTO, mpsafe_thread, CTLFLAG_RW, + &udp_mpsafe_thread, 0, + "0:BGL, 1:Adaptive BGL, 2:No BGL(experimental)"); + /* * UDP protocol implementation. * Per RFC 768, August, 1980. 
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index a89dc38d06..4b84b8679b 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -1,5 +1,5 @@ /* $FreeBSD: src/sys/netinet6/ip6_input.c,v 1.11.2.15 2003/01/24 05:11:35 sam Exp $ */ -/* $DragonFly: src/sys/netinet6/ip6_input.c,v 1.36 2008/09/04 09:08:22 hasso Exp $ */ +/* $DragonFly: src/sys/netinet6/ip6_input.c,v 1.37 2008/09/23 11:28:50 sephe Exp $ */ /* $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */ /* @@ -197,7 +197,7 @@ ip6_init(void) "error %d\n", __func__, i); } - netisr_register(NETISR_IPV6, cpu0_portfn, ip6_input); + netisr_register(NETISR_IPV6, cpu0_portfn, ip6_input, 0); scope6_init(); nd6_init(); frag6_init(); diff --git a/sys/netproto/atalk/ddp_usrreq.c b/sys/netproto/atalk/ddp_usrreq.c index 62bee57c41..c78219c92f 100644 --- a/sys/netproto/atalk/ddp_usrreq.c +++ b/sys/netproto/atalk/ddp_usrreq.c @@ -2,7 +2,7 @@ * Copyright (c) 1990,1994 Regents of The University of Michigan. * All Rights Reserved. See COPYRIGHT. * - * $DragonFly: src/sys/netproto/atalk/ddp_usrreq.c,v 1.12 2008/01/05 14:02:40 swildner Exp $ + * $DragonFly: src/sys/netproto/atalk/ddp_usrreq.c,v 1.13 2008/09/23 11:28:50 sephe Exp $ */ #include @@ -540,9 +540,9 @@ at_setsockaddr(struct socket *so, struct sockaddr **nam) void ddp_init(void) { - netisr_register(NETISR_ATALK1, cpu0_portfn, at1intr); - netisr_register(NETISR_ATALK2, cpu0_portfn, at2intr); - netisr_register(NETISR_AARP, cpu0_portfn, aarpintr); + netisr_register(NETISR_ATALK1, cpu0_portfn, at1intr, 0); + netisr_register(NETISR_ATALK2, cpu0_portfn, at2intr, 0); + netisr_register(NETISR_AARP, cpu0_portfn, aarpintr, 0); } #if 0 diff --git a/sys/netproto/atm/atm_subr.c b/sys/netproto/atm/atm_subr.c index 8be763b951..da2c07e653 100644 --- a/sys/netproto/atm/atm_subr.c +++ b/sys/netproto/atm/atm_subr.c @@ -24,7 +24,7 @@ * notice must be reproduced on all copies. 
* * @(#) $FreeBSD: src/sys/netatm/atm_subr.c,v 1.7 2000/02/13 03:31:59 peter Exp $ - * @(#) $DragonFly: src/sys/netproto/atm/atm_subr.c,v 1.21 2007/05/23 08:57:07 dillon Exp $ + * @(#) $DragonFly: src/sys/netproto/atm/atm_subr.c,v 1.22 2008/09/23 11:28:50 sephe Exp $ */ /* @@ -112,7 +112,7 @@ atm_initialize(void) atm_init = 1; atm_intrq.ifq_maxlen = ATM_INTRQ_MAX; - netisr_register(NETISR_ATM, cpu0_portfn, atm_intr); + netisr_register(NETISR_ATM, cpu0_portfn, atm_intr, 0); /* * Initialize subsystems diff --git a/sys/netproto/ipx/ipx_input.c b/sys/netproto/ipx/ipx_input.c index a8699ce725..b2d67d8baa 100644 --- a/sys/netproto/ipx/ipx_input.c +++ b/sys/netproto/ipx/ipx_input.c @@ -34,7 +34,7 @@ * @(#)ipx_input.c * * $FreeBSD: src/sys/netipx/ipx_input.c,v 1.22.2.2 2001/02/22 09:44:18 bp Exp $ - * $DragonFly: src/sys/netproto/ipx/ipx_input.c,v 1.18 2008/03/07 11:34:21 sephe Exp $ + * $DragonFly: src/sys/netproto/ipx/ipx_input.c,v 1.19 2008/09/23 11:28:50 sephe Exp $ */ #include @@ -117,7 +117,7 @@ ipx_init(void) ipx_hostmask.sipx_addr.x_net = ipx_broadnet; ipx_hostmask.sipx_addr.x_host = ipx_broadhost; - netisr_register(NETISR_IPX, cpu0_portfn, ipxintr); + netisr_register(NETISR_IPX, cpu0_portfn, ipxintr, 0); } /* diff --git a/sys/netproto/mpls/mpls_input.c b/sys/netproto/mpls/mpls_input.c index d5a1e041a1..ad85cb6041 100644 --- a/sys/netproto/mpls/mpls_input.c +++ b/sys/netproto/mpls/mpls_input.c @@ -28,7 +28,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/netproto/mpls/mpls_input.c,v 1.2 2008/08/05 15:11:32 nant Exp $ + * $DragonFly: src/sys/netproto/mpls/mpls_input.c,v 1.3 2008/09/23 11:28:50 sephe Exp $ */ #include @@ -76,7 +76,7 @@ mpls_init(void) bzero(&mplsstat, sizeof(struct mpls_stats)); #endif - netisr_register(NETISR_MPLS, mpls_mport, mpls_input_handler); + netisr_register(NETISR_MPLS, mpls_mport, mpls_input_handler, 0); } static void diff --git a/sys/netproto/natm/natm.c b/sys/netproto/natm/natm.c index e25dcff654..bf267d3468 100644 --- a/sys/netproto/natm/natm.c +++ b/sys/netproto/natm/natm.c @@ -1,6 +1,6 @@ /* $NetBSD: natm.c,v 1.5 1996/11/09 03:26:26 chuck Exp $ */ /* $FreeBSD: src/sys/netnatm/natm.c,v 1.12 2000/02/13 03:32:03 peter Exp $ */ -/* $DragonFly: src/sys/netproto/natm/natm.c,v 1.29 2008/05/14 11:59:24 sephe Exp $ */ +/* $DragonFly: src/sys/netproto/natm/natm.c,v 1.30 2008/09/23 11:28:50 sephe Exp $ */ /* * @@ -748,7 +748,7 @@ static void natmintr(struct netmsg *); static void netisr_natm_setup(void *dummy __unused) { - netisr_register(NETISR_NATM, cpu0_portfn, natmintr); + netisr_register(NETISR_NATM, cpu0_portfn, natmintr, 0); } SYSINIT(natm_setup, SI_BOOT2_KLD, SI_ORDER_ANY, netisr_natm_setup, NULL); #endif @@ -758,7 +758,7 @@ natm_init(void) { LIST_INIT(&natm_pcbs); - netisr_register(NETISR_NATM, cpu0_portfn, natmintr); + netisr_register(NETISR_NATM, cpu0_portfn, natmintr, 0); } /* diff --git a/sys/netproto/ns/ns_input.c b/sys/netproto/ns/ns_input.c index e2fb775662..4f20200028 100644 --- a/sys/netproto/ns/ns_input.c +++ b/sys/netproto/ns/ns_input.c @@ -32,7 +32,7 @@ * * @(#)ns_input.c 8.1 (Berkeley) 6/10/93 * $FreeBSD: src/sys/netns/ns_input.c,v 1.13 2000/02/13 03:32:04 peter Exp $ - * $DragonFly: src/sys/netproto/ns/ns_input.c,v 1.21 2008/03/07 11:34:21 sephe Exp $ + * $DragonFly: src/sys/netproto/ns/ns_input.c,v 1.22 2008/09/23 11:28:50 sephe Exp $ */ #include @@ -96,7 +96,7 @@ ns_init(void) ns_hostmask.sns_len = 12; ns_hostmask.sns_addr.x_net = ns_broadnet; 
ns_hostmask.sns_addr.x_host = ns_broadhost; - netisr_register(NETISR_NS, cpu0_portfn, nsintr); + netisr_register(NETISR_NS, cpu0_portfn, nsintr, 0); } /* diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 750a8486bf..59e5426b51 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -32,7 +32,7 @@ * * @(#)protosw.h 8.1 (Berkeley) 6/2/93 * $FreeBSD: src/sys/sys/protosw.h,v 1.28.2.2 2001/07/03 11:02:01 ume Exp $ - * $DragonFly: src/sys/sys/protosw.h,v 1.22 2008/06/17 20:50:11 aggelos Exp $ + * $DragonFly: src/sys/sys/protosw.h,v 1.23 2008/09/23 11:28:50 sephe Exp $ */ #ifndef _SYS_PROTOSW_H_ @@ -127,6 +127,7 @@ struct protosw { #define PR_IMPLOPCL 0x20 /* implied open/close */ #define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ #define PR_ADDR_OPT 0x80 /* allow addresses during delivery */ +#define PR_MPSAFE 0x0100 /* protocal is MPSAFE */ /* * The arguments to usrreq are: @@ -391,6 +392,18 @@ void kpfctlinput2 (int, struct sockaddr *, void *); struct protosw *pffindproto (int family, int protocol, int type); struct protosw *pffindtype (int family, int type); +#define PR_GET_MPLOCK(_pr) \ +do { \ + if (((_pr)->pr_flags & PR_MPSAFE) == 0) \ + get_mplock(); \ +} while (0) + +#define PR_REL_MPLOCK(_pr) \ +do { \ + if (((_pr)->pr_flags & PR_MPSAFE) == 0) \ + rel_mplock(); \ +} while (0) + #endif /* _KERNEL */ #endif /* _SYS_PROTOSW_H_ */