From: Matthew Dillon Date: Thu, 21 Feb 2013 23:33:26 +0000 (-0800) Subject: kernel - Fix issue with ARP packets stalling out entire network X-Git-Tag: v3.4.0rc~247 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/b069f9cd471adb5001fcf32854e273cb92448641 kernel - Fix issue with ARP packets stalling out entire network * ARP packets can cause ARP routing table updates to occur. An ARP routing table update is an expensive synchronous netmsg that is forwarded through *ALL* cpus. * ARP was previously being handled by netisr 0, and on large multi-way machines (e.g. monster, the 48-way Opteron) under very heavy loads this could result in very long stalls for any packet processing forwarded to cpu 0. Stalls exceeding 200 seconds were observed on monster when a large number of ARP packets had to be processed. * Implement a dedicated thread feature for the NETISR mechanism and modify NETISR_ARP to use it. This takes the expensive synchronous ARP packet processing off the general per-cpu netisr threads. This thread currently runs on cpu (18 % ncpus), since NETISR_ARP == 18. Thus the general per-cpu (netisr 0) thread will no longer stall on ARP packets. * Ping latencies under extreme loads improved to (approximately): ping -i 0.001 monster-nr 11735 packets transmitted, 11735 packets received, 0.0% packet loss round-trip min/avg/max/stddev = 0.073/0.190/27.019/0.382 ms --- diff --git a/sys/net/netisr.c b/sys/net/netisr.c index 0d9a9bfe00..202737f673 100644 --- a/sys/net/netisr.c +++ b/sys/net/netisr.c @@ -92,6 +92,7 @@ static TAILQ_HEAD(,netmsg_rollup) netrulist; /* Per-CPU thread to handle any protocol. 
*/ static struct thread netisr_cpu[MAXCPU]; +static struct thread netisr_ded[NETISR_MAX]; lwkt_port netisr_afree_rport; lwkt_port netisr_afree_free_so_rport; lwkt_port netisr_adone_rport; @@ -471,6 +472,18 @@ netisr_characterize(int num, struct mbuf **mp, int hoff) } } +void +netisr_init_dedicated(int num) +{ + KKASSERT(num > 0 && num < NETISR_MAX); + KKASSERT(netisr_ded[num].td_pri == 0); + lwkt_create(netmsg_service_loop, NULL, NULL, + &netisr_ded[num], TDF_NOSTART|TDF_FORCE_SPINPORT, + num % ncpus, "netisr_ded %d", num); + netmsg_service_port_init(&netisr_ded[num].td_msgport); + lwkt_schedule(&netisr_ded[num]); +} + void netisr_register(int num, netisr_fn_t handler, netisr_cpufn_t cpufn) { @@ -527,12 +540,25 @@ netisr_register_rollup(netisr_ru_t ru_func, int prio) /* * Return the message port for the general protocol message servicing * thread for a particular cpu. + * + * A standard cpu value returns the general lockless/asynchronous + * netisr thread for the cpu specified. + * + * A dedicated cpu value specifies a thread dedicated to a particular + * ISR. Such threads can potentially stall or block for long periods + * of time (see arp_init() for an example). 
*/ lwkt_port_t netisr_portfn(int cpu) { - KKASSERT(cpu >= 0 && cpu < ncpus); - return (&netisr_cpu[cpu].td_msgport); + if (__predict_false(cpu & NETISR_DEDICATED)) { + cpu &= (NETISR_DEDICATED - 1); + KKASSERT(cpu < NETISR_MAX && netisr_ded[cpu].td_pri != 0); + return (&netisr_ded[cpu].td_msgport); + } else { + KKASSERT((uint32_t)cpu < ncpus); + return (&netisr_cpu[cpu].td_msgport); + } } /* diff --git a/sys/net/netisr.h b/sys/net/netisr.h index 530d9b8889..c1f070802d 100644 --- a/sys/net/netisr.h +++ b/sys/net/netisr.h @@ -99,6 +99,7 @@ #define NETISR_BLUETOOTH 31 #define NETISR_MAX 32 +#define NETISR_DEDICATED 0x1000 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) @@ -168,6 +169,7 @@ extern lwkt_port netisr_sync_port; lwkt_port_t netisr_portfn(int cpu); lwkt_port_t cur_netport(void); +void netisr_init_dedicated(int); void netisr_register(int, netisr_fn_t, netisr_cpufn_t); void netisr_register_hashcheck(int, netisr_hashck_t); void netisr_register_rollup(netisr_ru_t ru_func, int ru_prio); diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 03c0011a5f..0441ce0380 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -1258,6 +1258,20 @@ arp_ifaddr(void *arg __unused, struct ifnet *ifp, } } +/* + * The ARP handler uses a dedicated thread because arp updates must + * update the routing table on every cpu via a synchronous forwarded + * message, and this can be expensive. + */ +static void +cpuarg_cpufn(struct mbuf **mp, int hoff __unused) +{ + struct mbuf *m = *mp; + + m->m_flags |= M_HASH; + m->m_pkthdr.hash = NETISR_ARP | NETISR_DEDICATED; +} + static void arp_init(void) { @@ -1266,7 +1280,8 @@ arp_init(void) for (cpu = 0; cpu < ncpus2; cpu++) LIST_INIT(&llinfo_arp_list[cpu]); - netisr_register(NETISR_ARP, arpintr, NULL); + netisr_init_dedicated(NETISR_ARP); + netisr_register(NETISR_ARP, arpintr, cpuarg_cpufn); EVENTHANDLER_REGISTER(ifaddr_event, arp_ifaddr, NULL, EVENTHANDLER_PRI_LAST);