From: Sepherosa Ziehau
Date: Sun, 23 Dec 2012 12:31:32 +0000 (+0800)
Subject: ifq/staging: Initial implementation of IFQ packet staging mechanism
X-Git-Tag: v3.4.0rc~621
X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/28cc0c295c957f68a6e8afbac62d0e50d56ccb25

ifq/staging: Initial implementation of IFQ packet staging mechanism

Packets enqueued into an IFQ are staged up to a certain amount before
the ifnet's if_start is called.  This way the driver can avoid writing
to the hardware registers for every single packet; instead, the
hardware registers are written once a batch of packets has been put
onto the hardware TX ring.

Measurements on several modern NICs (emx(4), igb(4), bnx(4), bge(4),
jme(4)) show that aggregating the hardware register writes saves ~20%
CPU time when 18-byte UDP datagrams are transmitted at 1.48Mpps.

IFQ packet staging is performed only on the direct ifnet if_start call
path, i.e. ifq_try_ifstart().

IFQ packet staging is stopped upon any of the following conditions
(the first two are sketched in code after this list):
- The count of packets enqueued on the current CPU is greater than or
  equal to ifq_stage_cntmax.
- The total length of the packets enqueued on the current CPU is
  greater than or equal to the hardware's MTU - max_protohdr.
  max_protohdr is subtracted from the hardware's MTU mainly because a
  full TCP segment's size is usually less than the hardware's MTU.
- The if_start interlock (if_snd.altq_started) is not released.
- if_start_rollup(), which is registered as a low priority netisr
  rollup function, is called, probably because no more work is pending
  for the netisr.

Currently IFQ packet staging is only performed in netisr threads.

Inspired-by: Luigi Rizzo's netmap paper (http://info.iet.unipi.it/~luigi/netmap/)
Also-Suggested-by: dillon@
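For illustration only, a minimal userland sketch of the first two stop
conditions; the function name and its parameters are hypothetical, the
real check lives in ifq_dispatch() in the diff below:

	/*
	 * Sketch: keep staging a packet only while both the per-CPU
	 * packet count and byte length stay below their thresholds.
	 * The parameters model ifqs_cnt/ifqs_len, ifq_stage_cntmax
	 * and ifp->if_mtu - max_protohdr from the diff below.
	 */
	static int
	would_stage(int staged_cnt, int staged_len, int cntmax,
	    int mtu, int max_protohdr)
	{
		return (staged_cnt < cntmax &&
		    staged_len < mtu - max_protohdr);
	}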
---

diff --git a/sys/net/altq/if_altq.h b/sys/net/altq/if_altq.h
index 94b189d505..d4bb667517 100644
--- a/sys/net/altq/if_altq.h
+++ b/sys/net/altq/if_altq.h
@@ -34,6 +34,18 @@
 
 struct altq_pktattr;
 
+struct ifaltq;
+
+struct ifaltq_stage {
+	struct ifaltq	*ifqs_altq;
+	int		ifqs_cnt;
+	int		ifqs_len;
+	uint32_t	ifqs_flags;
+	TAILQ_ENTRY(ifaltq_stage) ifqs_link;
+} __cachealign;
+
+#define IFQ_STAGE_FLAG_QUED	0x1
+
 /*
  * Structure defining a queue for a network interface.
  */
@@ -67,6 +79,7 @@ struct ifaltq {
 	struct lwkt_serialize altq_lock;
 	struct mbuf *altq_prepended;	/* mbuf dequeued, but not yet xmit */
 	int	altq_started;		/* ifnet.if_start interlock */
+	struct ifaltq_stage *altq_stage;
 };
 
 #define ALTQ_ASSERT_LOCKED(ifq)	ASSERT_SERIALIZED(&(ifq)->altq_lock)

diff --git a/sys/net/if.c b/sys/net/if.c
index caf5872f07..09d901076a 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -103,6 +103,10 @@ struct netmsg_ifaddr {
 	int	tail;
 };
 
+struct ifaltq_stage_head {
+	TAILQ_HEAD(, ifaltq_stage) ifqs_head;
+} __cachealign;
+
 /*
  * System initialization
  */
@@ -126,6 +130,17 @@ extern void	nd6_setmtu(struct ifnet *);
 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
 
+static u_long if_staged;
+SYSCTL_ULONG(_net_link, OID_AUTO, staged, CTLFLAG_RW, &if_staged, 0, "");
+
+static u_long if_staged_start;
+SYSCTL_ULONG(_net_link, OID_AUTO, staged_start, CTLFLAG_RW,
+    &if_staged_start, 0, "");
+
+static int ifq_stage_cntmax = 4;
+SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
+    &ifq_stage_cntmax, 0, "ifq staging packet count max");
+
 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
 /* Must be after netisr_init */
 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
@@ -146,6 +161,8 @@ int			if_index = 0;
 struct ifnet		**ifindex2ifnet = NULL;
 static struct thread	ifnet_threads[MAXCPU];
 
+static struct ifaltq_stage_head	ifq_stage_heads[MAXCPU];
+
 #define IFQ_KTR_STRING		"ifq=%p"
 #define IFQ_KTR_ARGS		struct ifaltq *ifq
 #ifndef KTR_IFQ
@@ -557,6 +574,12 @@ if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
 	ALTQ_LOCK_INIT(ifq);
 	ifq_set_classic(ifq);
 
+	ifq->altq_stage =
+	    kmalloc_cachealign(ncpus * sizeof(struct ifaltq_stage),
+	    M_DEVBUF, M_WAITOK | M_ZERO);
+	for (i = 0; i < ncpus; ++i)
+		ifq->altq_stage[i].ifqs_altq = ifq;
+
 	if (!SLIST_EMPTY(&domains))
 		if_attachdomain1(ifp);
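The IFQ_STAGE_FLAG_QUED flag above guarantees that an ifaltq_stage
entry is linked into its CPU's staging list at most once.  Below is a
minimal, self-contained userland model of this guarded TAILQ pattern
(illustrative names, assuming only <sys/queue.h> and <assert.h>); the
kernel helpers ifq_stage_insert()/ifq_stage_remove() further down
enforce the same invariant with KKASSERT():

	#include <sys/queue.h>
	#include <assert.h>

	struct stage {
		int			qued;	/* models IFQ_STAGE_FLAG_QUED */
		TAILQ_ENTRY(stage)	link;
	};
	TAILQ_HEAD(stage_head, stage);

	static void
	stage_insert(struct stage_head *head, struct stage *st)
	{
		assert(!st->qued);	/* an entry may be linked only once */
		st->qued = 1;
		TAILQ_INSERT_TAIL(head, st, link);
	}

	static void
	stage_remove(struct stage_head *head, struct stage *st)
	{
		assert(st->qued);
		TAILQ_REMOVE(head, st, link);
		st->qued = 0;
	}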
@@ -2385,11 +2408,75 @@ ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
 	return(0);
 }
 
+static void
+ifq_try_ifstart(struct ifaltq *ifq)
+{
+	struct ifnet *ifp = ifq->altq_ifp;
+	int running = 0, need_sched;
+
+	/*
+	 * Try to do direct ifnet.if_start first, if there is
+	 * contention on ifnet's serializer, ifnet.if_start will
+	 * be scheduled on ifnet's CPU.
+	 */
+	if (!ifnet_tryserialize_tx(ifp)) {
+		/*
+		 * ifnet serializer contention happened,
+		 * ifnet.if_start is scheduled on ifnet's
+		 * CPU, and we keep going.
+		 */
+		logifstart(contend_sched, ifp);
+		if_start_schedule(ifp);
+		return;
+	}
+
+	if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING) {
+		logifstart(run, ifp);
+		ifp->if_start(ifp);
+		if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) ==
+		    IFF_RUNNING)
+			running = 1;
+	}
+	need_sched = if_start_need_schedule(ifq, running);
+
+	ifnet_deserialize_tx(ifp);
+
+	if (need_sched) {
+		/*
+		 * More data need to be transmitted, ifnet.if_start is
+		 * scheduled on ifnet's CPU, and we keep going.
+		 * NOTE: ifnet.if_start interlock is not released.
+		 */
+		logifstart(sched, ifp);
+		if_start_schedule(ifp);
+	}
+}
+
+static __inline void
+ifq_stage_remove(struct ifaltq_stage_head *head, struct ifaltq_stage *stage)
+{
+	KKASSERT(stage->ifqs_flags & IFQ_STAGE_FLAG_QUED);
+	TAILQ_REMOVE(&head->ifqs_head, stage, ifqs_link);
+	stage->ifqs_flags &= ~IFQ_STAGE_FLAG_QUED;
+	stage->ifqs_cnt = 0;
+	stage->ifqs_len = 0;
+}
+
+static __inline void
+ifq_stage_insert(struct ifaltq_stage_head *head, struct ifaltq_stage *stage)
+{
+	KKASSERT((stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) == 0);
+	stage->ifqs_flags |= IFQ_STAGE_FLAG_QUED;
+	TAILQ_INSERT_TAIL(&head->ifqs_head, stage, ifqs_link);
+}
+
 int
 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
 {
 	struct ifaltq *ifq = &ifp->if_snd;
-	int running = 0, error, start = 0, need_sched, mcast = 0, len;
+	int error, start = 0, len, mcast = 0, avoid_start = 0;
+	struct ifaltq_stage_head *head = NULL;
+	struct ifaltq_stage *stage = NULL;
 
 	ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
 
@@ -2397,6 +2484,19 @@ ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
 	if (m->m_flags & M_MCAST)
 		mcast = 1;
 
+	if (curthread->td_type == TD_TYPE_NETISR) {
+		int cpuid = mycpuid;
+
+		head = &ifq_stage_heads[cpuid];
+		stage = &ifq->altq_stage[cpuid];
+
+		stage->ifqs_cnt++;
+		stage->ifqs_len += len;
+		if (stage->ifqs_cnt < ifq_stage_cntmax &&
+		    stage->ifqs_len < (ifp->if_mtu - max_protohdr))
+			avoid_start = 1;
+	}
+
 	ALTQ_LOCK(ifq);
 	error = ifq_enqueue_locked(ifq, m, pa);
 	if (error) {
@@ -2404,8 +2504,24 @@ ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
 			ALTQ_UNLOCK(ifq);
 			return error;
 		}
+		avoid_start = 0;
 	}
 	if (!ifq->altq_started) {
+		if (avoid_start) {
+			ALTQ_UNLOCK(ifq);
+
+			KKASSERT(!error);
+			if ((stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) == 0)
+				ifq_stage_insert(head, stage);
+
+			ifp->if_obytes += len;
+			if (mcast)
+				ifp->if_omcasts++;
+
+			/* atomic_add_long(&if_staged, 1); */
+			return error;
+		}
+
 		/*
 		 * Hold the interlock of ifnet.if_start
 		 */
@@ -2420,47 +2536,21 @@ ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
 			ifp->if_omcasts++;
 	}
 
-	if (!start) {
-		logifstart(avoid, ifp);
-		return error;
+	if (stage != NULL) {
+		if (stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) {
+			ifq_stage_remove(head, stage);
+		} else {
+			stage->ifqs_cnt = 0;
+			stage->ifqs_len = 0;
+		}
 	}
 
-	/*
-	 * Try to do direct ifnet.if_start first, if there is
-	 * contention on ifnet's serializer, ifnet.if_start will
-	 * be scheduled on ifnet's CPU.
-	 */
-	if (!ifnet_tryserialize_tx(ifp)) {
-		/*
-		 * ifnet serializer contention happened,
-		 * ifnet.if_start is scheduled on ifnet's
-		 * CPU, and we keep going.
-		 */
-		logifstart(contend_sched, ifp);
-		if_start_schedule(ifp);
+	if (!start) {
+		logifstart(avoid, ifp);
 		return error;
 	}
 
-	if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING) {
-		logifstart(run, ifp);
-		ifp->if_start(ifp);
-		if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) ==
-		    IFF_RUNNING)
-			running = 1;
-	}
-	need_sched = if_start_need_schedule(ifq, running);
-
-	ifnet_deserialize_tx(ifp);
-
-	if (need_sched) {
-		/*
-		 * More data need to be transmitted, ifnet.if_start is
-		 * scheduled on ifnet's CPU, and we keep going.
-		 * NOTE: ifnet.if_start interlock is not released.
-		 */
-		logifstart(sched, ifp);
-		if_start_schedule(ifp);
-	}
+	ifq_try_ifstart(ifq);
 
 	return error;
 }
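Both ifq_dispatch() above and if_start_rollup() below claim the
altq_started interlock before calling ifq_try_ifstart(), so only one
context drives ifnet.if_start at a time.  A userland sketch of the
claim step, with a pthread mutex standing in for the ALTQ lock (names
are illustrative, and the mutex is assumed to be initialized with
pthread_mutex_init()):

	#include <pthread.h>
	#include <stdbool.h>

	struct txq {
		pthread_mutex_t	lock;		/* stands in for altq_lock */
		bool		started;	/* stands in for altq_started */
	};

	/* Return true if the caller won the right to run if_start. */
	static bool
	txq_claim_start(struct txq *q)
	{
		bool claimed = false;

		pthread_mutex_lock(&q->lock);
		if (!q->started) {
			q->started = true;	/* hold the interlock */
			claimed = true;
		}
		pthread_mutex_unlock(&q->lock);
		return (claimed);
	}

A staged ifq whose interlock is still held simply leaves its packets
queued; this is the third stop condition in the commit message.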
@@ -2664,13 +2754,33 @@ ifnet_service_loop(void *arg __unused)
 	}
 }
 
-#ifdef notyet
 static void
 if_start_rollup(void)
 {
-	/* TODO */
+	struct ifaltq_stage_head *head = &ifq_stage_heads[mycpuid];
+	struct ifaltq_stage *stage;
+
+	while ((stage = TAILQ_FIRST(&head->ifqs_head)) != NULL) {
+		struct ifaltq *ifq = stage->ifqs_altq;
+		int start = 0;
+
+		ifq_stage_remove(head, stage);
+
+		ALTQ_LOCK(ifq);
+		if (!ifq->altq_started) {
+			/*
+			 * Hold the interlock of ifnet.if_start
+			 */
+			ifq->altq_started = 1;
+			start = 1;
+		}
+		ALTQ_UNLOCK(ifq);
+
+		if (start)
+			ifq_try_ifstart(ifq);
+		/* atomic_add_long(&if_staged_start, 1); */
+	}
 }
-#endif
 
 static void
 ifnetinit(void *dummy __unused)
@@ -2686,9 +2796,10 @@ ifnetinit(void *dummy __unused)
 		netmsg_service_port_init(&thr->td_msgport);
 		lwkt_schedule(thr);
 	}
-#ifdef notyet
+
+	for (i = 0; i < ncpus; ++i)
+		TAILQ_INIT(&ifq_stage_heads[i].ifqs_head);
 	netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
-#endif
 }
 
 struct ifnet *
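The staging threshold added above is exported as the sysctl
net.link.stage_cntmax (default 4); net.link.staged and
net.link.staged_start are exported as well, although their updates are
still commented out above.  A small userland check of the knob,
assuming sysctlbyname(3) as found on DragonFly:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int cntmax;
		size_t len = sizeof(cntmax);

		/* net.link.stage_cntmax is added by this commit. */
		if (sysctlbyname("net.link.stage_cntmax", &cntmax, &len,
		    NULL, 0) == -1) {
			perror("sysctlbyname");
			return (1);
		}
		printf("ifq staging packet count max: %d\n", cntmax);
		return (0);
	}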