From: Sepherosa Ziehau Date: Wed, 16 Jan 2013 11:02:36 +0000 (+0800) Subject: tcp/tso: Add per-device TSO aggregation size limit X-Git-Tag: v3.4.0rc~462 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/e41e61d527704e7aa16af8e67ac60563ee8e4962?ds=sidebyside tcp/tso: Add per-device TSO aggregation size limit - Prevent possible TSO large burst, when it is inappropriate (plenty of >24 segment bursts were observed, even when 32 parallel sending TCP streams are running on the same GigE NIC). TSO large burst has following drawbacks on a single TX queue, even on the devices that are multiple TX queues capable: o Delay other senders' packet transmission quite a lot. o Has negative effect on TCP receivers, which send ACKs. o Cause buffer bloat in software sending queues, whose upper limit is based on "packet count". o Packet scheduler's decision could be less effective. On the other hand, TSO large burst could improve CPU usage. - Improve fairness between multiple TX queues on the devices that are multiple TX queues capable but only fetch data on TSO large packet boundary instead of TCP segment boundary. Drivers could supply their own TSO aggregation size limit. If driver does not set it, the default value is 6000 (4 segments if MTU is 1500). The default value increases CPU usage a little bit: on i7-2600 w/ HT enabled, single TCP sending stream, CPU usage increases from 14%~17% to 17%~20%. User could configure TSO aggregation size limit by using ifconfig(8): ifconfig ifaceX tsolen _n_ --- diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index b545fe92ba..7020f2ebd7 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -408,6 +408,13 @@ The MTU is used to limit the size of packets that are transmitted on an interface. Not all interfaces support setting the MTU, and some interfaces have range restrictions. 
+.It Cm tsolen Ar n +Set the maximum amount of data +that TCP segmentation offloading is allowed to aggregate to +.Ar n , +the default value is interface specific. +This setting only takes effect on interfaces +that support TCP segmentation offloading. .It Cm netmask Ar mask .\" (Inet and ISO.) (Inet only.) @@ -2243,8 +2250,10 @@ If the .Fl m flag is passed before an interface name, .Nm -will display the capability list and all -of the supported media for the specified interface. +will display the capability list, +the maximum amount of data +that TCP segmentation offloading is allowed to aggregate and +all of the supported media for the specified interface. If .Fl L flag is supplied, address lifetime is displayed for IPv6 addresses, diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c index c551348961..550ec741f7 100644 --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -816,6 +816,16 @@ setifmtu(const char *val, int dummy __unused, int s, warn("ioctl (set mtu)"); } +static void +setiftsolen(const char *val, int dummy __unused, int s, + const struct afswtch *afp) +{ + strncpy(ifr.ifr_name, name, sizeof (ifr.ifr_name)); + ifr.ifr_tsolen = atoi(val); + if (ioctl(s, SIOCSIFTSOLEN, (caddr_t)&ifr) < 0) + warn("ioctl (set tsolen)"); +} + static void setifname(const char *val, int dummy __unused, int s, const struct afswtch *afp) @@ -920,6 +930,13 @@ status(const struct afswtch *afp, int addrcount, struct sockaddr_dl *sdl, if (supmedia && ifr.ifr_reqcap != 0) { printb("\tcapabilities", ifr.ifr_reqcap, IFCAPBITS); putchar('\n'); + if (ifr.ifr_reqcap & IFCAP_TSO) { + if (ioctl(s, SIOCGIFTSOLEN, + (caddr_t)&ifr) == 0) { + printf("\ttsolen %d", ifr.ifr_tsolen); + putchar('\n'); + } + } } } @@ -1121,7 +1138,8 @@ static struct cmd basic_cmds[] = { DEF_CMD("noicmp", IFF_LINK1, setifflags), DEF_CMD_ARG("mtu", setifmtu), DEF_CMD_ARG("name", setifname), - DEF_CMD_ARG("pollcpu", setifpollcpu) + DEF_CMD_ARG("pollcpu", setifpollcpu), + DEF_CMD_ARG("tsolen", 
setiftsolen) }; static __constructor(101) void diff --git a/sys/net/if.c b/sys/net/if.c index a74e1587fa..1cf740dc23 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1568,6 +1568,10 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) ifr->ifr_mtu = ifp->if_mtu; break; + case SIOCGIFTSOLEN: + ifr->ifr_tsolen = ifp->if_tsolen; + break; + case SIOCGIFDATA: error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, sizeof(ifp->if_data)); @@ -1748,6 +1752,19 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) break; } + case SIOCSIFTSOLEN: + error = priv_check_cred(cred, PRIV_ROOT, 0); + if (error) + break; + + /* XXX need driver supplied upper limit */ + if (ifr->ifr_tsolen <= 0) { + error = EINVAL; + break; + } + ifp->if_tsolen = ifr->ifr_tsolen; + break; + case SIOCADDMULTI: case SIOCDELMULTI: error = priv_check_cred(cred, PRIV_ROOT, 0); diff --git a/sys/net/if.h b/sys/net/if.h index 605d859966..2864a68086 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -257,6 +257,7 @@ struct ifreq { void *ifru_data; int ifru_cap[2]; int ifru_pollcpu; + int ifru_tsolen; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ @@ -272,6 +273,7 @@ struct ifreq { #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_pollcpu ifr_ifru.ifru_pollcpu /* polling(4) cpu */ +#define ifr_tsolen ifr_ifru.ifru_tsolen /* max TSO length */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index bf46ab7f1e..bd6cf5df42 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -155,6 +155,11 @@ static u_long ether_input_wronghwhash; static int ether_input_ckhash; #endif +#define ETHER_TSOLEN_DEFAULT (4 * ETHERMTU) + +static int ether_tsolen_default = ETHER_TSOLEN_DEFAULT; +TUNABLE_INT("net.link.ether.tsolen", &ether_tsolen_default); + 
SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); SYSCTL_INT(_net_link_ether, OID_AUTO, debug, CTLFLAG_RW, @@ -168,6 +173,9 @@ SYSCTL_ULONG(_net_link_ether, OID_AUTO, prepend_hdr, CTLFLAG_RW, "# of ether header restoration which prepends mbuf"); SYSCTL_ULONG(_net_link_ether, OID_AUTO, input_wronghash, CTLFLAG_RW, &ether_input_wronghash, 0, "# of input packets with wrong hash"); +SYSCTL_INT(_net_link_ether, OID_AUTO, tsolen, CTLFLAG_RW, + &ether_tsolen_default, 0, "Default max TSO length"); + #ifdef RSS_DEBUG SYSCTL_ULONG(_net_link_ether, OID_AUTO, rss_nopi, CTLFLAG_RW, &ether_rss_nopi, 0, "# of packets do not have pktinfo"); @@ -580,6 +588,14 @@ ether_ifattach_bpf(struct ifnet *ifp, uint8_t *lla, u_int dlt, u_int hdrlen, ifp->if_hdrlen = ETHER_HDR_LEN; if_attach(ifp, serializer); ifp->if_mtu = ETHERMTU; + if (ifp->if_tsolen <= 0) { + if ((ether_tsolen_default / ETHERMTU) < 2) { + kprintf("ether TSO maxlen %d -> %d\n", + ether_tsolen_default, ETHER_TSOLEN_DEFAULT); + ether_tsolen_default = ETHER_TSOLEN_DEFAULT; + } + ifp->if_tsolen = ether_tsolen_default; + } if (ifp->if_baudrate == 0) ifp->if_baudrate = 10000000; ifp->if_output = ether_output; diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 8613d65449..ed3f7d74a1 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -251,7 +251,7 @@ struct ifnet { /* Place holder */ void (*if_npoll_unused)(void); #endif - int if_unused3; + int if_tsolen; /* max TSO length */ struct ifaltq if_snd; /* output queue (includes altq) */ struct ifprefixhead if_prefixhead; /* list of prefixes per if */ const uint8_t *if_broadcastaddr; diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 82e9741931..6b6ddca08a 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -187,7 +187,7 @@ tcp_output(struct tcpcb *tp) #endif boolean_t can_tso = FALSE, use_tso; boolean_t report_sack, idle_cwv = FALSE; - u_int segsz, tso_hlen; + u_int segsz, tso_hlen, tso_lenmax = 0; 
KKASSERT(so->so_port == &curthread->td_msgport); @@ -245,8 +245,10 @@ tcp_output(struct tcpcb *tp) struct rtentry *rt = inp->inp_route.ro_rt; if (rt != NULL && (rt->rt_flags & RTF_UP) && - (rt->rt_ifp->if_hwassist & CSUM_TSO)) + (rt->rt_ifp->if_hwassist & CSUM_TSO)) { can_tso = TRUE; + tso_lenmax = rt->rt_ifp->if_tsolen; + } } } #endif /* !IPSEC && !FAST_IPSEC */ @@ -492,12 +494,16 @@ again: if (!use_tso) { len = segsz; } else { + if (__predict_false(tso_lenmax < segsz)) + tso_lenmax = segsz << 1; + /* * Truncate TSO transfers to (IP_MAXPACKET - iphlen - * thoff), and make sure that we send equal size * transfers down the stack (rather than big-small- * big-small-...). */ + len = min(len, tso_lenmax); len = (min(len, (IP_MAXPACKET - tso_hlen)) / segsz) * segsz; if (len <= segsz) diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index 69c1fe812c..9e197a37c3 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -129,4 +129,7 @@ #define SIOCSIFPOLLCPU _IOW('i', 125, struct ifreq) /* set polling(4) cpu */ #define SIOCGIFPOLLCPU _IOWR('i', 126, struct ifreq) /* set polling(4) cpu */ +#define SIOCSIFTSOLEN _IOW('i', 127, struct ifreq) /* set max TSO len */ +#define SIOCGIFTSOLEN _IOWR('i', 128, struct ifreq) /* get max TSO len */ + #endif /* !_SYS_SOCKIO_H_ */