From 2b57d0132672c4602a80d22a441c4a2df81aef5d Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 7 Apr 2004 17:01:27 +0000 Subject: [PATCH] Make TCP stats per-cpu. Submitted-by: Hiten Pandya --- sys/netinet/ip_demux.c | 3 +- sys/netinet/tcp_input.c | 7 +-- sys/netinet/tcp_output.c | 4 +- sys/netinet/tcp_subr.c | 44 ++++++++++++++++- sys/netinet/tcp_timer.c | 4 +- sys/netinet/tcp_var.h | 104 ++------------------------------------- usr.bin/netstat/inet.c | 52 +++++++++++++++++--- 7 files changed, 102 insertions(+), 116 deletions(-) diff --git a/sys/netinet/ip_demux.c b/sys/netinet/ip_demux.c index 6c28332621..2592a0b9b7 100644 --- a/sys/netinet/ip_demux.c +++ b/sys/netinet/ip_demux.c @@ -2,7 +2,7 @@ * Copyright (c) 2003 Jeffrey Hsu * All rights reserved. * - * $DragonFly: src/sys/netinet/ip_demux.c,v 1.14 2004/04/05 09:17:48 hsu Exp $ + * $DragonFly: src/sys/netinet/ip_demux.c,v 1.15 2004/04/07 17:01:25 dillon Exp $ */ #include "opt_inet.h" @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 0c1e2b6b97..2fa8d86ca3 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -33,7 +33,7 @@ * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.107.2.38 2003/05/21 04:46:41 cjc Exp $ - * $DragonFly: src/sys/netinet/tcp_input.c,v 1.23 2004/04/05 17:47:01 dillon Exp $ + * $DragonFly: src/sys/netinet/tcp_input.c,v 1.24 2004/04/07 17:01:25 dillon Exp $ */ #include "opt_ipfw.h" /* for ipfw_fwd */ @@ -102,11 +102,6 @@ MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry"); static const int tcprexmtthresh = 3; tcp_cc tcp_ccgen; - -struct tcpstat tcpstat; -SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW, - &tcpstat , tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); - static int log_in_vain = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, &log_in_vain, 0, "Log all incoming TCP connections"); 
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 46ae71ade1..0c9bdfff70 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -32,7 +32,7 @@ * * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.39.2.20 2003/01/29 22:45:36 hsu Exp $ - * $DragonFly: src/sys/netinet/tcp_output.c,v 1.10 2004/03/08 00:36:30 hsu Exp $ + * $DragonFly: src/sys/netinet/tcp_output.c,v 1.11 2004/04/07 17:01:25 dillon Exp $ */ #include "opt_inet6.h" @@ -49,6 +49,8 @@ #include #include #include +#include +#include #include diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index f7671e7535..e5665276a3 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -32,7 +32,7 @@ * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.73.2.31 2003/01/24 05:11:34 sam Exp $ - * $DragonFly: src/sys/netinet/tcp_subr.c,v 1.19 2004/04/05 17:47:01 dillon Exp $ + * $DragonFly: src/sys/netinet/tcp_subr.c,v 1.20 2004/04/07 17:01:25 dillon Exp $ */ #include "opt_compat.h" @@ -185,6 +185,33 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW, static void tcp_cleartaocache (void); static void tcp_notify (struct inpcb *, int); +struct tcp_stats tcpstats_ary[MAXCPU]; +#ifdef SMP /* export one struct tcp_stats per cpu via a handler */ +static int +sysctl_tcpstats(SYSCTL_HANDLER_ARGS) +{ + int cpu, error; + + for (cpu = error = 0; cpu < ncpus; ++cpu) { /* stop at first error */ + if ((error = SYSCTL_OUT(req, (void *)&tcpstats_ary[cpu], + sizeof(struct tcp_stats)))) + break; + if ((error = SYSCTL_IN(req, (void *)&tcpstats_ary[cpu], + sizeof(struct tcp_stats)))) + break; + } + + return (error); /* 0, or the first SYSCTL_OUT/SYSCTL_IN failure */ +} +SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats, CTLTYPE_OPAQUE|CTLFLAG_RW, + 0, 0, sysctl_tcpstats, "S,tcp_stats", + "TCP statistics (struct tcp_stats, netinet/tcp_stats.h)"); +#else /* !SMP */ +SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW, + &tcpstat , tcp_stats, + "TCP statistics (struct tcp_stats, netinet/tcp_stats.h)"); +#endif + /* * Target size of 
TCP PCB hash tables. Must be a power of two. * @@ -280,6 +307,21 @@ tcp_init() panic("tcp_init"); #undef TCP_MINPROTOHDR + /* + * Initialize TCP statistics. + * + * It is laid out as an array which has one element for UP, + * and SMP_MAXCPU elements for SMP. This allows us to retain + * the access mechanism from userland for both UP and SMP. + */ +#ifdef SMP + for (cpu = 0; cpu < ncpus; ++cpu) { + bzero(&tcpstats_ary[cpu], sizeof(struct tcp_stats)); + } +#else + bzero(&tcpstat, sizeof(struct tcp_stats)); +#endif + syncache_init(); tcp_thread_init(); } diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 71ec7e1ea2..c882293501 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -33,7 +33,7 @@ * * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.14 2003/02/03 02:33:41 hsu Exp $ - * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.6 2004/03/08 00:39:00 hsu Exp $ + * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.7 2004/04/07 17:01:25 dillon Exp $ */ #include "opt_compat.h" @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include /* before tcp_seq.h, for tcp_random18() */ diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 062f1e37c4..2df5e99977 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -33,7 +33,7 @@ * * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.56.2.13 2003/02/03 02:34:07 hsu Exp $ - * $DragonFly: src/sys/netinet/tcp_var.h,v 1.15 2004/03/14 08:21:53 hsu Exp $ + * $DragonFly: src/sys/netinet/tcp_var.h,v 1.16 2004/04/07 17:01:25 dillon Exp $ */ #ifndef _NETINET_TCP_VAR_H_ @@ -41,6 +41,10 @@ #include /* needed for in_conninfo, inp_gen_t */ +#ifndef _NETINET_TCP_STATS_H_ +#include +#endif + /* * Kernel variables for tcp. */ @@ -304,103 +308,6 @@ struct rmxp_tao { max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) -/* - * TCP statistics. 
- * Many of these should be kept per connection, - * but that's inconvenient at the moment. - */ -struct tcpstat { - u_long tcps_connattempt; /* connections initiated */ - u_long tcps_accepts; /* connections accepted */ - u_long tcps_connects; /* connections established */ - u_long tcps_drops; /* connections dropped */ - u_long tcps_conndrops; /* embryonic connections dropped */ - u_long tcps_closed; /* conn. closed (includes drops) */ - u_long tcps_segstimed; /* segs where we tried to get rtt */ - u_long tcps_rttupdated; /* times we succeeded */ - u_long tcps_delack; /* delayed acks sent */ - u_long tcps_timeoutdrop; /* conn. dropped in rxmt timeout */ - u_long tcps_rexmttimeo; /* retransmit timeouts */ - u_long tcps_persisttimeo; /* persist timeouts */ - u_long tcps_keeptimeo; /* keepalive timeouts */ - u_long tcps_keepprobe; /* keepalive probes sent */ - u_long tcps_keepdrops; /* connections dropped in keepalive */ - - u_long tcps_sndtotal; /* total packets sent */ - u_long tcps_sndpack; /* data packets sent */ - u_long tcps_sndbyte; /* data bytes sent */ - u_long tcps_sndrexmitpack; /* data packets retransmitted */ - u_long tcps_sndrexmitbyte; /* data bytes retransmitted */ - u_long tcps_sndfastrexmit; /* Fast Retransmissions */ - u_long tcps_sndearlyrexmit; /* early Fast Retransmissions */ - u_long tcps_sndlimited; /* Limited Transmit packets */ - u_long tcps_sndrtobad; /* spurious RTO retransmissions */ - u_long tcps_sndfastrexmitbad; /* spurious Fast Retransmissions */ - u_long tcps_sndearlyrexmitbad; /* spurious early Fast Retransmissions, - a subset of tcps_sndfastrexmitbad */ - u_long tcps_eifeldetected; /* Eifel-detected spurious rexmits */ - u_long tcps_rttcantdetect; /* Eifel but not 1/2 RTT-detectable */ - u_long tcps_rttdetected; /* RTT-detected spurious RTO rexmits */ - u_long tcps_sndacks; /* ack-only packets sent */ - u_long tcps_sndprobe; /* window probes sent */ - u_long tcps_sndurg; /* packets sent with URG only */ - u_long tcps_sndwinup; /* 
window update-only packets sent */ - u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ - - u_long tcps_rcvtotal; /* total packets received */ - u_long tcps_rcvpack; /* packets received in sequence */ - u_long tcps_rcvbyte; /* bytes received in sequence */ - u_long tcps_rcvbadsum; /* packets received with ccksum errs */ - u_long tcps_rcvbadoff; /* packets received with bad offset */ - u_long tcps_rcvmemdrop; /* packets dropped for lack of memory */ - u_long tcps_rcvshort; /* packets received too short */ - u_long tcps_rcvduppack; /* duplicate-only packets received */ - u_long tcps_rcvdupbyte; /* duplicate-only bytes received */ - u_long tcps_rcvpartduppack; /* packets with some duplicate data */ - u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */ - u_long tcps_rcvoopack; /* out-of-order packets received */ - u_long tcps_rcvoobyte; /* out-of-order bytes received */ - u_long tcps_rcvpackafterwin; /* packets with data after window */ - u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */ - u_long tcps_rcvafterclose; /* packets rcvd after "close" */ - u_long tcps_rcvwinprobe; /* rcvd window probe packets */ - u_long tcps_rcvdupack; /* rcvd duplicate acks */ - u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */ - u_long tcps_rcvackpack; /* rcvd ack packets */ - u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */ - u_long tcps_rcvwinupd; /* rcvd window update packets */ - u_long tcps_pawsdrop; /* segments dropped due to PAWS */ - u_long tcps_predack; /* times hdr predict ok for acks */ - u_long tcps_preddat; /* times hdr predict ok for data pkts */ - u_long tcps_pcbcachemiss; - u_long tcps_cachedrtt; /* times cached RTT in route updated */ - u_long tcps_cachedrttvar; /* times cached rttvar updated */ - u_long tcps_cachedssthresh; /* times cached ssthresh updated */ - u_long tcps_usedrtt; /* times RTT initialized from route */ - u_long tcps_usedrttvar; /* times RTTVAR initialized from rt */ - u_long tcps_usedssthresh; /* times 
ssthresh initialized from rt*/ - u_long tcps_persistdrop; /* timeout in persist state */ - u_long tcps_badsyn; /* bogus SYN, e.g. premature ACK */ - u_long tcps_mturesent; /* resends due to MTU discovery */ - u_long tcps_listendrop; /* listen queue overflows */ - - u_long tcps_sc_added; /* entry added to syncache */ - u_long tcps_sc_retransmitted; /* syncache entry was retransmitted */ - u_long tcps_sc_dupsyn; /* duplicate SYN packet */ - u_long tcps_sc_dropped; /* could not reply to packet */ - u_long tcps_sc_completed; /* successful extraction of entry */ - u_long tcps_sc_bucketoverflow; /* syncache per-bucket limit hit */ - u_long tcps_sc_cacheoverflow; /* syncache cache limit hit */ - u_long tcps_sc_reset; /* RST removed entry from syncache */ - u_long tcps_sc_stale; /* timed out or listen socket gone */ - u_long tcps_sc_aborted; /* syncache entry aborted */ - u_long tcps_sc_badack; /* removed due to bad ACK */ - u_long tcps_sc_unreach; /* ICMP unreachable received */ - u_long tcps_sc_zonefail; /* zalloc() failed */ - u_long tcps_sc_sendcookie; /* SYN cookie sent */ - u_long tcps_sc_recvcookie; /* SYN cookie received */ -}; - /* * TCB structure exported to user-land via sysctl(3). 
* Evil hack: declare only if in_pcb.h and sys/socketvar.h have been @@ -457,7 +364,6 @@ SYSCTL_DECL(_net_inet_tcp); #endif extern struct inpcbinfo tcbinfo[]; -extern struct tcpstat tcpstat; /* tcp statistics */ extern int tcp_mssdflt; /* XXX */ extern int tcp_delack_enabled; extern int tcp_do_newreno; diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index b8703cb2e6..6da4db300b 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -32,7 +32,7 @@ * * @(#)inet.c 8.5 (Berkeley) 5/24/95 * $FreeBSD: src/usr.bin/netstat/inet.c,v 1.37.2.11 2003/11/27 14:46:49 ru Exp $ - * $DragonFly: src/usr.bin/netstat/inet.c,v 1.11 2004/03/12 11:29:51 hmp Exp $ + * $DragonFly: src/usr.bin/netstat/inet.c,v 1.12 2004/04/07 17:01:27 dillon Exp $ */ #include @@ -340,22 +340,59 @@ protopr(u_long proto, /* for sysctl version we pass proto # */ free(buf); } +void +tcp_stats_agg(struct tcp_stats *ary, struct tcp_stats *ttl, int cpucnt) +{ + int i, off, siz; + siz = sizeof(struct tcp_stats); + + if (!ary && !ttl) + return; + + bzero(ttl, siz); + if (cpucnt == 1) { + *ttl = ary[0]; + } else { + for (i = 0; i < cpucnt; ++i) { + for (off = 0; off < siz; off += sizeof(u_long)) { + *(u_long *)((char *)(*(&ttl)) + off) += + *(u_long *)((char *)&ary[i] + off); + } + } + } +} + /* * Dump TCP statistics structure. */ void tcp_stats(u_long off __unused, char *name, int af __unused) { - struct tcpstat tcpstat, zerostat; - size_t len = sizeof tcpstat; + struct tcp_stats tcpstat, *stattmp; + struct tcp_stats zerostat[SMP_MAXCPU]; + size_t len = sizeof(struct tcp_stats) * SMP_MAXCPU; + int cpucnt; if (zflag) - memset(&zerostat, 0, len); - if (sysctlbyname("net.inet.tcp.stats", &tcpstat, &len, - zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { - warn("sysctl: net.inet.tcp.stats"); + memset(zerostat, 0, len); + + if ((stattmp = malloc(len)) == NULL) { return; + } else { + if (sysctlbyname("net.inet.tcp.stats", stattmp, &len, + zflag ? zerostat : NULL, zflag ? 
len : 0) < 0) { + warn("sysctl: net.inet.tcp.stats"); + free(stattmp); + return; + } else { + if ((stattmp = realloc(stattmp, len)) == NULL) { + warn("tcp_stats"); + return; + } + } } + cpucnt = len / sizeof(struct tcp_stats); + tcp_stats_agg(stattmp, &tcpstat, cpucnt); #ifdef INET6 if (tcp_done != 0) @@ -458,6 +495,7 @@ tcp_stats(u_long off __unused, char *name, int af __unused) p(tcps_sc_zonefail, "\t\t%lu zone failures\n"); p(tcps_sc_sendcookie, "\t%lu cookies sent\n"); p(tcps_sc_recvcookie, "\t%lu cookies received\n"); + free(stattmp); #undef p #undef p1a #undef p2 -- 2.41.0