From c83e573d02bea89e07d19abdff35f9c105f8bd32 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 13 Sep 2013 17:14:39 +0800 Subject: [PATCH] mbuf: Fix jcluster support - Free the jcluster to the correct objcache, which solves "jcluster mbuf" objcache exhaustion. - By default, set the amount of the jclusters to half the amount of the normal clusters. jcluster will be used on TCP sending path in the later commit to improve TSO performance for 10Ge. - Add mbuf stat for jcluster; adjust netstat(1) to show it. - Add m_getlj(), which will be used on TCP sending path in the later commit to improve TSO performance for 10Ge. --- sys/kern/uipc_mbuf.c | 40 +++++++++++++++++++++++++++++---------- sys/sys/mbuf.h | 16 +++++++++++++++- usr.bin/netstat/main.c | 5 ++++- usr.bin/netstat/mbuf.c | 27 ++++++++++++++++++++------ usr.bin/netstat/netstat.h | 2 +- 5 files changed, 71 insertions(+), 19 deletions(-) diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index 42696bd70e..765692ef96 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -297,7 +297,7 @@ do_mbstat(SYSCTL_HANDLER_ARGS) { mbstat_total.m_mbufs += mbstat[i].m_mbufs; mbstat_total.m_clusters += mbstat[i].m_clusters; - mbstat_total.m_spare += mbstat[i].m_spare; + mbstat_total.m_jclusters += mbstat[i].m_jclusters; mbstat_total.m_clfree += mbstat[i].m_clfree; mbstat_total.m_drops += mbstat[i].m_drops; mbstat_total.m_wait += mbstat[i].m_wait; @@ -374,6 +374,7 @@ static MALLOC_DEFINE(M_MCLMETA, "mclmeta", "mclmeta"); static void m_reclaim (void); static void m_mclref(void *arg); static void m_mclfree(void *arg); +static void m_mjclfree(void *arg); /* * NOTE: Default NMBUFS must take into account a possible DOS attack @@ -389,7 +390,7 @@ static void m_mclfree(void *arg); #define MCL_CACHEFRAC 4 #endif #ifndef NMBJCLUSTERS -#define NMBJCLUSTERS 2048 +#define NMBJCLUSTERS (NMBCLUSTERS / 2) #endif #ifndef NMBUFS #define NMBUFS (nmbclusters * 2 + maxfiles) @@ -526,7 +527,10 @@ linkjcluster(struct mbuf *m, 
struct mbcluster *cl, uint size) m->m_ext.ext_arg = cl; m->m_ext.ext_buf = cl->mcl_data; m->m_ext.ext_ref = m_mclref; - m->m_ext.ext_free = m_mclfree; + if (size != MCLBYTES) + m->m_ext.ext_free = m_mjclfree; + else + m->m_ext.ext_free = m_mclfree; m->m_ext.ext_size = size; atomic_add_int(&cl->mcl_refs, 1); @@ -705,7 +709,7 @@ mbinit(void *dummy) objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); mb_limit += limit; - limit = nmbjclusters / 4; /* XXX really rarely used */ + limit = nmbjclusters; mbufjcluster_cache = objcache_create("mbuf + jcluster", limit, 0, mbufjcluster_ctor, mbufcluster_dtor, NULL, @@ -714,7 +718,7 @@ mbinit(void *dummy) limit = nmbjclusters; mbufphdrjcluster_cache = objcache_create("mbuf pkt hdr + jcluster", - limit, nmbjclusters / 16, + limit, 0, mbufphdrjcluster_ctor, mbufcluster_dtor, NULL, objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); mb_limit += limit; @@ -899,7 +903,7 @@ m_getclr(int how, int type) static struct mbuf * m_getcl_cache(int how, short type, int flags, struct objcache *mbclc, - struct objcache *mbphclc) + struct objcache *mbphclc, u_long *cl_stats) { struct mbuf *m = NULL; int ocflags = MBTOM(how); @@ -939,7 +943,7 @@ retryonce: mbuftrack(m); ++mbtypes[mycpu->gd_cpuid].stats[type]; - ++mbstat[mycpu->gd_cpuid].m_clusters; + ++(*cl_stats); return (m); } @@ -947,19 +951,22 @@ struct mbuf * m_getjcl(int how, short type, int flags, size_t size) { struct objcache *mbclc, *mbphclc; + u_long *cl_stats; switch (size) { case MCLBYTES: mbclc = mbufcluster_cache; mbphclc = mbufphdrcluster_cache; + cl_stats = &mbstat[mycpu->gd_cpuid].m_clusters; break; default: mbclc = mbufjcluster_cache; mbphclc = mbufphdrjcluster_cache; + cl_stats = &mbstat[mycpu->gd_cpuid].m_jclusters; break; } - return m_getcl_cache(how, type, flags, mbclc, mbphclc); + return m_getcl_cache(how, type, flags, mbclc, mbphclc, cl_stats); } /* @@ -973,7 +980,8 @@ struct mbuf * m_getcl(int how, short type, int flags) { return 
m_getcl_cache(how, type, flags, - mbufcluster_cache, mbufphdrcluster_cache); + mbufcluster_cache, mbufphdrcluster_cache, + &mbstat[mycpu->gd_cpuid].m_clusters); } /* @@ -1080,6 +1088,17 @@ m_mclfree(void *arg) } } +static void +m_mjclfree(void *arg) +{ + struct mbcluster *mcl = arg; + + if (atomic_fetchadd_int(&mcl->mcl_refs, -1) == 1) { + --mbstat[mycpu->gd_cpuid].m_jclusters; + objcache_put(mjclmeta_cache, mcl); + } +} + /* * Free a single mbuf and any associated external storage. The successor, * if any, is returned. @@ -1179,13 +1198,14 @@ m_free(struct mbuf *m) objcache_put(mbufphdrjcluster_cache, m); else objcache_put(mbufjcluster_cache, m); + --mbstat[mycpu->gd_cpuid].m_jclusters; } else { if (m->m_flags & M_PHCACHE) objcache_put(mbufphdrcluster_cache, m); else objcache_put(mbufcluster_cache, m); + --mbstat[mycpu->gd_cpuid].m_clusters; } - --mbstat[mycpu->gd_cpuid].m_clusters; } else { /* * Hell. Someone else has a ref on this cluster, diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 59fdfe6de7..aa2b9414aa 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -329,7 +329,7 @@ struct mbuf { struct mbstat { u_long m_mbufs; /* mbufs obtained from page pool */ u_long m_clusters; /* clusters obtained from page pool */ - u_long m_spare; /* spare field */ + u_long m_jclusters; /* jclusters obtained from page pool */ u_long m_clfree; /* free clusters */ u_long m_drops; /* times failed to find space */ u_long m_wait; /* times waited for space */ @@ -586,6 +586,20 @@ m_getl(int len, int how, int type, int flags, int *psize) return (m); } +static __inline struct mbuf * +m_getlj(int len, int how, int type, int flags, int *psize) +{ + if (len > MCLBYTES) { + struct mbuf *m; + + m = m_getjcl(how, type, flags, MJUMPAGESIZE); + if (psize != NULL) + *psize = MJUMPAGESIZE; + return m; + } + return m_getl(len, how, type, flags, psize); +} + /* * Get a single mbuf that covers the requested number of bytes. * This function does not create mbuf chains. 
It explicitly marks diff --git a/usr.bin/netstat/main.c b/usr.bin/netstat/main.c index 9f1f7c1ca1..9cb407ea7c 100644 --- a/usr.bin/netstat/main.c +++ b/usr.bin/netstat/main.c @@ -141,6 +141,8 @@ static struct nlist nl[] = { { .n_name = "_ncpus" }, #define N_CARPSTAT 42 { .n_name = "_carpstats" }, +#define N_NMBJCLUSTERS 43 + { .n_name = "_nmbjclusters" }, { .n_name = NULL }, }; @@ -464,10 +466,11 @@ main(int argc, char **argv) mbpr(nl[N_MBSTAT].n_value, nl[N_MBTYPES].n_value, nl[N_NMBCLUSTERS].n_value, + nl[N_NMBJCLUSTERS].n_value, nl[N_NMBUFS].n_value, nl[N_NCPUS].n_value); } else { - mbpr(0, 0, 0, 0, 0); + mbpr(0, 0, 0, 0, 0, 0); } exit(0); } diff --git a/usr.bin/netstat/mbuf.c b/usr.bin/netstat/mbuf.c index e06a4011c5..389ee6bdc7 100644 --- a/usr.bin/netstat/mbuf.c +++ b/usr.bin/netstat/mbuf.c @@ -62,14 +62,14 @@ static struct mbtypenames { * Print mbuf statistics. */ void -mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr, - u_long ncpusaddr) +mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbjcaddr, + u_long nmbufaddr, u_long ncpusaddr) { u_long totmem, totpossible; struct mbstat *mbstat; struct mbtypenames *mp; - int name[3], nmbclusters, nmbufs, nmbtypes; - size_t nmbclen, nmbuflen, mbstatlen, mbtypeslen; + int name[3], nmbclusters, nmbjclusters, nmbufs, nmbtypes; + size_t nmbclen, nmbjclen, nmbuflen, mbstatlen, mbtypeslen; u_long *mbtypes; int ncpus; int n; @@ -106,6 +106,8 @@ mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr, goto err; if (kread(nmbcaddr, (char *)&nmbclusters, sizeof(int))) goto err; + if (kread(nmbjcaddr, (char *)&nmbjclusters, sizeof(int))) + goto err; if (kread(nmbufaddr, (char *)&nmbufs, sizeof(int))) goto err; for (n = 1; n < ncpus; ++n) { @@ -145,6 +147,13 @@ mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr, goto err; } + nmbjclen = sizeof(int); + if (sysctlbyname("kern.ipc.nmbjclusters", + &nmbjclusters, &nmbjclen, 0, 0) < 0) { + warn("sysctl: retrieving 
nmbjclusters"); + goto err; + } + nmbuflen = sizeof(int); if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &nmbuflen, 0, 0) < 0) { warn("sysctl: retrieving nmbufs"); @@ -156,10 +165,14 @@ mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr, #define MSIZE (mbstat->m_msize) #undef MCLBYTES #define MCLBYTES (mbstat->m_mclbytes) +#undef MJUMPAGESIZE +#define MJUMPAGESIZE (mbstat->m_mjumpagesize) printf("%lu/%u mbufs in use (current/max):\n", mbstat->m_mbufs, nmbufs); printf("%lu/%u mbuf clusters in use (current/max)\n", mbstat->m_clusters, nmbclusters); + printf("%lu/%u mbuf jumbo clusters in use (current/max)\n", + mbstat->m_jclusters, nmbjclusters); for (mp = mbtypenames; mp->mt_name; mp++) if (mbtypes[mp->mt_type]) { seen[mp->mt_type] = YES; @@ -173,8 +186,10 @@ mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr, printf("\t%lu mbufs and mbuf clusters allocated to \n", mbtypes[i], i); } - totmem = mbstat->m_mbufs * MSIZE + mbstat->m_clusters * MCLBYTES; - totpossible = nmbclusters * MCLBYTES + MSIZE * nmbufs; + totmem = mbstat->m_mbufs * MSIZE + mbstat->m_clusters * MCLBYTES + + mbstat->m_jclusters * MJUMPAGESIZE; + totpossible = MSIZE * nmbufs + nmbclusters * MCLBYTES + + nmbjclusters * MJUMPAGESIZE; printf("%lu Kbytes allocated to network (%lu%% of mb_map in use)\n", totmem / 1024, (totmem * 100) / totpossible); printf("%lu requests for memory denied\n", mbstat->m_drops); diff --git a/usr.bin/netstat/netstat.h b/usr.bin/netstat/netstat.h index fe7531171b..d8203f2e69 100644 --- a/usr.bin/netstat/netstat.h +++ b/usr.bin/netstat/netstat.h @@ -97,7 +97,7 @@ const char *netname6 (struct sockaddr_in6 *, struct in6_addr *); void pfkey_stats (u_long, const char *, int); #endif -void mbpr (u_long, u_long, u_long, u_long, u_long); +void mbpr (u_long, u_long, u_long, u_long, u_long, u_long); void hostpr (u_long, u_long); void impstats (u_long, u_long); -- 2.41.0