X-Git-Url: https://gitweb.dragonflybsd.org/~josepht/dragonfly.git/blobdiff_plain/22bc4325c743eaa6387f06c83246382805f5e5be..33dbeae810812d056db4c1a65d540b9dceeccede:/sys/kern/uipc_mbuf.c

diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index e0c803086e..22323f0003 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -74,6 +74,7 @@
 #include "opt_mbuf_stress_test.h"
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -252,25 +253,27 @@ int m_defragrandomfailures;
 #endif
 
 struct objcache *mbuf_cache, *mbufphdr_cache;
-struct objcache *mclmeta_cache;
+struct objcache *mclmeta_cache, *mjclmeta_cache;
 struct objcache *mbufcluster_cache, *mbufphdrcluster_cache;
+struct objcache *mbufjcluster_cache, *mbufphdrjcluster_cache;
 
 int nmbclusters;
 int nmbufs;
 
 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
-	   &max_linkhdr, 0, "");
+	   &max_linkhdr, 0, "Max size of a link-level header");
 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
-	   &max_protohdr, 0, "");
-SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
+	   &max_protohdr, 0, "Max size of a protocol header");
+SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0,
+	   "Max size of link+protocol headers");
 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
-	   &max_datalen, 0, "");
+	   &max_datalen, 0, "Max data payload size without headers");
 SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
-	   &mbuf_wait, 0, "");
+	   &mbuf_wait, 0, "Time in ticks to sleep after failed mbuf allocations");
 
 static int do_mbstat(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, CTLTYPE_STRUCT|CTLFLAG_RD,
-	0, 0, do_mbstat, "S,mbstat", "");
+	0, 0, do_mbstat, "S,mbstat", "mbuf usage statistics");
 
 static int do_mbtypes(SYSCTL_HANDLER_ARGS);
 
@@ -342,13 +345,13 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD,
 	   &nmbufs, 0, "Maximum number of mbufs available");
 
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
-	   &m_defragpackets, 0, "");
+	   &m_defragpackets, 0, "Number of defragment packets");
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
-	   &m_defragbytes, 0, "");
+	   &m_defragbytes, 0, "Number of defragment bytes");
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
-	   &m_defraguseless, 0, "");
+	   &m_defraguseless, 0, "Number of useless defragment mbuf chain operations");
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
-	   &m_defragfailure, 0, "");
+	   &m_defragfailure, 0, "Number of failed defragment mbuf chain operations");
 #ifdef MBUF_STRESS_TEST
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
 	   &m_defragrandomfailures, 0, "");
@@ -356,17 +359,23 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
 
 static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
 static MALLOC_DEFINE(M_MBUFCL, "mbufcl", "mbufcl");
+static MALLOC_DEFINE(M_MJBUFCL, "mbufcl", "mbufcl");
 static MALLOC_DEFINE(M_MCLMETA, "mclmeta", "mclmeta");
+static MALLOC_DEFINE(M_MJCLMETA, "mjclmeta", "mjclmeta");
 
 static void m_reclaim (void);
 static void m_mclref(void *arg);
 static void m_mclfree(void *arg);
 
+/*
+ * NOTE: Default NMBUFS must take into account a possible DOS attack
+ *	 using fd passing on unix domain sockets.
+ */
 #ifndef NMBCLUSTERS
 #define NMBCLUSTERS	(512 + maxusers * 16)
 #endif
 #ifndef NMBUFS
-#define NMBUFS		(nmbclusters * 2)
+#define NMBUFS		(nmbclusters * 2 + maxfiles)
 #endif
 
 /*
@@ -455,6 +464,23 @@ mclmeta_ctor(void *obj, void *private, int ocflags)
 	return (TRUE);
 }
 
+static boolean_t
+mjclmeta_ctor(void *obj, void *private, int ocflags)
+{
+	struct mbcluster *cl = obj;
+	void *buf;
+
+	if (ocflags & M_NOWAIT)
+		buf = kmalloc(MJUMPAGESIZE, M_MBUFCL, M_NOWAIT | M_ZERO);
+	else
+		buf = kmalloc(MJUMPAGESIZE, M_MBUFCL, M_INTWAIT | M_ZERO);
+	if (buf == NULL)
+		return (FALSE);
+	cl->mcl_refs = 0;
+	cl->mcl_data = buf;
+	return (TRUE);
+}
+
 static void
 mclmeta_dtor(void *obj, void *private)
 {
@@ -465,7 +491,7 @@ mclmeta_dtor(void *obj, void *private)
 }
 
 static void
-linkcluster(struct mbuf *m, struct mbcluster *cl)
+linkjcluster(struct mbuf *m, struct mbcluster *cl, uint size)
 {
 	/*
 	 * Add the cluster to the mbuf.  The caller will detect that the
@@ -475,13 +501,19 @@ linkcluster(struct mbuf *m, struct mbcluster *cl)
 	m->m_ext.ext_buf = cl->mcl_data;
 	m->m_ext.ext_ref = m_mclref;
 	m->m_ext.ext_free = m_mclfree;
-	m->m_ext.ext_size = MCLBYTES;
+	m->m_ext.ext_size = size;
 	atomic_add_int(&cl->mcl_refs, 1);
 
 	m->m_data = m->m_ext.ext_buf;
 	m->m_flags |= M_EXT | M_EXT_CLUSTER;
 }
 
+static void
+linkcluster(struct mbuf *m, struct mbcluster *cl)
+{
+	linkjcluster(m, cl, MCLBYTES);
+}
+
 static boolean_t
 mbufphdrcluster_ctor(void *obj, void *private, int ocflags)
 {
@@ -499,6 +531,23 @@ mbufphdrcluster_ctor(void *obj, void *private, int ocflags)
 	return (TRUE);
 }
 
+static boolean_t
+mbufphdrjcluster_ctor(void *obj, void *private, int ocflags)
+{
+	struct mbuf *m = obj;
+	struct mbcluster *cl;
+
+	mbufphdr_ctor(obj, private, ocflags);
+	cl = objcache_get(mjclmeta_cache, ocflags);
+	if (cl == NULL) {
+		++mbstat[mycpu->gd_cpuid].m_drops;
+		return (FALSE);
+	}
+	m->m_flags |= M_CLCACHE;
+	linkjcluster(m, cl, MJUMPAGESIZE);
+	return (TRUE);
+}
+
 static boolean_t
 mbufcluster_ctor(void *obj, void *private, int ocflags)
 {
@@ -516,6 +565,23 @@ mbufcluster_ctor(void *obj, void *private, int ocflags)
 	return (TRUE);
 }
 
+static boolean_t
+mbufjcluster_ctor(void *obj, void *private, int ocflags)
+{
+	struct mbuf *m = obj;
+	struct mbcluster *cl;
+
+	mbuf_ctor(obj, private, ocflags);
+	cl = objcache_get(mjclmeta_cache, ocflags);
+	if (cl == NULL) {
+		++mbstat[mycpu->gd_cpuid].m_drops;
+		return (FALSE);
+	}
+	m->m_flags |= M_CLCACHE;
+	linkjcluster(m, cl, MJUMPAGESIZE);
+	return (TRUE);
+}
+
 /*
  * Used for both the cluster and cluster PHDR caches.
  *
@@ -533,7 +599,10 @@ mbufcluster_dtor(void *obj, void *private)
 		mcl = m->m_ext.ext_arg;
 		KKASSERT(mcl->mcl_refs == 1);
 		mcl->mcl_refs = 0;
-		objcache_put(mclmeta_cache, mcl);
+		if (m->m_flags & M_EXT && m->m_ext.ext_size != MCLBYTES)
+			objcache_put(mjclmeta_cache, mcl);
+		else
+			objcache_put(mclmeta_cache, mcl);
 	}
 }
 
@@ -555,6 +624,7 @@ mbinit(void *dummy)
 	for (i = 0; i < ncpus; i++) {
 		atomic_set_long_nonlocked(&mbstat[i].m_msize, MSIZE);
 		atomic_set_long_nonlocked(&mbstat[i].m_mclbytes, MCLBYTES);
+		atomic_set_long_nonlocked(&mbstat[i].m_mjumpagesize, MJUMPAGESIZE);
 		atomic_set_long_nonlocked(&mbstat[i].m_minclsize, MINCLSIZE);
 		atomic_set_long_nonlocked(&mbstat[i].m_mlen, MLEN);
 		atomic_set_long_nonlocked(&mbstat[i].m_mhlen, MHLEN);
@@ -584,6 +654,11 @@ mbinit(void *dummy)
 	    mclmeta_ctor, mclmeta_dtor, NULL,
 	    objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args);
 
+	cl_limit = nmbclusters;
+	mjclmeta_cache = objcache_create("jcluster mbuf", &cl_limit, 0,
+	    mjclmeta_ctor, mclmeta_dtor, NULL,
+	    objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args);
+
 	limit = nmbclusters;
 	mbufcluster_cache = objcache_create("mbuf + cluster", &limit, 0,
 	    mbufcluster_ctor, mbufcluster_dtor, NULL,
@@ -596,6 +671,18 @@ mbinit(void *dummy)
 	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
 	mb_limit += limit;
 
+	limit = nmbclusters;
+	mbufjcluster_cache = objcache_create("mbuf + jcluster", &limit, 0,
+	    mbufjcluster_ctor, mbufcluster_dtor, NULL,
+	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
+	mb_limit += limit;
+
+	limit = nmbclusters;
+	mbufphdrjcluster_cache = objcache_create("mbuf pkt hdr + jcluster",
+	    &limit, 64, mbufphdrjcluster_ctor, mbufcluster_dtor, NULL,
+	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
+	mb_limit += limit;
+
 	/*
 	 * Adjust backing kmalloc pools' limit
 	 *
@@ -605,7 +692,8 @@ mbinit(void *dummy)
 	cl_limit += cl_limit / 8;
 	kmalloc_raise_limit(mclmeta_malloc_args.mtype,
 			    mclmeta_malloc_args.objsize * cl_limit);
-	kmalloc_raise_limit(M_MBUFCL, MCLBYTES * cl_limit);
+	kmalloc_raise_limit(M_MBUFCL, MCLBYTES * cl_limit * 3/4 + MJUMPAGESIZE * cl_limit / 4);
+	/*kmalloc_raise_limit(M_MBUFCL, MCLBYTES * cl_limit);*/
 
 	mb_limit += mb_limit / 8;
 	kmalloc_raise_limit(mbuf_malloc_args.mtype,
@@ -666,9 +754,13 @@ static void __inline
 updatestats(struct mbuf *m, int type)
 {
 	struct globaldata *gd = mycpu;
 
-	m->m_type = type;
+	m->m_type = type;
 	mbuftrack(m);
+#ifdef MBUF_DEBUG
+	KASSERT(m->m_next == NULL, ("mbuf %p: bad m_next in get", m));
+	KASSERT(m->m_nextpkt == NULL, ("mbuf %p: bad m_nextpkt in get", m));
+#endif
 
 	atomic_add_long_nonlocked(&mbtypes[gd->gd_cpuid][type], 1);
 	atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mbufs, 1);
@@ -694,9 +786,11 @@ retryonce:
 			struct objcache *reclaimlist[] = {
 				mbufphdr_cache,
 				mbufcluster_cache,
-				mbufphdrcluster_cache
+				mbufphdrcluster_cache,
+				mbufjcluster_cache,
+				mbufphdrjcluster_cache
 			};
-			const int nreclaims = __arysize(reclaimlist);
+			const int nreclaims = NELEM(reclaimlist);
 
 			if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf))
 				m_reclaim();
@@ -705,6 +799,10 @@ retryonce:
 		++mbstat[mycpu->gd_cpuid].m_drops;
 		return (NULL);
 	}
+#ifdef MBUF_DEBUG
+	KASSERT(m->m_data == m->m_dat, ("mbuf %p: bad m_data in get", m));
+#endif
+	m->m_len = 0;
 
 	updatestats(m, type);
 	return (m);
@@ -725,9 +823,10 @@ retryonce:
 		if ((how & MB_TRYWAIT) && ntries++ == 0) {
 			struct objcache *reclaimlist[] = {
 				mbuf_cache,
-				mbufcluster_cache, mbufphdrcluster_cache
+				mbufcluster_cache, mbufphdrcluster_cache,
+				mbufjcluster_cache, mbufphdrjcluster_cache
 			};
-			const int nreclaims = __arysize(reclaimlist);
+			const int nreclaims = NELEM(reclaimlist);
 
 			if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf))
 				m_reclaim();
@@ -736,6 +835,11 @@ retryonce:
 		++mbstat[mycpu->gd_cpuid].m_drops;
 		return (NULL);
 	}
+#ifdef MBUF_DEBUG
+	KASSERT(m->m_data == m->m_pktdat, ("mbuf %p: bad m_data in get", m));
+#endif
+	m->m_len = 0;
+	m->m_pkthdr.len = 0;
 
 	updatestats(m, type);
 	return (m);
@@ -756,6 +860,51 @@ m_getclr(int how, int type)
 	return (m);
 }
 
+struct mbuf *
+m_getjcl(int how, short type, int flags, size_t size)
+{
+	struct mbuf *m = NULL;
+	int ocflags = MBTOM(how);
+	int ntries = 0;
+
+retryonce:
+
+	if (flags & M_PKTHDR)
+		m = objcache_get(mbufphdrjcluster_cache, ocflags);
+	else
+		m = objcache_get(mbufjcluster_cache, ocflags);
+
+	if (m == NULL) {
+		if ((how & MB_TRYWAIT) && ntries++ == 0) {
+			struct objcache *reclaimlist[1];
+
+			if (flags & M_PKTHDR)
+				reclaimlist[0] = mbufjcluster_cache;
+			else
+				reclaimlist[0] = mbufphdrjcluster_cache;
+			if (!objcache_reclaimlist(reclaimlist, 1, ocflags))
+				m_reclaim();
+			goto retryonce;
+		}
+		++mbstat[mycpu->gd_cpuid].m_drops;
+		return (NULL);
+	}
+
+#ifdef MBUF_DEBUG
+	KASSERT(m->m_data == m->m_ext.ext_buf,
+		("mbuf %p: bad m_data in get", m));
+#endif
+	m->m_type = type;
+	m->m_len = 0;
+	m->m_pkthdr.len = 0;	/* just do it unconditonally */
+
+	mbuftrack(m);
+
+	atomic_add_long_nonlocked(&mbtypes[mycpu->gd_cpuid][type], 1);
+	atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_clusters, 1);
+	return (m);
+}
+
 /*
  * Returns an mbuf with an attached cluster.
  * Because many network drivers use this kind of buffers a lot, it is
@@ -793,7 +942,13 @@ retryonce:
 		return (NULL);
 	}
 
+#ifdef MBUF_DEBUG
+	KASSERT(m->m_data == m->m_ext.ext_buf,
+		("mbuf %p: bad m_data in get", m));
+#endif
 	m->m_type = type;
+	m->m_len = 0;
+	m->m_pkthdr.len = 0;	/* just do it unconditonally */
 
 	mbuftrack(m);
 
@@ -913,13 +1068,24 @@ m_mclfree(void *arg)
  * code does not call M_PREPEND properly.
  * (example: call to bpf_mtap from drivers)
  */
+
+#ifdef MBUF_DEBUG
+
+struct mbuf *
+_m_free(struct mbuf *m, const char *func)
+
+#else
+
 struct mbuf *
 m_free(struct mbuf *m)
+
+#endif
 {
 	struct mbuf *n;
 	struct globaldata *gd = mycpu;
 
 	KASSERT(m->m_type != MT_FREE, ("freeing free mbuf %p", m));
+	KASSERT(M_TRAILINGSPACE(m) >= 0, ("overflowed mbuf %p", m));
 	atomic_subtract_long_nonlocked(&mbtypes[gd->gd_cpuid][m->m_type], 1);
 
 	n = m->m_next;
@@ -930,6 +1096,9 @@ m_free(struct mbuf *m)
 	 */
 	m->m_next = NULL;
 	mbufuntrack(m);
+#ifdef MBUF_DEBUG
+	m->m_hdr.mh_lastfunc = func;
+#endif
 #ifdef notyet
 	KKASSERT(m->m_nextpkt == NULL);
 #else
@@ -985,10 +1154,17 @@ m_free(struct mbuf *m)
 		 * an mbuf).
 		 */
 		m->m_data = m->m_ext.ext_buf;
-		if (m->m_flags & M_PHCACHE)
-			objcache_put(mbufphdrcluster_cache, m);
-		else
-			objcache_put(mbufcluster_cache, m);
+		if (m->m_flags & M_EXT && m->m_ext.ext_size != MCLBYTES) {
+			if (m->m_flags & M_PHCACHE)
+				objcache_put(mbufphdrjcluster_cache, m);
+			else
+				objcache_put(mbufjcluster_cache, m);
+		} else {
+			if (m->m_flags & M_PHCACHE)
+				objcache_put(mbufphdrcluster_cache, m);
+			else
+				objcache_put(mbufcluster_cache, m);
+		}
 		atomic_subtract_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_clusters, 1);
 	} else {
 		/*
@@ -1003,12 +1179,20 @@ m_free(struct mbuf *m)
 			 * XXX we could try to connect another cluster to
 			 * it.
 			 */
+
 			m->m_ext.ext_free(m->m_ext.ext_arg);
 			m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
-			if (m->m_flags & M_PHCACHE)
-				objcache_dtor(mbufphdrcluster_cache, m);
-			else
-				objcache_dtor(mbufcluster_cache, m);
+			if (m->m_ext.ext_size == MCLBYTES) {
+				if (m->m_flags & M_PHCACHE)
+					objcache_dtor(mbufphdrcluster_cache, m);
+				else
+					objcache_dtor(mbufcluster_cache, m);
+			} else {
+				if (m->m_flags & M_PHCACHE)
+					objcache_dtor(mbufphdrjcluster_cache, m);
+				else
+					objcache_dtor(mbufjcluster_cache, m);
+			}
 		}
 		break;
 	case M_EXT | M_EXT_CLUSTER:
@@ -1050,6 +1234,17 @@ m_free(struct mbuf *m)
 	return (n);
 }
 
+#ifdef MBUF_DEBUG
+
+void
+_m_freem(struct mbuf *m, const char *func)
+{
+	while (m)
+		m = _m_free(m, func);
+}
+
+#else
+
 void
 m_freem(struct mbuf *m)
 {
@@ -1057,6 +1252,8 @@ m_freem(struct mbuf *m)
 		m = m_free(m);
 }
 
+#endif
+
 /*
  * mbuf utility routines
  */
@@ -1351,7 +1548,10 @@ m_dup_data(struct mbuf *m, int how)
 	 * Optimize the mbuf allocation but do not get too carried away.
 	 */
 	if (m->m_next || m->m_len > MLEN)
-		gsize = MCLBYTES;
+		if (m->m_flags & M_EXT && m->m_ext.ext_size == MCLBYTES)
+			gsize = MCLBYTES;
+		else
+			gsize = MJUMPAGESIZE;
 	else
 		gsize = MLEN;
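
The patch above introduces m_getjcl(), which hands back an mbuf already backed by a page-sized (MJUMPAGESIZE) jumbo cluster from the new jcluster objcaches. The sketch below is not part of the patch; it is a minimal, hypothetical caller showing how a driver receive path might use the new routine, assuming the usual DragonFly mbuf constants (MB_DONTWAIT, MT_DATA, M_PKTHDR) and the field layout visible in the diff. The function name is illustrative only.

```c
/*
 * Hypothetical usage sketch (not part of the patch): allocate one
 * packet-header mbuf backed by an MJUMPAGESIZE jumbo cluster using the
 * m_getjcl() routine added by this change.
 */
static struct mbuf *
example_alloc_jumbo_rx_buf(void)
{
	struct mbuf *m;

	/* Non-blocking allocation; suitable for an RX refill path. */
	m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
	if (m == NULL)
		return (NULL);	/* objcache exhausted; caller retries later */

	/* Expose the full jumbo cluster as the data area. */
	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
	return (m);
}
```

On free, m_free()/m_freem() in the patched code inspect m_ext.ext_size to return the mbuf to the matching jcluster or regular cluster objcache, so callers dispose of jumbo-cluster mbufs exactly as they would ordinary cluster mbufs.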