Rollup mbuf/objcache fixes.
author    Matthew Dillon <dillon@dragonflybsd.org>
          Wed, 8 Jun 2005 22:22:59 +0000 (22:22 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
          Wed, 8 Jun 2005 22:22:59 +0000 (22:22 +0000)
* Completely replace the blocking algorithm that is used to stall when the
  depot has insufficient resources.
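  The new scheme boils down to a budget counter plus a sleep/wakeup pair:
  objcache_get() decrements depot->unallocated_objects, and an M_WAITOK
  (non-M_NULLOK) caller that finds the budget exhausted sleeps on the depot
  until a put or dtor returns budget and wakes it.  Below is a rough
  userland analogue using a pthread mutex/condvar in place of the depot
  token and tsleep()/wakeup(); the field names mirror the kernel's
  magazinedepot, but the program is only a sketch:

  #include <pthread.h>
  #include <stdio.h>

  /* Stand-in for the magazine depot: a budget of not-yet-allocated objects. */
  struct depot {
      pthread_mutex_t lock;           /* plays the role of the depot token */
      pthread_cond_t  wait;           /* plays the role of tsleep()/wakeup() */
      int             unallocated_objects;
      int             waiting;
  };

  /* Block until the depot can account for one more object, then reserve it. */
  static void
  depot_reserve(struct depot *d)
  {
      pthread_mutex_lock(&d->lock);
      while (d->unallocated_objects == 0) {
          ++d->waiting;
          pthread_cond_wait(&d->wait, &d->lock);    /* tsleep(depot, ...) */
          --d->waiting;
      }
      --d->unallocated_objects;
      pthread_mutex_unlock(&d->lock);
  }

  /* Return one object's worth of budget and wake any stalled getters. */
  static void
  depot_release(struct depot *d)
  {
      pthread_mutex_lock(&d->lock);
      ++d->unallocated_objects;
      if (d->waiting)
          pthread_cond_broadcast(&d->wait);         /* wakeup(depot) */
      pthread_mutex_unlock(&d->lock);
  }

  int
  main(void)
  {
      struct depot d = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
                         2, 0 };

      depot_reserve(&d);
      depot_reserve(&d);
      depot_release(&d);              /* would wake a blocked getter */
      printf("budget left: %d\n", d.unallocated_objects);
      return (0);
  }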

* Use __offsetof() to calculate variable length structural sizes.
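  For reference, a minimal userland sketch of the sizing idiom, using the
  traditional null-pointer form of __offsetof(); the magazine layout mirrors
  the kernel's, but the program is illustrative only:

  #include <stddef.h>
  #include <stdio.h>
  #include <stdlib.h>

  /* Traditional definition; the kernel supplies an equivalent __offsetof(). */
  #define __offsetof(type, field) ((size_t)(&((type *)0)->field))

  /* A fixed header followed by a variable number of object pointers. */
  struct magazine {
      int   rounds;
      int   capacity;
      void *objects[];                /* flexible array member */
  };

  static struct magazine *
  mag_alloc(int capacity)
  {
      struct magazine *mag;

      /*
       * __offsetof(struct magazine, objects[capacity]) is the header size
       * plus exactly `capacity' pointer slots -- no hand-written
       * sizeof-plus-multiply arithmetic to get wrong.
       */
      mag = calloc(1, __offsetof(struct magazine, objects[capacity]));
      if (mag != NULL)
          mag->capacity = capacity;
      return (mag);
  }

  int
  main(void)
  {
      struct magazine *mag = mag_alloc(256);

      if (mag == NULL)
          return (1);
      printf("%zu bytes for a %d-round magazine\n",
          __offsetof(struct magazine, objects[mag->capacity]), mag->capacity);
      free(mag);
      return (0);
  }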

* #if 0 out the balancing code for now.  There isn't actually much of a need
  for it since each cpu can hold no more than two magazines anyway.  The depot
  will have the rest.

* Increase the magazine capacity from 5 to 256 elements to improve
  performance.

* The mbufs were being returned to a different objcache than they had been
  allocated from, due to a dependence on M_PKTHDR and M_EXT.  But these
  flags can change unexpectedly and this led to lockups in the objcache
  code and other issues.  Also, the share count on the cluster may mean
  that it is not possible to release an mbuf+cluster back to its original
  mbuf+cluster cache... it may have to be DTORed instead.
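  In other words, the cache an mbuf goes home to is now chosen from origin
  flags (M_PHCACHE/M_CLCACHE) recorded by the constructors, never from the
  mutable M_PKTHDR/M_EXT state.  A compressed, userland-only sketch of that
  dispatch follows; the helper and the shared-cluster handling are
  illustrative simplifications of m_free(), not the kernel code:

  #include <stdio.h>

  /* Current-state flags: these can change while the mbuf is in use. */
  #define M_EXT          0x0001
  #define M_PKTHDR       0x0002
  #define M_EXT_CLUSTER  0x4000
  /* Origin flags: set by the constructors and never cleared. */
  #define M_CLCACHE      0x2000      /* came from an mbuf+cluster cache */
  #define M_PHCACHE      0x8000      /* came from a packet-header cache */

  /*
   * Decide which objcache an mbuf should be returned to, purely from the
   * origin flags.  A still-shared cluster forces a dtor instead, because
   * the mbuf can no longer be handed back to its mbuf+cluster cache intact.
   */
  static const char *
  home_cache(int flags, int cluster_shared)
  {
      if (flags & M_CLCACHE) {
          if (cluster_shared)
              return ("objcache_dtor: cluster still referenced");
          return ((flags & M_PHCACHE) ? "mbufphdrcluster_cache"
                                      : "mbufcluster_cache");
      }
      return ((flags & M_PHCACHE) ? "mbufphdr_cache" : "mbuf_cache");
  }

  int
  main(void)
  {
      /* M_PKTHDR/M_EXT may have been stripped or gained since allocation;
       * the decision below ignores them entirely. */
      printf("%s\n", home_cache(M_PHCACHE | M_PKTHDR, 0));
      printf("%s\n", home_cache(M_CLCACHE | M_EXT | M_EXT_CLUSTER, 1));
      printf("%s\n", home_cache(0, 0));
      return (0);
  }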

* Change the way ref counts are handled in the mclmeta_cache.  The cache
  returns and expects a ref count of 0 now, and the link code deals with
  bumping it.
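  A small userland sketch of the new convention: clusters rest in
  mclmeta_cache with mcl_refs == 0, linkcluster() bumps the count when the
  cluster is attached to an mbuf, and the final dereference drops it back
  to 0 before the cluster may be returned to the cache.  Illustrative only,
  not the kernel code:

  #include <assert.h>
  #include <stdio.h>

  struct mbcluster {
      int mcl_refs;                   /* 0 while resting in mclmeta_cache */
  };

  /* The constructor leaves the cluster unreferenced; the link code bumps it. */
  static void
  mclmeta_ctor(struct mbcluster *cl)
  {
      cl->mcl_refs = 0;
  }

  static void
  linkcluster(struct mbcluster *cl)
  {
      ++cl->mcl_refs;                 /* attach the cluster to an mbuf */
  }

  /* Drop one reference; the last one returns the cluster at refs == 0. */
  static int
  mclfree(struct mbcluster *cl)
  {
      if (cl->mcl_refs > 1) {
          --cl->mcl_refs;
          return (0);
      }
      assert(cl->mcl_refs == 1);
      cl->mcl_refs = 0;
      return (1);                     /* caller would objcache_put() it now */
  }

  int
  main(void)
  {
      struct mbcluster cl;

      mclmeta_ctor(&cl);
      linkcluster(&cl);                       /* first mbuf */
      linkcluster(&cl);                       /* shared with a second mbuf */
      printf("returned to cache: %d\n", mclfree(&cl));    /* 0: still shared */
      printf("returned to cache: %d\n", mclfree(&cl));    /* 1: back to cache */
      return (0);
  }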

sys/kern/kern_objcache.c
sys/kern/uipc_mbuf.c
sys/sys/mbuf.h
sys/sys/objcache.h

diff --git a/sys/kern/kern_objcache.c b/sys/kern/kern_objcache.c
index 5b80acb..331c966 100644
--- a/sys/kern/kern_objcache.c
+++ b/sys/kern/kern_objcache.c
@@ -29,7 +29,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/kern/kern_objcache.c,v 1.1 2005/06/07 19:07:11 hsu Exp $
+ * $DragonFly: src/sys/kern/kern_objcache.c,v 1.2 2005/06/08 22:22:59 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -46,7 +46,7 @@
 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
 
-#define        INITIAL_MAG_CAPACITY    5
+#define        INITIAL_MAG_CAPACITY    256
 
 struct magazine {
        int                      rounds;
@@ -71,13 +71,11 @@ struct magazinedepot {
        struct magazinelist     emptymagazines;
        int                     magcapacity;
 
-       struct lwkt_token       token;  /* protects all fields in this struct */
+       /* protect this structure */
+       struct lwkt_token       token;
 
-       int                     cluster_balance; /* outstanding objects */
-       int                     cluster_limit;   /* new obj creation limit */
-
-       /* statistics */
-       int                     emptymagazines_cumulative;
+       /* objects not yet allocated towards limit */
+       int                     unallocated_objects;
 
        /* infrequently used fields */
        int                     waiting;        /* waiting for another cpu to
@@ -144,8 +142,8 @@ mag_alloc(int capacity)
 {
        struct magazine *mag;
 
-       mag = malloc(sizeof(struct magazine) + capacity * sizeof(void *),
-           M_OBJMAG, M_INTWAIT);
+       mag = malloc(__offsetof(struct magazine, objects[capacity]),
+                       M_OBJMAG, M_INTWAIT | M_ZERO);
        mag->capacity = capacity;
        mag->rounds = 0;
        return (mag);
@@ -166,8 +164,8 @@ objcache_create(char *name, int cluster_limit, int mag_capacity,
        int cpuid;
 
        /* allocate object cache structure */
-       oc = malloc(sizeof(struct objcache) +
-           ncpus * sizeof(struct percpu_objcache), M_OBJCACHE, M_WAITOK);
+       oc = malloc(__offsetof(struct objcache, cache_percpu[ncpus]),
+                   M_OBJCACHE, M_WAITOK | M_ZERO);
        oc->name = strdup(name, M_TEMP);
        oc->ctor = ctor;
        oc->dtor = dtor;
@@ -177,17 +175,27 @@ objcache_create(char *name, int cluster_limit, int mag_capacity,
 
        /* initialize depots */
        depot = &oc->depot[0];
+
+       lwkt_token_init(&depot->token);
        SLIST_INIT(&depot->fullmagazines);
        SLIST_INIT(&depot->emptymagazines);
-       depot->cluster_limit = cluster_limit;
-       depot->cluster_balance = 0;
-       depot->emptymagazines_cumulative = 0;
-       lwkt_token_init(&depot->token);
+
        if (mag_capacity == 0)
                mag_capacity = INITIAL_MAG_CAPACITY;
        depot->magcapacity = mag_capacity;
+
+       /*
+        * The cluster_limit must be sufficient to have three magazines per
+        * cpu.
+        */
+       if (cluster_limit == 0) {
+               depot->unallocated_objects = -1;
+       } else {
+               if (cluster_limit < mag_capacity * ncpus * 3)
+                       cluster_limit = mag_capacity * ncpus * 3;
+               depot->unallocated_objects = cluster_limit;
+       }
        oc->alloc = alloc;
-       depot->contested = 0;
 
        /* initialize per-cpu caches */
        for (cpuid = 0; cpuid < ncpus; cpuid++) {
@@ -195,12 +203,7 @@ objcache_create(char *name, int cluster_limit, int mag_capacity,
 
                cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
                cache_percpu->previous_magazine = mag_alloc(mag_capacity);
-               cache_percpu->gets_cumulative = 0;
-               cache_percpu->gets_null = 0;
-               cache_percpu->puts_cumulative = 0;
-               cache_percpu->puts_othercluster = 0;
        }
-
        lwkt_gettoken(&olock, &objcachelist_token);
        SLIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
        lwkt_reltoken(&olock);
@@ -209,6 +212,7 @@ objcache_create(char *name, int cluster_limit, int mag_capacity,
 }
 
 #define MAGAZINE_EMPTY(mag)    (mag->rounds == 0)
+#define MAGAZINE_NOTEMPTY(mag) (mag->rounds != 0)
 #define MAGAZINE_FULL(mag)     (mag->rounds == mag->capacity)
 
 #define        swap(x, y)      ({ struct magazine *t = x; x = y; y = t; })
@@ -221,6 +225,7 @@ objcache_get(struct objcache *oc, int ocflags)
 {
        struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
        struct magazine *loadedmag;
+       struct magazine *emptymag;
        void *obj;
        struct magazinedepot *depot;
        lwkt_tokref ilock;
@@ -235,87 +240,108 @@ retry:
         * out interrupt handlers on the same processor.
         */
        loadedmag = cpucache->loaded_magazine;
-       if (!MAGAZINE_EMPTY(loadedmag)) {
-alloc:         obj = loadedmag->objects[--loadedmag->rounds];
+       if (MAGAZINE_NOTEMPTY(loadedmag)) {
+               obj = loadedmag->objects[--loadedmag->rounds];
                crit_exit();
                return (obj);
        }
 
        /* Previous magazine has an object. */
-       if (!MAGAZINE_EMPTY(cpucache->previous_magazine)) {
+       if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
                swap(cpucache->loaded_magazine, cpucache->previous_magazine);
                loadedmag = cpucache->loaded_magazine;
-               goto alloc;
+               obj = loadedmag->objects[--loadedmag->rounds];
+               crit_exit();
+               return (obj);
        }
 
        /*
-        * Both magazines empty.  Get a full magazine from the depot.
+        * Both magazines empty.  Get a full magazine from the depot and
+        * move one of the empty ones to the depot.  Do this even if we
+        * block on the token to avoid a non-optimal corner case.
+        *
+        * Obtain the depot token.
         */
-
-       /* Obtain the depot token. */
        depot = &oc->depot[myclusterid];
        if (!lwkt_trytoken(&ilock, &depot->token)) {
                lwkt_gettoken(&ilock, &depot->token);
                ++depot->contested;
-               if (!MAGAZINE_EMPTY(cpucache->loaded_magazine) ||
-                   !MAGAZINE_EMPTY(cpucache->previous_magazine)) {
-                       lwkt_reltoken(&ilock);
-                       goto retry;
-               }
        }
 
        /* Check if depot has a full magazine. */
        if (!SLIST_EMPTY(&depot->fullmagazines)) {
-               if (cpucache->previous_magazine->capacity == depot->magcapacity)
-                       SLIST_INSERT_HEAD(&depot->emptymagazines,
-                                         cpucache->previous_magazine,
-                                         nextmagazine);
-               else
-                       free(cpucache->previous_magazine, M_OBJMAG);
+               emptymag = cpucache->previous_magazine;
                cpucache->previous_magazine = cpucache->loaded_magazine;
                cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
-               loadedmag = cpucache->loaded_magazine;
                SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
+
+               /*
+                * Return emptymag to the depot.  Due to blocking it may
+                * not be entirely empty.
+                */
+               if (MAGAZINE_EMPTY(emptymag)) {
+                       SLIST_INSERT_HEAD(&depot->emptymagazines,
+                                         emptymag, nextmagazine);
+               } else {
+                       /*
+                        * NOTE: magazine is not necessarily entirely full
+                        */
+                       SLIST_INSERT_HEAD(&depot->fullmagazines,
+                                         emptymag, nextmagazine);
+                       if (depot->waiting)
+                               wakeup(depot);
+               }
                lwkt_reltoken(&ilock);
-               goto alloc;
+               goto retry;
        }
 
        /*
-        * Depot layer empty.
+        * The depot does not have any non-empty magazines.  If we have
+        * not hit our object limit we can allocate a new object using
+        * the back-end allocator.
+        *
+        * note: unallocated_objects can be initialized to -1, which has
+        * the effect of removing any allocation limits.
         */
+       if (depot->unallocated_objects) {
+               --depot->unallocated_objects;
+               lwkt_reltoken(&ilock);
+               crit_exit();
 
-       /* Check object allocation limit. */
-       if (depot->cluster_balance >= depot->cluster_limit) {
-               if (ocflags & M_NULLOK)
-                       goto failed;
-               /* Wait until someone frees an existing object. */
-               if (ocflags & M_WAITOK) {
-                       ++cpucache->waiting;
-                       ++depot->waiting;
-                       tsleep(depot, PCATCH, "objcache_get", 0);
-                       --cpucache->waiting;
-                       --depot->waiting;
+               obj = oc->alloc(oc->allocator_args, ocflags);
+               if (obj) {
+                       if (oc->ctor(obj, oc->private, ocflags))
+                               return (obj);
+                       oc->free(obj, oc->allocator_args);
+                       lwkt_gettoken(&ilock, &depot->token);
+                       ++depot->unallocated_objects;
+                       if (depot->waiting)
+                               wakeup(depot);
                        lwkt_reltoken(&ilock);
-                       goto retry;
+                       obj = NULL;
                }
+               if (obj == NULL) {
+                       crit_enter();
+                       ++cpucache->gets_null;
+                       crit_exit();
+               }
+               return(obj);
        }
-       crit_exit();
 
-       /* Allocate a new object using the back-end allocator. */
-       obj = oc->alloc(oc->allocator_args, ocflags);
-       if (obj) {
-               if (oc->ctor(obj, oc->private, ocflags)) {
-                       ++depot->cluster_balance;
-                       lwkt_reltoken(&ilock);
-                       return (obj);                   /* common case */
-               }
-               oc->free(obj, oc->allocator_args);
+       /*
+        * Otherwise block if allowed to.
+        */
+       if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
+               ++cpucache->waiting;
+               ++depot->waiting;
+               tsleep(depot, PCATCH, "objcache_get", 0);
+               --cpucache->waiting;
+               --depot->waiting;
+               lwkt_reltoken(&ilock);
+               goto retry;
        }
-       crit_enter();
-failed:
        ++cpucache->gets_null;
        crit_exit();
-       lwkt_reltoken(&ilock);
        return (NULL);
 }
 
@@ -352,7 +378,6 @@ objcache_nop_alloc(void *allocator_args, int ocflags)
 void
 objcache_nop_free(void *obj, void *allocator_args)
 {
-       return;
 }
 
 /*
@@ -365,7 +390,6 @@ objcache_put(struct objcache *oc, void *obj)
        struct magazine *loadedmag;
        struct magazinedepot *depot;
        lwkt_tokref ilock;
-       struct magazine *emptymag;
 
        crit_enter();
        ++cpucache->puts_cumulative;
@@ -386,71 +410,104 @@ retry:
         */
        loadedmag = cpucache->loaded_magazine;
        if (!MAGAZINE_FULL(loadedmag)) {
-free:          loadedmag->objects[loadedmag->rounds++] = obj;
+               loadedmag->objects[loadedmag->rounds++] = obj;
                if (cpucache->waiting)
                        wakeup(&oc->depot[myclusterid]);
                crit_exit();
                return;
        }
 
-       /* Current magazine full, but previous magazine empty. */
+       /*
+        * Current magazine full, but previous magazine has room.  XXX
+        */
        if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
                swap(cpucache->loaded_magazine, cpucache->previous_magazine);
                loadedmag = cpucache->loaded_magazine;
-               goto free;
+               loadedmag->objects[loadedmag->rounds++] = obj;
+               if (cpucache->waiting)
+                       wakeup(&oc->depot[myclusterid]);
+               crit_exit();
+               return;
        }
 
        /*
-        * Both magazines full.  Get an empty magazine from the depot.
+        * Both magazines full.  Get an empty magazine from the depot and
+        * move a full loaded magazine to the depot.  Even though the
+        * magazine may wind up with space available after we block on
+        * the token, we still cycle it through to avoid the non-optimal
+        * corner-case.
+        *
+        * Obtain the depot token.
         */
-
-       /* Obtain the depot token. */
        depot = &oc->depot[myclusterid];
        if (!lwkt_trytoken(&ilock, &depot->token)) {
-               crit_exit();
                lwkt_gettoken(&ilock, &depot->token);
                ++depot->contested;
-               crit_enter();
-               if (!MAGAZINE_FULL(cpucache->loaded_magazine) ||
-                   !MAGAZINE_FULL(cpucache->previous_magazine)) {
-                       lwkt_reltoken(&ilock);
-                       goto retry;
-               }
        }
 
-       /* Check if depot has empty magazine. */
+       /*
+        * If an empty magazine is available in the depot, cycle it
+        * through and retry.
+        */
        if (!SLIST_EMPTY(&depot->emptymagazines)) {
-               emptymag = SLIST_FIRST(&depot->emptymagazines);
+               loadedmag = cpucache->previous_magazine;
+               cpucache->previous_magazine = cpucache->loaded_magazine;
+               cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
                SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
-haveemptymag:  if (cpucache->previous_magazine->capacity == depot->magcapacity)
+
+               /*
+                * Return loadedmag to the depot.  Due to blocking it may
+                * not be entirely full and could even be empty.
+                */
+               if (MAGAZINE_EMPTY(loadedmag)) {
+                       SLIST_INSERT_HEAD(&depot->emptymagazines,
+                                         loadedmag, nextmagazine);
+               } else {
                        SLIST_INSERT_HEAD(&depot->fullmagazines,
-                           cpucache->previous_magazine, nextmagazine);
-               else
-                       free(cpucache->previous_magazine, M_OBJMAG);
-               cpucache->previous_magazine = cpucache->loaded_magazine;
-               cpucache->loaded_magazine = emptymag;
-               loadedmag = cpucache->loaded_magazine;
+                                         loadedmag, nextmagazine);
+                       if (depot->waiting)
+                               wakeup(depot);
+               }
                lwkt_reltoken(&ilock);
-               goto free;
+               goto retry;
        }
 
-       /* Allocate a new empty magazine. */
-       if (depot->cluster_balance < depot->cluster_limit + depot->magcapacity){
-               emptymag = mag_alloc(depot->magcapacity);
-               ++depot->emptymagazines_cumulative;
-               goto haveemptymag;
-       }
-
-       --depot->cluster_balance;
-       KKASSERT(depot->cluster_balance >= 0);
+       /*
+        * An empty mag is not available.  This is a corner case which can
+        * occur due to cpus holding partially full magazines.  Do not try
+        * to allocate a mag, just free the object.
+        */
+       ++depot->unallocated_objects;
        if (depot->waiting)
                wakeup(depot);
        lwkt_reltoken(&ilock);
        crit_exit();
+       oc->dtor(obj, oc->private);
+       oc->free(obj, oc->allocator_args);
+}
+
+/*
+ * The object is being put back into the cache, but the caller has
+ * indicated that the object is not in any shape to be reused and should
+ * be dtor'd immediately.
+ */
+void
+objcache_dtor(struct objcache *oc, void *obj)
+{
+       struct magazinedepot *depot;
+       lwkt_tokref ilock;
 
+       depot = &oc->depot[myclusterid];
+       if (!lwkt_trytoken(&ilock, &depot->token)) {
+               lwkt_gettoken(&ilock, &depot->token);
+               ++depot->contested;
+       }
+       ++depot->unallocated_objects;
+       if (depot->waiting)
+               wakeup(depot);
+       lwkt_reltoken(&ilock);
        oc->dtor(obj, oc->private);
        oc->free(obj, oc->allocator_args);
-       return;
 }
 
 /*
@@ -470,16 +527,21 @@ null_dtor(void *obj, void *private)
 static int
 mag_purge(struct objcache *oc, struct magazine *mag)
 {
+       int ndeleted;
        void *obj;
-       int i;
 
-       for (i = 0; i < mag->rounds; i++) {
-               obj = mag->objects[i];
+       ndeleted = 0;
+       crit_enter();
+       while (mag->rounds) {
+               obj = mag->objects[--mag->rounds];
+               crit_exit();
                oc->dtor(obj, oc->private);
                oc->free(obj, oc->allocator_args);
+               ++ndeleted;
+               crit_enter();
        }
-
-       return (mag->rounds);
+       crit_exit();
+       return(ndeleted);
 }
 
 /*
@@ -510,9 +572,12 @@ maglist_purge(struct objcache *oc, struct magazinelist *maglist,
 static void
 depot_purge(struct magazinedepot *depot, struct objcache *oc)
 {
-       depot->cluster_balance -= maglist_purge(oc, &depot->fullmagazines,
-                                               TRUE);
-       maglist_purge(oc, &depot->emptymagazines, TRUE);
+       depot->unallocated_objects += 
+               maglist_purge(oc, &depot->fullmagazines, TRUE);
+       depot->unallocated_objects +=
+               maglist_purge(oc, &depot->emptymagazines, TRUE);
+       if (depot->unallocated_objects && depot->waiting)
+               wakeup(depot);
 }
 
 #ifdef notneeded
@@ -525,6 +590,7 @@ objcache_reclaim(struct objcache *oc)
        mag_purge(oc, cache_percpu->loaded_magazine);
        mag_purge(oc, cache_percpu->previous_magazine);
 
+       /* XXX need depot token */
        depot_purge(depot, oc);
 }
 #endif
@@ -553,7 +619,9 @@ objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
                    (ndel = mag_purge(oc, cpucache->previous_magazine)) > 0) {
                        crit_exit();
                        lwkt_gettoken(&ilock, &depot->token);
-                       depot->cluster_balance -= ndel;
+                       depot->unallocated_objects += ndel;
+                       if (depot->unallocated_objects && depot->waiting)
+                               wakeup(depot);
                        lwkt_reltoken(&ilock);
                        return (TRUE);
                }
@@ -561,7 +629,9 @@ objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
                lwkt_gettoken(&ilock, &depot->token);
                if ((ndel =
                     maglist_purge(oc, &depot->fullmagazines, FALSE)) > 0) {
-                       depot->cluster_balance -= ndel;
+                       depot->unallocated_objects += ndel;
+                       if (depot->unallocated_objects && depot->waiting)
+                               wakeup(depot);
                        lwkt_reltoken(&ilock);
                        return (TRUE);
                }
@@ -580,6 +650,7 @@ objcache_destroy(struct objcache *oc)
        struct percpu_objcache *cache_percpu;
        int clusterid, cpuid;
 
+       /* XXX need depot token? */
        for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++)
                depot_purge(&oc->depot[clusterid], oc);
 
@@ -597,6 +668,7 @@ objcache_destroy(struct objcache *oc)
        free(oc, M_OBJCACHE);
 }
 
+#if 0
 /*
  * Populate the per-cluster depot with elements from a linear block
  * of memory.  Must be called for individually for each cluster.
@@ -616,18 +688,20 @@ objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
        while (p < end) {
                if (MAGAZINE_FULL(emptymag)) {
                        emptymag = mag_alloc(depot->magcapacity);
-                       ++depot->emptymagazines_cumulative;
                        SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
                                          nextmagazine);
                }
                emptymag->objects[emptymag->rounds++] = p;
                p += size;
        }
-       depot->cluster_balance += nelts;
+       depot->unallocated_objects += nelts;
+       if (depot->unallocated_objects && depot->waiting)
+               wakeup(depot);
        lwkt_reltoken(&ilock);
-       return;
 }
+#endif
 
+#if 0
 /*
  * Check depot contention once a minute.
  * 2 contested locks per second allowed.
@@ -671,13 +745,17 @@ objcache_timer(void *dummy)
                      objcache_timer, NULL);
 }
 
+#endif
+
 static void
 objcache_init(void)
 {
        lwkt_token_init(&objcachelist_token);
-       objcache_rebalance_period = 60 * hz;
+#if 0
        callout_init(&objcache_callout);
+       objcache_rebalance_period = 60 * hz;
        callout_reset(&objcache_callout, objcache_rebalance_period,
                      objcache_timer, NULL);
+#endif
 }
 SYSINIT(objcache, SI_SUB_CPU, SI_ORDER_ANY, objcache_init, 0);
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index 0d1f8e7..32ea56f 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -82,7 +82,7 @@
  *
  * @(#)uipc_mbuf.c     8.2 (Berkeley) 1/4/94
  * $FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.51.2.24 2003/04/15 06:59:29 silby Exp $
- * $DragonFly: src/sys/kern/uipc_mbuf.c,v 1.43 2005/06/08 19:29:32 dillon Exp $
+ * $DragonFly: src/sys/kern/uipc_mbuf.c,v 1.44 2005/06/08 22:22:59 dillon Exp $
  */
 
 #include "opt_param.h"
@@ -246,7 +246,7 @@ mbufphdr_ctor(void *obj, void *private, int ocflags)
        m->m_next = NULL;
        m->m_nextpkt = NULL;
        m->m_data = m->m_pktdat;
-       m->m_flags = M_PKTHDR;
+       m->m_flags = M_PKTHDR | M_PHCACHE;
 
        m->m_pkthdr.rcvif = NULL;       /* eliminate XXX JH */
        SLIST_INIT(&m->m_pkthdr.tags);
@@ -271,7 +271,7 @@ mclmeta_ctor(void *obj, void *private, int ocflags)
                buf = malloc(MCLBYTES, M_MBUFCL, M_INTWAIT | M_ZERO);
        if (buf == NULL)
                return (FALSE);
-       cl->mcl_refs = 1;
+       cl->mcl_refs = 0;
        cl->mcl_data = buf;
        return (TRUE);
 }
@@ -288,6 +288,7 @@ linkcluster(struct mbuf *m, struct mbcluster *cl)
        m->m_ext.ext_ref = m_mclref;
        m->m_ext.ext_free = m_mclfree;
        m->m_ext.ext_size = MCLBYTES;
+       ++cl->mcl_refs;
 
        m->m_data = m->m_ext.ext_buf;
        m->m_flags |= M_EXT | M_EXT_CLUSTER;
@@ -303,6 +304,7 @@ mbufphdrcluster_ctor(void *obj, void *private, int ocflags)
        cl = objcache_get(mclmeta_cache, ocflags);
        if (cl == NULL)
                return (FALSE);
+       m->m_flags |= M_CLCACHE;
        linkcluster(m, cl);
        return (TRUE);
 }
@@ -317,6 +319,7 @@ mbufcluster_ctor(void *obj, void *private, int ocflags)
        cl = objcache_get(mclmeta_cache, ocflags);
        if (cl == NULL)
                return (FALSE);
+       m->m_flags |= M_CLCACHE;
        linkcluster(m, cl);
        return (TRUE);
 }
@@ -326,24 +329,29 @@ mclmeta_dtor(void *obj, void *private)
 {
        struct mbcluster *mcl = obj;
 
-       KKASSERT(mcl->mcl_refs == 1);
+       KKASSERT(mcl->mcl_refs == 0);
        free(mcl->mcl_data, M_MBUFCL);
 }
 
+/*
+ * Used for both the cluster and cluster PHDR caches.
+ *
+ * The mbuf may have lost its cluster due to sharing, deal
+ * with the situation by checking M_EXT.
+ */
 static void
 mbufcluster_dtor(void *obj, void *private)
 {
        struct mbuf *m = obj;
+       struct mbcluster *mcl;
 
-       objcache_put(mclmeta_cache, m->m_ext.ext_arg);
-}
-
-static void
-mbufphdrcluster_dtor(void *obj, void *private)
-{
-       struct mbuf *m = obj;
-
-       objcache_put(mclmeta_cache, m->m_ext.ext_arg);
+       if (m->m_flags & M_EXT) {
+               KKASSERT((m->m_flags & M_EXT_CLUSTER) != 0);
+               mcl = m->m_ext.ext_arg;
+               KKASSERT(mcl->mcl_refs == 1);
+               mcl->mcl_refs = 0;
+               objcache_put(mclmeta_cache, mcl);
+       }
 }
 
 struct objcache_malloc_args mbuf_malloc_args = { MSIZE, M_MBUF };
@@ -373,7 +381,7 @@ mbinit(void *dummy)
            mbufcluster_ctor, mbufcluster_dtor, NULL,
            objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
        mbufphdrcluster_cache = objcache_create("mbuf pkt hdr + cluster",
-           nmbclusters, 64, mbufphdrcluster_ctor, mbufphdrcluster_dtor, NULL,
+           nmbclusters, 64, mbufphdrcluster_ctor, mbufcluster_dtor, NULL,
            objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
        return;
 }
@@ -623,6 +631,7 @@ m_mclget(struct mbuf *m, int how)
 {
        struct mbcluster *mcl;
 
+       KKASSERT((m->m_flags & M_EXT) == 0);
        mcl = objcache_get(mclmeta_cache, MBTOM(how));
        if (mcl == NULL)
                return;
@@ -647,8 +656,14 @@ m_mclfree(void *arg)
 {
        struct mbcluster *mcl = arg;
 
-       KKASSERT(mcl->mcl_refs > 1);
-       atomic_subtract_int(&mcl->mcl_refs, 1);
+       /* XXX interrupt race.  Currently called from a critical section */
+       if (mcl->mcl_refs > 1) {
+               atomic_subtract_int(&mcl->mcl_refs, 1);
+       } else {
+               KKASSERT(mcl->mcl_refs == 1);
+               mcl->mcl_refs = 0;
+               objcache_put(mclmeta_cache, mcl);
+       }
 }
 
 extern void db_print_backtrace(void);
@@ -692,45 +707,106 @@ m_free(struct mbuf *m)
        }
 #endif
        if (m->m_flags & M_PKTHDR) {
-               m->m_pkthdr.rcvif = NULL;       /* eliminate XXX JH */
                m_tag_delete_chain(m);          /* eliminate XXX JH */
+       }
+
+       m->m_flags &= (M_EXT | M_EXT_CLUSTER | M_CLCACHE | M_PHCACHE);
+
+       /*
+        * Clean the M_PKTHDR state so we can return the mbuf to its original
+        * cache.  This is based on the PHCACHE flag which tells us whether
+        * the mbuf was originally allocated out of a packet-header cache
+        * or a non-packet-header cache.
+        */
+       if (m->m_flags & M_PHCACHE) {
+               m->m_flags |= M_PKTHDR;
+               m->m_pkthdr.rcvif = NULL;       /* eliminate XXX JH */
                m->m_pkthdr.csum_flags = 0;     /* eliminate XXX JH */
                m->m_pkthdr.fw_flags = 0;       /* eliminate XXX JH */
        }
-       m->m_flags &= (M_PKTHDR | M_EXT | M_EXT_CLUSTER);
 
-       if (m->m_flags & M_EXT) {
-               crit_enter();   /* XXX not MP safe */
-                               /* interrupt race decrementing count to 0 */
-               if (m_sharecount(m) > 1) {
+       /*
+        * Handle remaining flags combinations.  M_CLCACHE tells us whether
+        * the mbuf was originally allocated from a cluster cache or not,
+        * and is totally separate from whether the mbuf is currently
+        * associated with a cluster.
+        */
+       crit_enter();
+       switch(m->m_flags & (M_CLCACHE | M_EXT | M_EXT_CLUSTER)) {
+       case M_CLCACHE | M_EXT | M_EXT_CLUSTER:
+               /*
+                * mbuf+cluster cache case.  The mbuf was allocated from the
+                * combined mbuf_cluster cache and can be returned to the
+                * cache if the cluster hasn't been shared.
+                */
+               if (m_sharecount(m) == 1) {
+                       /*
+                        * The cluster has not been shared, we can just
+                        * reset the data pointer and return the mbuf
+                        * to the cluster cache.  Note that the reference
+                        * count is left intact (it is still associated with
+                        * an mbuf).
+                        */
+                       m->m_data = m->m_ext.ext_buf;
+                       if (m->m_flags & M_PHCACHE)
+                               objcache_put(mbufphdrcluster_cache, m);
+                       else
+                               objcache_put(mbufcluster_cache, m);
+               } else {
+                       /*
+                        * Hell.  Someone else has a ref on this cluster,
+                        * we have to disconnect it which means we can't
+                        * put it back into the mbufcluster_cache, we
+                        * have to destroy the mbuf.
+                        *
+                        * XXX we could try to connect another cluster to
+                        * it.
+                        */
                        m->m_ext.ext_free(m->m_ext.ext_arg); 
                        m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
-                       crit_exit();
-                       goto detachmbuf;
+                       if (m->m_flags & M_PHCACHE)
+                               objcache_dtor(mbufphdrcluster_cache, m);
+                       else
+                               objcache_dtor(mbufcluster_cache, m);
                }
-               crit_exit();
-               KKASSERT(((struct mbcluster *)m->m_ext.ext_arg)->mcl_refs == 1);
-               m->m_data = m->m_ext.ext_buf;
-               if (m->m_flags & M_PKTHDR)
-                       objcache_put(mbufphdrcluster_cache, m);
-               else
-                       objcache_put(mbufcluster_cache, m);
-               crit_enter();
                --mbstat.m_clusters;
-               crit_exit();
-       } else {
-detachmbuf:
-               if (m->m_flags & M_PKTHDR) {
+               break;
+       case M_EXT | M_EXT_CLUSTER:
+               /*
+                * Normal cluster associated with an mbuf that was allocated
+                * from the normal mbuf pool rather than the cluster pool.
+                * The cluster has to be independently disassociated from the
+                * mbuf.
+                */
+               --mbstat.m_clusters;
+               /* fall through */
+       case M_EXT:
+               /*
+                * Normal cluster association case, disconnect the cluster from
+                * the mbuf.  The cluster may or may not be custom.
+                */
+               m->m_ext.ext_free(m->m_ext.ext_arg); 
+               m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
+               /* fall through */
+       case 0:
+               /*
+                * return the mbuf to the mbuf cache.
+                */
+               if (m->m_flags & M_PHCACHE) {
                        m->m_data = m->m_pktdat;
                        objcache_put(mbufphdr_cache, m);
                } else {
                        m->m_data = m->m_dat;
                        objcache_put(mbuf_cache, m);
                }
-               crit_enter();
                --mbstat.m_mbufs;
-               crit_exit();
+               break;
+       default:
+               if (!panicstr)
+                       panic("bad mbuf flags %p %08x\n", m, m->m_flags);
+               break;
        }
+       crit_exit();
        return (n);
 }
 
@@ -1341,7 +1417,7 @@ m_move_pkthdr(struct mbuf *to, struct mbuf *from)
 {
        KASSERT(!(to->m_flags & M_EXT), ("m_move_pkthdr: to has cluster"));
 
-       to->m_flags = from->m_flags & M_COPYFLAGS;
+       to->m_flags |= from->m_flags & M_COPYFLAGS;
        to->m_data = to->m_pktdat;
        to->m_pkthdr = from->m_pkthdr;          /* especially tags */
        SLIST_INIT(&from->m_pkthdr.tags);       /* purge tags from src */
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 8c03958..db02034 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -34,7 +34,7 @@
  *
  *     @(#)mbuf.h      8.5 (Berkeley) 2/19/95
  * $FreeBSD: src/sys/sys/mbuf.h,v 1.44.2.17 2003/04/15 06:15:02 silby Exp $
- * $DragonFly: src/sys/sys/mbuf.h,v 1.28 2005/06/07 19:08:55 hsu Exp $
+ * $DragonFly: src/sys/sys/mbuf.h,v 1.29 2005/06/08 22:22:58 dillon Exp $
  */
 
 #ifndef _SYS_MBUF_H_
@@ -181,7 +181,9 @@ struct mbuf {
 #define        M_FRAG          0x0400  /* packet is a fragment of a larger packet */
 #define        M_FIRSTFRAG     0x0800  /* packet is first fragment */
 #define        M_LASTFRAG      0x1000  /* packet is last fragment */
+#define        M_CLCACHE       0x2000  /* mbuf allocated from the cluster cache */
 #define M_EXT_CLUSTER  0x4000  /* standard cluster else special */
+#define        M_PHCACHE       0x8000  /* mbuf allocated from the pkt header cache */
 
 /*
  * Flags copied when copying m_pkthdr.
diff --git a/sys/sys/objcache.h b/sys/sys/objcache.h
index 68e72e5..9824271 100644
--- a/sys/sys/objcache.h
+++ b/sys/sys/objcache.h
@@ -29,7 +29,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/sys/objcache.h,v 1.1 2005/06/07 19:07:11 hsu Exp $
+ * $DragonFly: src/sys/sys/objcache.h,v 1.2 2005/06/08 22:22:58 dillon Exp $
  */
 
 #ifndef _OBJCACHE_H_
@@ -58,6 +58,7 @@ struct objcache
                         void *allocator_args);
 void   *objcache_get(struct objcache *oc, int ocflags);
 void    objcache_put(struct objcache *oc, void *obj);
+void    objcache_dtor(struct objcache *oc, void *obj);
 void    objcache_populate_linear(struct objcache *oc, void *elts, int nelts,
                                  int size);
 boolean_t objcache_reclaimlist(struct objcache *oc[], int nlist, int ocflags);