2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * $DragonFly: src/sys/kern/kern_objcache.c,v 1.11 2006/09/05 03:48:12 dillon Exp $
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/thread.h>
44 #include <sys/thread2.h>
46 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
47 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
49 #define INITIAL_MAG_CAPACITY 256
54 SLIST_ENTRY(magazine) nextmagazine;
58 SLIST_HEAD(magazinelist, magazine);
61 * per-cluster cache of magazines
62 * All fields in this structure are protected by the token.
64 struct magazinedepot {
66 * The per-cpu object caches only exchange completely full or
67 * completely empty magazines with the depot layer, so only have
68 * to cache these two types of magazines.
70 struct magazinelist fullmagazines;
71 struct magazinelist emptymagazines;
74 /* protect this structure */
75 struct lwkt_token token;
77 /* objects not yet allocated towards limit */
78 int unallocated_objects;
80 /* infrequently used fields */
81 int waiting; /* waiting for another cpu to
82 * return a full magazine to
84 int contested; /* depot contention count */
88 * per-cpu object cache
89 * All fields in this structure are protected by crit_enter().
91 struct percpu_objcache {
92 struct magazine *loaded_magazine; /* active magazine */
93 struct magazine *previous_magazine; /* backup magazine */
96 int gets_cumulative; /* total calls to get */
97 int gets_null; /* objcache_get returned NULL */
98 int puts_cumulative; /* total calls to put */
99 int puts_othercluster; /* returned to other cluster */
101 /* infrequently used fields */
102 int waiting; /* waiting for a thread on this cpu to
103 * return an obj to the per-cpu cache */
106 /* only until we have NUMA cluster topology information XXX */
107 #define MAXCLUSTERS 1
108 #define myclusterid 0
109 #define CLUSTER_OF(obj) 0
112 * Two-level object cache consisting of NUMA cluster-level depots of
113 * fully loaded or completely empty magazines and cpu-level caches of
114 * individual objects.
119 /* object constructor and destructor from blank storage */
120 objcache_ctor_fn *ctor;
121 objcache_dtor_fn *dtor;
124 /* interface to underlying allocator */
125 objcache_alloc_fn *alloc;
126 objcache_free_fn *free;
127 void *allocator_args;
129 SLIST_ENTRY(objcache) oc_next;
131 /* NUMA-cluster level caches */
132 struct magazinedepot depot[MAXCLUSTERS];
134 struct percpu_objcache cache_percpu[]; /* per-cpu caches */
137 static struct lwkt_token objcachelist_token;
138 static SLIST_HEAD(objcachelist, objcache) allobjcaches;
140 static struct magazine *
141 mag_alloc(int capacity)
143 struct magazine *mag;
145 mag = kmalloc(__offsetof(struct magazine, objects[capacity]),
146 M_OBJMAG, M_INTWAIT | M_ZERO);
147 mag->capacity = capacity;
153 * Create an object cache.
156 objcache_create(const char *name, int cluster_limit, int mag_capacity,
157 objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *private,
158 objcache_alloc_fn *alloc, objcache_free_fn *free,
159 void *allocator_args)
162 struct magazinedepot *depot;
166 /* allocate object cache structure */
167 oc = kmalloc(__offsetof(struct objcache, cache_percpu[ncpus]),
168 M_OBJCACHE, M_WAITOK | M_ZERO);
169 oc->name = kstrdup(name, M_TEMP);
172 oc->private = private;
174 oc->allocator_args = allocator_args;
176 /* initialize depots */
177 depot = &oc->depot[0];
179 lwkt_token_init(&depot->token);
180 SLIST_INIT(&depot->fullmagazines);
181 SLIST_INIT(&depot->emptymagazines);
183 if (mag_capacity == 0)
184 mag_capacity = INITIAL_MAG_CAPACITY;
185 depot->magcapacity = mag_capacity;
188 * The cluster_limit must be sufficient to have three magazines per
191 if (cluster_limit == 0) {
192 depot->unallocated_objects = -1;
194 if (cluster_limit < mag_capacity * ncpus * 3)
195 cluster_limit = mag_capacity * ncpus * 3;
196 depot->unallocated_objects = cluster_limit;
200 /* initialize per-cpu caches */
201 for (cpuid = 0; cpuid < ncpus; cpuid++) {
202 struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
204 cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
205 cache_percpu->previous_magazine = mag_alloc(mag_capacity);
207 lwkt_gettoken(&olock, &objcachelist_token);
208 SLIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
209 lwkt_reltoken(&olock);
215 objcache_create_simple(malloc_type_t mtype, size_t objsize)
217 struct objcache_malloc_args *margs;
220 margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
221 margs->objsize = objsize;
222 margs->mtype = mtype;
223 oc = objcache_create(mtype->ks_shortdesc, 0, 0,
224 null_ctor, null_dtor, NULL,
225 objcache_malloc_alloc, objcache_malloc_free,
/*
 * Magazine state predicates.  'mag' is any expression yielding a pointer
 * to a struct magazine; it is parenthesized so that compound expressions
 * (e.g. a conditional) expand correctly.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two magazine pointers.  Both arguments must be modifiable
 * lvalues of type struct magazine *; each is evaluated twice.
 */
#define swap(x, y)	({ struct magazine *swap_tmp = (x); (x) = (y); (y) = swap_tmp; })
237 * Get an object from the object cache.
239 * WARNING! ocflags are only used when we have to go to the underlying
240 * allocator, so we cannot depend on flags such as M_ZERO.
243 objcache_get(struct objcache *oc, int ocflags)
245 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
246 struct magazine *loadedmag;
247 struct magazine *emptymag;
249 struct magazinedepot *depot;
253 ++cpucache->gets_cumulative;
257 * Loaded magazine has an object. This is the hot path.
258 * It is lock-free and uses a critical section to block
259 * out interrupt handlers on the same processor.
261 loadedmag = cpucache->loaded_magazine;
262 if (MAGAZINE_NOTEMPTY(loadedmag)) {
263 obj = loadedmag->objects[--loadedmag->rounds];
268 /* Previous magazine has an object. */
269 if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
270 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
271 loadedmag = cpucache->loaded_magazine;
272 obj = loadedmag->objects[--loadedmag->rounds];
278 * Both magazines empty. Get a full magazine from the depot and
279 * move one of the empty ones to the depot.
281 * Obtain the depot token.
283 depot = &oc->depot[myclusterid];
284 lwkt_gettoken(&ilock, &depot->token);
287 * We might have blocked obtaining the token, we must recheck
288 * the cpucache before potentially falling through to the blocking
289 * code or we might deadlock the tsleep() on a low-memory machine.
291 if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
292 MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
294 lwkt_reltoken(&ilock);
298 /* Check if depot has a full magazine. */
299 if (!SLIST_EMPTY(&depot->fullmagazines)) {
300 emptymag = cpucache->previous_magazine;
301 cpucache->previous_magazine = cpucache->loaded_magazine;
302 cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
303 SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
306 * Return emptymag to the depot.
308 KKASSERT(MAGAZINE_EMPTY(emptymag));
309 SLIST_INSERT_HEAD(&depot->emptymagazines,
310 emptymag, nextmagazine);
311 lwkt_reltoken(&ilock);
316 * The depot does not have any non-empty magazines. If we have
317 * not hit our object limit we can allocate a new object using
318 * the back-end allocator.
320 * note: unallocated_objects can be initialized to -1, which has
321 * the effect of removing any allocation limits.
323 if (depot->unallocated_objects) {
324 --depot->unallocated_objects;
325 lwkt_reltoken(&ilock);
328 obj = oc->alloc(oc->allocator_args, ocflags);
330 if (oc->ctor(obj, oc->private, ocflags))
332 oc->free(obj, oc->allocator_args);
333 lwkt_gettoken(&ilock, &depot->token);
334 ++depot->unallocated_objects;
337 lwkt_reltoken(&ilock);
343 * makes debugging easier when gets_cumulative does
344 * not include gets_null.
346 ++cpucache->gets_null;
347 --cpucache->gets_cumulative;
354 * Otherwise block if allowed to.
356 if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
359 tsleep(depot, 0, "objcache_get", 0);
362 lwkt_reltoken(&ilock);
369 ++cpucache->gets_null;
370 --cpucache->gets_cumulative;
372 lwkt_reltoken(&ilock);
377 * Wrapper for malloc allocation routines.
380 objcache_malloc_alloc(void *allocator_args, int ocflags)
382 struct objcache_malloc_args *alloc_args = allocator_args;
384 return (kmalloc(alloc_args->objsize, alloc_args->mtype,
385 ocflags & OC_MFLAGS));
389 objcache_malloc_free(void *obj, void *allocator_args)
391 struct objcache_malloc_args *alloc_args = allocator_args;
393 kfree(obj, alloc_args->mtype);
/*
 * Allocation hook for caches that are pre-populated at initialization
 * time and never allocate at run time: always reports failure.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	(void)allocator_args;
	(void)ocflags;
	return (NULL);
}
/*
 * Release hook matching objcache_nop_alloc(): the object is left alone.
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	(void)obj;
	(void)allocator_args;
}
412 * Return an object to the object cache.
415 objcache_put(struct objcache *oc, void *obj)
417 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
418 struct magazine *loadedmag;
419 struct magazinedepot *depot;
423 ++cpucache->puts_cumulative;
425 if (CLUSTER_OF(obj) != myclusterid) {
427 /* use lazy IPI to send object to owning cluster XXX todo */
428 ++cpucache->puts_othercluster;
436 * Free slot available in loaded magazine. This is the hot path.
437 * It is lock-free and uses a critical section to block out interrupt
438 * handlers on the same processor.
440 loadedmag = cpucache->loaded_magazine;
441 if (!MAGAZINE_FULL(loadedmag)) {
442 loadedmag->objects[loadedmag->rounds++] = obj;
443 if (cpucache->waiting)
444 wakeup_mycpu(&oc->depot[myclusterid]);
450 * Current magazine full, but previous magazine has room. XXX
452 if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
453 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
454 loadedmag = cpucache->loaded_magazine;
455 loadedmag->objects[loadedmag->rounds++] = obj;
456 if (cpucache->waiting)
457 wakeup_mycpu(&oc->depot[myclusterid]);
463 * Both magazines full. Get an empty magazine from the depot and
464 * move a full loaded magazine to the depot. Even though the
465 * magazine may wind up with space available after we block on
466 * the token, we still cycle it through to avoid the non-optimal
469 * Obtain the depot token.
471 depot = &oc->depot[myclusterid];
472 lwkt_gettoken(&ilock, &depot->token);
475 * If an empty magazine is available in the depot, cycle it
478 if (!SLIST_EMPTY(&depot->emptymagazines)) {
479 loadedmag = cpucache->previous_magazine;
480 cpucache->previous_magazine = cpucache->loaded_magazine;
481 cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
482 SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
485 * Return loadedmag to the depot. Due to blocking it may
486 * not be entirely full and could even be empty.
488 if (MAGAZINE_EMPTY(loadedmag)) {
489 SLIST_INSERT_HEAD(&depot->emptymagazines,
490 loadedmag, nextmagazine);
492 SLIST_INSERT_HEAD(&depot->fullmagazines,
493 loadedmag, nextmagazine);
497 lwkt_reltoken(&ilock);
502 * An empty mag is not available. This is a corner case which can
503 * occur due to cpus holding partially full magazines. Do not try
504 * to allocate a mag, just free the object.
506 ++depot->unallocated_objects;
509 lwkt_reltoken(&ilock);
511 oc->dtor(obj, oc->private);
512 oc->free(obj, oc->allocator_args);
516 * The object is being put back into the cache, but the caller has
517 * indicated that the object is not in any shape to be reused and should
518 * be dtor'd immediately.
521 objcache_dtor(struct objcache *oc, void *obj)
523 struct magazinedepot *depot;
526 depot = &oc->depot[myclusterid];
527 lwkt_gettoken(&ilock, &depot->token);
528 ++depot->unallocated_objects;
531 lwkt_reltoken(&ilock);
532 oc->dtor(obj, oc->private);
533 oc->free(obj, oc->allocator_args);
/*
 * Destructor for objects that need no de-construction: does nothing.
 */
void
null_dtor(void *obj, void *private)
{
	(void)obj;
	(void)private;
}
546 null_ctor(void *obj, void *private, int ocflags)
552 * De-construct and de-allocate objects in a magazine.
553 * Returns the number of objects freed.
554 * Does not de-allocate the magazine itself.
557 mag_purge(struct objcache *oc, struct magazine *mag)
564 while (mag->rounds) {
565 obj = mag->objects[--mag->rounds];
567 oc->dtor(obj, oc->private);
568 oc->free(obj, oc->allocator_args);
577 * De-allocate all magazines in a magazine list.
578 * Returns number of objects de-allocated.
581 maglist_purge(struct objcache *oc, struct magazinelist *maglist,
584 struct magazine *mag;
587 /* can't use SLIST_FOREACH because blocking releases the depot token */
588 while ((mag = SLIST_FIRST(maglist))) {
589 SLIST_REMOVE_HEAD(maglist, nextmagazine);
590 ndeleted += mag_purge(oc, mag); /* could block! */
591 kfree(mag, M_OBJMAG); /* could block! */
592 if (!purgeall && ndeleted > 0)
599 * De-allocates all magazines on the full and empty magazine lists.
602 depot_purge(struct magazinedepot *depot, struct objcache *oc)
604 depot->unallocated_objects +=
605 maglist_purge(oc, &depot->fullmagazines, TRUE);
606 depot->unallocated_objects +=
607 maglist_purge(oc, &depot->emptymagazines, TRUE);
608 if (depot->unallocated_objects && depot->waiting)
614 objcache_reclaim(struct objcache *oc)
616 struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
617 struct magazinedepot *depot = &oc->depot[myclusterid];
619 mag_purge(oc, cache_percpu->loaded_magazine);
620 mag_purge(oc, cache_percpu->previous_magazine);
622 /* XXX need depot token */
623 depot_purge(depot, oc);
628 * Try to free up some memory. Return as soon as some free memory found.
629 * For each object cache on the reclaim list, first try the current per-cpu
630 * cache, then the full magazine depot.
633 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
636 struct percpu_objcache *cpucache;
637 struct magazinedepot *depot;
641 for (i = 0; i < nlist; i++) {
643 cpucache = &oc->cache_percpu[mycpuid];
644 depot = &oc->depot[myclusterid];
647 if ((ndel = mag_purge(oc, cpucache->loaded_magazine)) > 0 ||
648 (ndel = mag_purge(oc, cpucache->previous_magazine)) > 0) {
650 lwkt_gettoken(&ilock, &depot->token);
651 depot->unallocated_objects += ndel;
652 if (depot->unallocated_objects && depot->waiting)
654 lwkt_reltoken(&ilock);
658 lwkt_gettoken(&ilock, &depot->token);
660 maglist_purge(oc, &depot->fullmagazines, FALSE)) > 0) {
661 depot->unallocated_objects += ndel;
662 if (depot->unallocated_objects && depot->waiting)
664 lwkt_reltoken(&ilock);
667 lwkt_reltoken(&ilock);
673 * Destroy an object cache. Must have no existing references.
674 * XXX Not clear this is a useful API function.
677 objcache_destroy(struct objcache *oc)
679 struct percpu_objcache *cache_percpu;
680 int clusterid, cpuid;
682 /* XXX need depot token? */
683 for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++)
684 depot_purge(&oc->depot[clusterid], oc);
686 for (cpuid = 0; cpuid < ncpus; cpuid++) {
687 cache_percpu = &oc->cache_percpu[cpuid];
689 mag_purge(oc, cache_percpu->loaded_magazine);
690 kfree(cache_percpu->loaded_magazine, M_OBJMAG);
692 mag_purge(oc, cache_percpu->previous_magazine);
693 kfree(cache_percpu->previous_magazine, M_OBJMAG);
696 kfree(oc->name, M_TEMP);
697 kfree(oc, M_OBJCACHE);
702 * Populate the per-cluster depot with elements from a linear block
703 * of memory. Must be called for individually for each cluster.
704 * Populated depots should not be destroyed.
707 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
710 char *end = (char *)base + (nelts * size);
711 struct magazinedepot *depot = &oc->depot[myclusterid];
713 struct magazine sentinelfullmag = { 0, 0 };
714 struct magazine *emptymag = &sentinelfullmag;
716 lwkt_gettoken(&ilock, &depot->token);
718 if (MAGAZINE_FULL(emptymag)) {
719 emptymag = mag_alloc(depot->magcapacity);
720 SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
723 emptymag->objects[emptymag->rounds++] = p;
726 depot->unallocated_objects += nelts;
727 if (depot->unallocated_objects && depot->waiting)
729 lwkt_reltoken(&ilock);
735 * Check depot contention once a minute.
736 * 2 contested locks per second allowed.
738 static int objcache_rebalance_period;
739 static const int objcache_contention_rate = 120;
740 static struct callout objcache_callout;
742 #define MAXMAGSIZE 512
745 * Check depot contention and increase magazine size if necessary.
748 objcache_timer(void *dummy)
751 struct magazinedepot *depot;
752 lwkt_tokref olock, dlock;
754 lwkt_gettoken(&olock, &objcachelist_token);
755 SLIST_FOREACH(oc, &allobjcaches, oc_next) {
756 depot = &oc->depot[myclusterid];
757 if (depot->magcapacity < MAXMAGSIZE) {
758 if (depot->contested > objcache_contention_rate) {
759 lwkt_gettoken(&dlock, &depot->token);
760 depot_purge(depot, oc);
761 depot->magcapacity *= 2;
762 lwkt_reltoken(&dlock);
763 printf("objcache_timer: increasing cache %s"
764 " magsize to %d, contested %d times\n",
765 oc->name, depot->magcapacity,
768 depot->contested = 0;
771 lwkt_reltoken(&olock);
773 callout_reset(&objcache_callout, objcache_rebalance_period,
774 objcache_timer, NULL);
782 lwkt_token_init(&objcachelist_token);
784 callout_init(&objcache_callout);
785 objcache_rebalance_period = 60 * hz;
786 callout_reset(&objcache_callout, objcache_rebalance_period,
787 objcache_timer, NULL);
790 SYSINIT(objcache, SI_SUB_CPU, SI_ORDER_ANY, objcache_init, 0);