2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * $DragonFly: src/sys/kern/kern_objcache.c,v 1.7 2006/06/01 06:10:50 dillon Exp $
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/thread.h>
44 #include <sys/thread2.h>
46 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
47 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
49 #define INITIAL_MAG_CAPACITY 256
54 SLIST_ENTRY(magazine) nextmagazine;
58 SLIST_HEAD(magazinelist, magazine);
61 * per-cluster cache of magazines
62 * All fields in this structure are protected by the token.
64 struct magazinedepot {
66 * The per-cpu object caches only exchange completely full or
67 * completely empty magazines with the depot layer, so we only have
68 * to cache these two types of magazines.
70 struct magazinelist fullmagazines;
71 struct magazinelist emptymagazines;
74 /* protect this structure */
75 struct lwkt_token token;
77 /* objects not yet allocated towards limit */
78 int unallocated_objects;
80 /* infrequently used fields */
81 int waiting; /* waiting for another cpu to
82 * return a full magazine to
84 int contested; /* depot contention count */
88 * per-cpu object cache
89 * All fields in this structure are protected by crit_enter().
91 struct percpu_objcache {
92 struct magazine *loaded_magazine; /* active magazine */
93 struct magazine *previous_magazine; /* backup magazine */
96 int gets_cumulative; /* total calls to get */
97 int gets_null; /* objcache_get returned NULL */
98 int puts_cumulative; /* total calls to put */
99 int puts_othercluster; /* returned to other cluster */
101 /* infrequently used fields */
102 int waiting; /* waiting for a thread on this cpu to
103 * return an obj to the per-cpu cache */
106 /* only until we have NUMA cluster topology information XXX */
107 #define MAXCLUSTERS 1
108 #define myclusterid 0
109 #define CLUSTER_OF(obj) 0
112 * Two-level object cache consisting of NUMA cluster-level depots of
113 * fully loaded or completely empty magazines and cpu-level caches of
114 * individual objects.
119 /* object constructor and destructor from blank storage */
120 objcache_ctor_fn *ctor;
121 objcache_dtor_fn *dtor;
124 /* interface to underlying allocator */
125 objcache_alloc_fn *alloc;
126 objcache_free_fn *free;
127 void *allocator_args;
129 SLIST_ENTRY(objcache) oc_next;
131 /* NUMA-cluster level caches */
132 struct magazinedepot depot[MAXCLUSTERS];
134 struct percpu_objcache cache_percpu[]; /* per-cpu caches */
137 static struct lwkt_token objcachelist_token;
138 static SLIST_HEAD(objcachelist, objcache) allobjcaches;
140 static struct magazine *
141 mag_alloc(int capacity)
143 struct magazine *mag;
145 mag = malloc(__offsetof(struct magazine, objects[capacity]),
146 M_OBJMAG, M_INTWAIT | M_ZERO);
147 mag->capacity = capacity;
153 * Create an object cache.
156 objcache_create(const char *name, int cluster_limit, int mag_capacity,
157 objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *private,
158 objcache_alloc_fn *alloc, objcache_free_fn *free,
159 void *allocator_args)
162 struct magazinedepot *depot;
166 /* allocate object cache structure */
167 oc = malloc(__offsetof(struct objcache, cache_percpu[ncpus]),
168 M_OBJCACHE, M_WAITOK | M_ZERO);
169 oc->name = strdup(name, M_TEMP);
172 oc->private = private;
174 oc->allocator_args = allocator_args;
176 /* initialize depots */
177 depot = &oc->depot[0];
179 lwkt_token_init(&depot->token);
180 SLIST_INIT(&depot->fullmagazines);
181 SLIST_INIT(&depot->emptymagazines);
183 if (mag_capacity == 0)
184 mag_capacity = INITIAL_MAG_CAPACITY;
185 depot->magcapacity = mag_capacity;
188 * The cluster_limit must be sufficient to have three magazines per
191 if (cluster_limit == 0) {
192 depot->unallocated_objects = -1;
194 if (cluster_limit < mag_capacity * ncpus * 3)
195 cluster_limit = mag_capacity * ncpus * 3;
196 depot->unallocated_objects = cluster_limit;
200 /* initialize per-cpu caches */
201 for (cpuid = 0; cpuid < ncpus; cpuid++) {
202 struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
204 cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
205 cache_percpu->previous_magazine = mag_alloc(mag_capacity);
207 lwkt_gettoken(&olock, &objcachelist_token);
208 SLIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
209 lwkt_reltoken(&olock);
215 objcache_create_simple(malloc_type_t mtype, size_t objsize)
217 struct objcache_malloc_args *margs;
220 margs = malloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
221 margs->objsize = objsize;
222 margs->mtype = mtype;
223 oc = objcache_create(mtype->ks_shortdesc, 0, 0,
224 null_ctor, null_dtor, NULL,
225 objcache_malloc_alloc, objcache_malloc_free,
230 #define MAGAZINE_EMPTY(mag) (mag->rounds == 0)
231 #define MAGAZINE_NOTEMPTY(mag) (mag->rounds != 0)
232 #define MAGAZINE_FULL(mag) (mag->rounds == mag->capacity)
234 #define swap(x, y) ({ struct magazine *t = x; x = y; y = t; })
237 * Get an object from the object cache.
240 objcache_get(struct objcache *oc, int ocflags)
242 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
243 struct magazine *loadedmag;
244 struct magazine *emptymag;
246 struct magazinedepot *depot;
250 ++cpucache->gets_cumulative;
254 * Loaded magazine has an object. This is the hot path.
255 * It is lock-free and uses a critical section to block
256 * out interrupt handlers on the same processor.
258 loadedmag = cpucache->loaded_magazine;
259 if (MAGAZINE_NOTEMPTY(loadedmag)) {
260 obj = loadedmag->objects[--loadedmag->rounds];
265 /* Previous magazine has an object. */
266 if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
267 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
268 loadedmag = cpucache->loaded_magazine;
269 obj = loadedmag->objects[--loadedmag->rounds];
275 * Both magazines empty. Get a full magazine from the depot and
276 * move one of the empty ones to the depot.
278 * Obtain the depot token.
280 depot = &oc->depot[myclusterid];
281 lwkt_gettoken(&ilock, &depot->token);
284 * We might have blocked obtaining the token, we must recheck
285 * the cpucache before potentially falling through to the blocking
286 * code or we might deadlock the tsleep() on a low-memory machine.
288 if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
289 MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
291 lwkt_reltoken(&ilock);
295 /* Check if depot has a full magazine. */
296 if (!SLIST_EMPTY(&depot->fullmagazines)) {
297 emptymag = cpucache->previous_magazine;
298 cpucache->previous_magazine = cpucache->loaded_magazine;
299 cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
300 SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
303 * Return emptymag to the depot.
305 KKASSERT(MAGAZINE_EMPTY(emptymag));
306 SLIST_INSERT_HEAD(&depot->emptymagazines,
307 emptymag, nextmagazine);
308 lwkt_reltoken(&ilock);
313 * The depot does not have any non-empty magazines. If we have
314 * not hit our object limit we can allocate a new object using
315 * the back-end allocator.
317 * note: unallocated_objects can be initialized to -1, which has
318 * the effect of removing any allocation limits.
320 if (depot->unallocated_objects) {
321 --depot->unallocated_objects;
322 lwkt_reltoken(&ilock);
325 obj = oc->alloc(oc->allocator_args, ocflags);
327 if (oc->ctor(obj, oc->private, ocflags))
329 oc->free(obj, oc->allocator_args);
330 lwkt_gettoken(&ilock, &depot->token);
331 ++depot->unallocated_objects;
334 lwkt_reltoken(&ilock);
340 * makes debugging easier when gets_cumulative does
341 * not include gets_null.
343 ++cpucache->gets_null;
344 --cpucache->gets_cumulative;
351 * Otherwise block if allowed to.
353 if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
356 tsleep(depot, 0, "objcache_get", 0);
359 lwkt_reltoken(&ilock);
366 ++cpucache->gets_null;
367 --cpucache->gets_cumulative;
369 lwkt_reltoken(&ilock);
374 * Wrapper for malloc allocation routines.
377 objcache_malloc_alloc(void *allocator_args, int ocflags)
379 struct objcache_malloc_args *alloc_args = allocator_args;
381 return (malloc(alloc_args->objsize, alloc_args->mtype,
382 ocflags & OC_MFLAGS));
386 objcache_malloc_free(void *obj, void *allocator_args)
388 struct objcache_malloc_args *alloc_args = allocator_args;
390 free(obj, alloc_args->mtype);
394 * Wrapper for allocation policies that pre-allocate at initialization time
395 * and don't do run-time allocation.
398 objcache_nop_alloc(void *allocator_args, int ocflags)
404 objcache_nop_free(void *obj, void *allocator_args)
409 * Return an object to the object cache.
412 objcache_put(struct objcache *oc, void *obj)
414 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
415 struct magazine *loadedmag;
416 struct magazinedepot *depot;
420 ++cpucache->puts_cumulative;
422 if (CLUSTER_OF(obj) != myclusterid) {
424 /* use lazy IPI to send object to owning cluster XXX todo */
425 ++cpucache->puts_othercluster;
433 * Free slot available in loaded magazine. This is the hot path.
434 * It is lock-free and uses a critical section to block out interrupt
435 * handlers on the same processor.
437 loadedmag = cpucache->loaded_magazine;
438 if (!MAGAZINE_FULL(loadedmag)) {
439 loadedmag->objects[loadedmag->rounds++] = obj;
440 if (cpucache->waiting)
441 wakeup_mycpu(&oc->depot[myclusterid]);
447 * Current magazine full, but previous magazine has room. XXX
449 if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
450 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
451 loadedmag = cpucache->loaded_magazine;
452 loadedmag->objects[loadedmag->rounds++] = obj;
453 if (cpucache->waiting)
454 wakeup_mycpu(&oc->depot[myclusterid]);
460 * Both magazines full. Get an empty magazine from the depot and
461 * move a full loaded magazine to the depot. Even though the
462 * magazine may wind up with space available after we block on
463 * the token, we still cycle it through to avoid the non-optimal
466 * Obtain the depot token.
468 depot = &oc->depot[myclusterid];
469 lwkt_gettoken(&ilock, &depot->token);
472 * If an empty magazine is available in the depot, cycle it
475 if (!SLIST_EMPTY(&depot->emptymagazines)) {
476 loadedmag = cpucache->previous_magazine;
477 cpucache->previous_magazine = cpucache->loaded_magazine;
478 cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
479 SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
482 * Return loadedmag to the depot. Due to blocking it may
483 * not be entirely full and could even be empty.
485 if (MAGAZINE_EMPTY(loadedmag)) {
486 SLIST_INSERT_HEAD(&depot->emptymagazines,
487 loadedmag, nextmagazine);
489 SLIST_INSERT_HEAD(&depot->fullmagazines,
490 loadedmag, nextmagazine);
494 lwkt_reltoken(&ilock);
499 * An empty mag is not available. This is a corner case which can
500 * occur due to cpus holding partially full magazines. Do not try
501 * to allocate a mag, just free the object.
503 ++depot->unallocated_objects;
506 lwkt_reltoken(&ilock);
508 oc->dtor(obj, oc->private);
509 oc->free(obj, oc->allocator_args);
513 * The object is being put back into the cache, but the caller has
514 * indicated that the object is not in any shape to be reused and should
515 * be dtor'd immediately.
518 objcache_dtor(struct objcache *oc, void *obj)
520 struct magazinedepot *depot;
523 depot = &oc->depot[myclusterid];
524 lwkt_gettoken(&ilock, &depot->token);
525 ++depot->unallocated_objects;
528 lwkt_reltoken(&ilock);
529 oc->dtor(obj, oc->private);
530 oc->free(obj, oc->allocator_args);
534 * Utility routine for objects that don't require any de-construction.
537 null_dtor(void *obj, void *private)
543 null_ctor(void *obj, void *private, int ocflags)
549 * De-construct and de-allocate objects in a magazine.
550 * Returns the number of objects freed.
551 * Does not de-allocate the magazine itself.
554 mag_purge(struct objcache *oc, struct magazine *mag)
561 while (mag->rounds) {
562 obj = mag->objects[--mag->rounds];
564 oc->dtor(obj, oc->private);
565 oc->free(obj, oc->allocator_args);
574 * De-allocate all magazines in a magazine list.
575 * Returns number of objects de-allocated.
578 maglist_purge(struct objcache *oc, struct magazinelist *maglist,
581 struct magazine *mag;
584 /* can't use SLIST_FOREACH because blocking releases the depot token */
585 while ((mag = SLIST_FIRST(maglist))) {
586 SLIST_REMOVE_HEAD(maglist, nextmagazine);
587 ndeleted += mag_purge(oc, mag); /* could block! */
588 free(mag, M_OBJMAG); /* could block! */
589 if (!purgeall && ndeleted > 0)
596 * De-allocates all magazines on the full and empty magazine lists.
599 depot_purge(struct magazinedepot *depot, struct objcache *oc)
601 depot->unallocated_objects +=
602 maglist_purge(oc, &depot->fullmagazines, TRUE);
603 depot->unallocated_objects +=
604 maglist_purge(oc, &depot->emptymagazines, TRUE);
605 if (depot->unallocated_objects && depot->waiting)
611 objcache_reclaim(struct objcache *oc)
613 struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
614 struct magazinedepot *depot = &oc->depot[myclusterid];
616 mag_purge(oc, cache_percpu->loaded_magazine);
617 mag_purge(oc, cache_percpu->previous_magazine);
619 /* XXX need depot token */
620 depot_purge(depot, oc);
625 * Try to free up some memory. Return as soon as some free memory found.
626 * For each object cache on the reclaim list, first try the current per-cpu
627 * cache, then the full magazine depot.
630 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
633 struct percpu_objcache *cpucache;
634 struct magazinedepot *depot;
638 for (i = 0; i < nlist; i++) {
640 cpucache = &oc->cache_percpu[mycpuid];
641 depot = &oc->depot[myclusterid];
644 if ((ndel = mag_purge(oc, cpucache->loaded_magazine)) > 0 ||
645 (ndel = mag_purge(oc, cpucache->previous_magazine)) > 0) {
647 lwkt_gettoken(&ilock, &depot->token);
648 depot->unallocated_objects += ndel;
649 if (depot->unallocated_objects && depot->waiting)
651 lwkt_reltoken(&ilock);
655 lwkt_gettoken(&ilock, &depot->token);
657 maglist_purge(oc, &depot->fullmagazines, FALSE)) > 0) {
658 depot->unallocated_objects += ndel;
659 if (depot->unallocated_objects && depot->waiting)
661 lwkt_reltoken(&ilock);
664 lwkt_reltoken(&ilock);
670 * Destroy an object cache. Must have no existing references.
671 * XXX Not clear this is a useful API function.
674 objcache_destroy(struct objcache *oc)
676 struct percpu_objcache *cache_percpu;
677 int clusterid, cpuid;
679 /* XXX need depot token? */
680 for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++)
681 depot_purge(&oc->depot[clusterid], oc);
683 for (cpuid = 0; cpuid < ncpus; cpuid++) {
684 cache_percpu = &oc->cache_percpu[cpuid];
686 mag_purge(oc, cache_percpu->loaded_magazine);
687 free(cache_percpu->loaded_magazine, M_OBJMAG);
689 mag_purge(oc, cache_percpu->previous_magazine);
690 free(cache_percpu->previous_magazine, M_OBJMAG);
693 free(oc->name, M_TEMP);
694 free(oc, M_OBJCACHE);
699 * Populate the per-cluster depot with elements from a linear block
700 * of memory. Must be called individually for each cluster.
701 * Populated depots should not be destroyed.
704 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
707 char *end = (char *)base + (nelts * size);
708 struct magazinedepot *depot = &oc->depot[myclusterid];
710 struct magazine sentinelfullmag = { 0, 0 };
711 struct magazine *emptymag = &sentinelfullmag;
713 lwkt_gettoken(&ilock, &depot->token);
715 if (MAGAZINE_FULL(emptymag)) {
716 emptymag = mag_alloc(depot->magcapacity);
717 SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
720 emptymag->objects[emptymag->rounds++] = p;
723 depot->unallocated_objects += nelts;
724 if (depot->unallocated_objects && depot->waiting)
726 lwkt_reltoken(&ilock);
732 * Check depot contention once a minute.
733 * 2 contested locks per second allowed.
735 static int objcache_rebalance_period;
736 static const int objcache_contention_rate = 120;
737 static struct callout objcache_callout;
739 #define MAXMAGSIZE 512
742 * Check depot contention and increase magazine size if necessary.
745 objcache_timer(void *dummy)
748 struct magazinedepot *depot;
749 lwkt_tokref olock, dlock;
751 lwkt_gettoken(&olock, &objcachelist_token);
752 SLIST_FOREACH(oc, &allobjcaches, oc_next) {
753 depot = &oc->depot[myclusterid];
754 if (depot->magcapacity < MAXMAGSIZE) {
755 if (depot->contested > objcache_contention_rate) {
756 lwkt_gettoken(&dlock, &depot->token);
757 depot_purge(depot, oc);
758 depot->magcapacity *= 2;
759 lwkt_reltoken(&dlock);
760 printf("objcache_timer: increasing cache %s"
761 " magsize to %d, contested %d times\n",
762 oc->name, depot->magcapacity,
765 depot->contested = 0;
768 lwkt_reltoken(&olock);
770 callout_reset(&objcache_callout, objcache_rebalance_period,
771 objcache_timer, NULL);
779 lwkt_token_init(&objcachelist_token);
781 callout_init(&objcache_callout);
782 objcache_rebalance_period = 60 * hz;
783 callout_reset(&objcache_callout, objcache_rebalance_period,
784 objcache_timer, NULL);
787 SYSINIT(objcache, SI_SUB_CPU, SI_ORDER_ANY, objcache_init, 0);