2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * $DragonFly: src/sys/kern/kern_objcache.c,v 1.6 2006/04/14 02:58:49 dillon Exp $
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/thread.h>
44 #include <sys/thread2.h>
46 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
47 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
49 #define INITIAL_MAG_CAPACITY 256
54 SLIST_ENTRY(magazine) nextmagazine;
58 SLIST_HEAD(magazinelist, magazine);
61 * per-cluster cache of magazines
62 * All fields in this structure are protected by the token.
64 struct magazinedepot {
66 * The per-cpu object caches only exchange completely full or
67 * completely empty magazines with the depot layer, so only have
68 * to cache these two types of magazines.
70 struct magazinelist fullmagazines;
71 struct magazinelist emptymagazines;
74 /* protect this structure */
75 struct lwkt_token token;
77 /* objects not yet allocated towards limit */
78 int unallocated_objects;
80 /* infrequently used fields */
81 int waiting; /* waiting for another cpu to
82 * return a full magazine to
84 int contested; /* depot contention count */
88 * per-cpu object cache
89 * All fields in this structure are protected by crit_enter().
91 struct percpu_objcache {
92 struct magazine *loaded_magazine; /* active magazine */
93 struct magazine *previous_magazine; /* backup magazine */
96 int gets_cumulative; /* total calls to get */
97 int gets_null; /* objcache_get returned NULL */
98 int puts_cumulative; /* total calls to put */
99 int puts_othercluster; /* returned to other cluster */
101 /* infrequently used fields */
102 int waiting; /* waiting for a thread on this cpu to
103 * return an obj to the per-cpu cache */
106 /* only until we have NUMA cluster topology information XXX */
107 #define MAXCLUSTERS 1
108 #define myclusterid 0
109 #define CLUSTER_OF(obj) 0
112 * Two-level object cache consisting of NUMA cluster-level depots of
113 * fully loaded or completely empty magazines and cpu-level caches of
114 * individual objects.
119 /* object constructor and destructor from blank storage */
120 objcache_ctor_fn *ctor;
121 objcache_dtor_fn *dtor;
124 /* interface to underlying allocator */
125 objcache_alloc_fn *alloc;
126 objcache_free_fn *free;
127 void *allocator_args;
129 SLIST_ENTRY(objcache) oc_next;
131 /* NUMA-cluster level caches */
132 struct magazinedepot depot[MAXCLUSTERS];
134 struct percpu_objcache cache_percpu[]; /* per-cpu caches */
137 static struct lwkt_token objcachelist_token;
138 static SLIST_HEAD(objcachelist, objcache) allobjcaches;
/*
 * mag_alloc: allocate one magazine able to hold capacity object pointers.
 *
 * The allocation size is the offset of objects[capacity] inside struct
 * magazine, i.e. the fixed header plus capacity trailing slots.  The
 * memory is requested from the M_OBJMAG malloc type with the flags
 * M_INTWAIT | M_ZERO, and the capacity is then recorded in the magazine.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the new magazine pointer is returned -- confirm.
 */
140 static struct magazine *
141 mag_alloc(int capacity)
143 struct magazine *mag;
145 mag = malloc(__offsetof(struct magazine, objects[capacity]),
146 M_OBJMAG, M_INTWAIT | M_ZERO);
147 mag->capacity = capacity;
153 * Create an object cache.
/*
 * objcache_create: build a new object cache and link it onto the global
 * allobjcaches list.
 *
 * name           - copied with strdup() into the M_TEMP malloc type.
 * cluster_limit  - 0 selects no limit (unallocated_objects is set to -1);
 *                  any other value is raised to at least
 *                  mag_capacity * ncpus * 3 before being stored.
 * mag_capacity   - 0 selects INITIAL_MAG_CAPACITY.
 * ctor, dtor, private, alloc, free, allocator_args
 *                - object constructor/destructor and back-end allocator
 *                  hooks with their opaque arguments.
 *
 * NOTE(review): only the stores of name, private and allocator_args are
 * visible in this listing; the stores of ctor, dtor, alloc and free, and
 * the return statement, are elided -- confirm against the full file.
 */
156 objcache_create(char *name, int cluster_limit, int mag_capacity,
157 objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *private,
158 objcache_alloc_fn *alloc, objcache_free_fn *free,
159 void *allocator_args)
162 struct magazinedepot *depot;
/*
 * The per-cpu array is a trailing flexible member, so the allocation is
 * sized as the offset of cache_percpu[ncpus].
 */
166 /* allocate object cache structure */
167 oc = malloc(__offsetof(struct objcache, cache_percpu[ncpus]),
168 M_OBJCACHE, M_WAITOK | M_ZERO);
169 oc->name = strdup(name, M_TEMP);
172 oc->private = private;
174 oc->allocator_args = allocator_args;
/* Only depot[0] is set up here; MAXCLUSTERS is defined as 1 in this file. */
176 /* initialize depots */
177 depot = &oc->depot[0];
179 lwkt_token_init(&depot->token);
180 SLIST_INIT(&depot->fullmagazines);
181 SLIST_INIT(&depot->emptymagazines);
183 if (mag_capacity == 0)
184 mag_capacity = INITIAL_MAG_CAPACITY;
185 depot->magcapacity = mag_capacity;
188 * The cluster_limit must be sufficient to have three magazines per
191 if (cluster_limit == 0) {
192 depot->unallocated_objects = -1;
194 if (cluster_limit < mag_capacity * ncpus * 3)
195 cluster_limit = mag_capacity * ncpus * 3;
196 depot->unallocated_objects = cluster_limit;
/* Every cpu starts with two magazines: one loaded and one previous. */
200 /* initialize per-cpu caches */
201 for (cpuid = 0; cpuid < ncpus; cpuid++) {
202 struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
204 cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
205 cache_percpu->previous_magazine = mag_alloc(mag_capacity);
/* Publish the cache on the global list under objcachelist_token. */
207 lwkt_gettoken(&olock, &objcachelist_token);
208 SLIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
209 lwkt_reltoken(&olock);
/*
 * Magazine state predicates.  The argument is a struct magazine pointer
 * expression.  It is not parenthesized in the expansions, and
 * MAGAZINE_FULL evaluates it twice, so callers should pass a simple
 * expression without side effects.
 */
214 #define MAGAZINE_EMPTY(mag) (mag->rounds == 0)
215 #define MAGAZINE_NOTEMPTY(mag) (mag->rounds != 0)
216 #define MAGAZINE_FULL(mag) (mag->rounds == mag->capacity)
/*
 * Exchange two struct magazine pointer lvalues.  Written as a GNU
 * statement expression; each argument is evaluated more than once.
 */
218 #define swap(x, y) ({ struct magazine *t = x; x = y; y = t; })
221 * Get an object from the object cache.
/*
 * objcache_get: obtain one object from cache oc.
 *
 * Search order visible in this listing:
 *   1. pop from the loaded magazine of the current cpu;
 *   2. otherwise swap the loaded and previous magazines and pop from the
 *      new loaded one;
 *   3. otherwise, under the depot token, take a full magazine from the
 *      depot and hand an empty one back;
 *   4. otherwise, if depot->unallocated_objects is non-zero, call the
 *      back-end allocator and then the constructor;
 *   5. otherwise sleep on the depot when ocflags has M_WAITOK set and
 *      M_NULLOK clear.
 *
 * oc      - cache to allocate from.
 * ocflags - passed to oc->alloc() and oc->ctor(); also tested for
 *           M_WAITOK and M_NULLOK to decide whether to sleep.
 *
 * NOTE(review): the return statements, the critical-section calls and
 * any retry jump are not visible in this listing, so the exact exit
 * paths cannot be confirmed from here.  The gets_null counter indicates
 * that a NULL result is possible.
 */
224 objcache_get(struct objcache *oc, int ocflags)
226 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
227 struct magazine *loadedmag;
228 struct magazine *emptymag;
230 struct magazinedepot *depot;
234 ++cpucache->gets_cumulative;
238 * Loaded magazine has an object. This is the hot path.
239 * It is lock-free and uses a critical section to block
240 * out interrupt handlers on the same processor.
242 loadedmag = cpucache->loaded_magazine;
243 if (MAGAZINE_NOTEMPTY(loadedmag)) {
244 obj = loadedmag->objects[--loadedmag->rounds];
249 /* Previous magazine has an object. */
250 if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
251 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
252 loadedmag = cpucache->loaded_magazine;
253 obj = loadedmag->objects[--loadedmag->rounds];
259 * Both magazines empty. Get a full magazine from the depot and
260 * move one of the empty ones to the depot.
262 * Obtain the depot token.
264 depot = &oc->depot[myclusterid];
265 lwkt_gettoken(&ilock, &depot->token);
268 * We might have blocked obtaining the token, we must recheck
269 * the cpucache before potentially falling through to the blocking
270 * code or we might deadlock the tsleep() on a low-memory machine.
272 if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
273 MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
275 lwkt_reltoken(&ilock);
/*
 * Rotation on a depot hit: the old previous magazine leaves the cpu, the
 * old loaded magazine becomes previous, and the first full magazine of
 * the depot becomes the loaded one.
 */
279 /* Check if depot has a full magazine. */
280 if (!SLIST_EMPTY(&depot->fullmagazines)) {
281 emptymag = cpucache->previous_magazine;
282 cpucache->previous_magazine = cpucache->loaded_magazine;
283 cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
284 SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
287 * Return emptymag to the depot.
289 KKASSERT(MAGAZINE_EMPTY(emptymag));
290 SLIST_INSERT_HEAD(&depot->emptymagazines,
291 emptymag, nextmagazine);
292 lwkt_reltoken(&ilock);
297 * The depot does not have any non-empty magazines. If we have
298 * not hit our object limit we can allocate a new object using
299 * the back-end allocator.
301 * note: unallocated_objects can be initialized to -1, which has
302 * the effect of removing any allocation limits.
/*
 * Any non-zero count passes this test: a positive remaining budget, or
 * the negative values that result from the -1 starting point.
 */
304 if (depot->unallocated_objects) {
305 --depot->unallocated_objects;
306 lwkt_reltoken(&ilock);
/*
 * The back-end allocator and the constructor are called after the depot
 * token has been released.
 *
 * NOTE(review): the visible lines give the slot back
 * (++unallocated_objects) only on the path that frees the object after
 * oc->ctor().  Whether the count is also restored when oc->alloc()
 * itself fails is not visible here -- confirm, otherwise a failed
 * allocation permanently consumes one unit of the limit.
 */
309 obj = oc->alloc(oc->allocator_args, ocflags);
311 if (oc->ctor(obj, oc->private, ocflags))
313 oc->free(obj, oc->allocator_args);
314 lwkt_gettoken(&ilock, &depot->token);
315 ++depot->unallocated_objects;
318 lwkt_reltoken(&ilock);
324 * makes debugging easier when gets_cumulative does
325 * not include gets_null.
327 ++cpucache->gets_null;
328 --cpucache->gets_cumulative;
335 * Otherwise block if allowed to.
/*
 * Sleeping is allowed only when M_WAITOK is set and M_NULLOK is clear.
 * The sleep channel is the depot address, which is the same address
 * objcache_put() passes to wakeup_mycpu().
 */
337 if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
340 tsleep(depot, 0, "objcache_get", 0);
343 lwkt_reltoken(&ilock);
350 ++cpucache->gets_null;
351 --cpucache->gets_cumulative;
353 lwkt_reltoken(&ilock);
358 * Wrapper for malloc allocation routines.
/*
 * objcache_malloc_alloc: back-end allocation hook built on malloc().
 *
 * allocator_args - interpreted as a struct objcache_malloc_args, which
 *                  supplies the object size (objsize) and malloc type
 *                  (mtype).
 * ocflags        - masked with OC_MFLAGS before being passed to malloc().
 *
 * Returns whatever malloc() returns.
 */
361 objcache_malloc_alloc(void *allocator_args, int ocflags)
363 struct objcache_malloc_args *alloc_args = allocator_args;
365 return (malloc(alloc_args->objsize, alloc_args->mtype,
366 ocflags & OC_MFLAGS));
/*
 * objcache_malloc_free: back-end release hook built on free().
 *
 * obj            - object to release.
 * allocator_args - interpreted as a struct objcache_malloc_args; only its
 *                  mtype member is used, as the malloc type for free().
 */
370 objcache_malloc_free(void *obj, void *allocator_args)
372 struct objcache_malloc_args *alloc_args = allocator_args;
374 free(obj, alloc_args->mtype);
378 * Wrapper for allocation policies that pre-allocate at initialization time
379 * and don't do run-time allocation.
382 objcache_nop_alloc(void *allocator_args, int ocflags)
388 objcache_nop_free(void *obj, void *allocator_args)
393 * Return an object to the object cache.
/*
 * objcache_put: hand obj back to cache oc.
 *
 * Order of attempts visible in this listing:
 *   1. push onto the loaded magazine of the current cpu if it has room;
 *   2. otherwise swap the loaded and previous magazines and push onto the
 *      new loaded one if it has room;
 *   3. otherwise, under the depot token, swap in an empty magazine from
 *      the depot and return the displaced magazine to the depot;
 *   4. otherwise credit unallocated_objects, then destruct and free the
 *      object through oc->dtor() and oc->free().
 *
 * NOTE(review): return statements, critical-section calls and any retry
 * jump are elided in this listing.  In particular the store of obj after
 * step 3 is not visible; presumably the function retries from the top --
 * confirm.
 */
396 objcache_put(struct objcache *oc, void *obj)
398 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
399 struct magazine *loadedmag;
400 struct magazinedepot *depot;
404 ++cpucache->puts_cumulative;
/*
 * CLUSTER_OF() and myclusterid are both defined as 0 in this file, so
 * this branch cannot currently be taken.
 */
406 if (CLUSTER_OF(obj) != myclusterid) {
408 /* use lazy IPI to send object to owning cluster XXX todo */
409 ++cpucache->puts_othercluster;
417 * Free slot available in loaded magazine. This is the hot path.
418 * It is lock-free and uses a critical section to block out interrupt
419 * handlers on the same processor.
421 loadedmag = cpucache->loaded_magazine;
422 if (!MAGAZINE_FULL(loadedmag)) {
423 loadedmag->objects[loadedmag->rounds++] = obj;
/*
 * The wakeup channel is the depot address, matching the tsleep() channel
 * used by objcache_get().
 */
424 if (cpucache->waiting)
425 wakeup_mycpu(&oc->depot[myclusterid]);
431 * Current magazine full, but previous magazine has room. XXX
433 if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
434 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
435 loadedmag = cpucache->loaded_magazine;
436 loadedmag->objects[loadedmag->rounds++] = obj;
437 if (cpucache->waiting)
438 wakeup_mycpu(&oc->depot[myclusterid]);
444 * Both magazines full. Get an empty magazine from the depot and
445 * move a full loaded magazine to the depot. Even though the
446 * magazine may wind up with space available after we block on
447 * the token, we still cycle it through to avoid the non-optimal
450 * Obtain the depot token.
452 depot = &oc->depot[myclusterid];
453 lwkt_gettoken(&ilock, &depot->token);
456 * If an empty magazine is available in the depot, cycle it
/*
 * Rotation: the old previous magazine leaves the cpu (it is held in
 * loadedmag from here on), the old loaded magazine becomes previous, and
 * the first empty magazine of the depot becomes the loaded one.
 */
459 if (!SLIST_EMPTY(&depot->emptymagazines)) {
460 loadedmag = cpucache->previous_magazine;
461 cpucache->previous_magazine = cpucache->loaded_magazine;
462 cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
463 SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
466 * Return loadedmag to the depot. Due to blocking it may
467 * not be entirely full and could even be empty.
/*
 * NOTE(review): only the empty case is distinguished here, so a
 * partially filled magazine appears to be filed on fullmagazines as
 * well (the else line itself is elided) -- confirm.
 */
469 if (MAGAZINE_EMPTY(loadedmag)) {
470 SLIST_INSERT_HEAD(&depot->emptymagazines,
471 loadedmag, nextmagazine);
473 SLIST_INSERT_HEAD(&depot->fullmagazines,
474 loadedmag, nextmagazine);
478 lwkt_reltoken(&ilock);
483 * An empty mag is not available. This is a corner case which can
484 * occur due to cpus holding partially full magazines. Do not try
485 * to allocate a mag, just free the object.
/*
 * The allocation slot is credited back while the depot token is held;
 * the destructor and the back-end free run after the token is released.
 */
487 ++depot->unallocated_objects;
490 lwkt_reltoken(&ilock);
492 oc->dtor(obj, oc->private);
493 oc->free(obj, oc->allocator_args);
497 * The object is being put back into the cache, but the caller has
498 * indicated that the object is not in any shape to be reused and should
499 * be dtor'd immediately.
/*
 * objcache_dtor: dispose of obj immediately instead of caching it.
 *
 * Under the depot token of the current cluster the allocation slot is
 * credited back (++unallocated_objects).  After the token is released
 * the object is destructed with oc->dtor() and released with oc->free().
 * The per-cpu magazines are not touched in the visible lines.
 *
 * NOTE(review): lines between the increment and the token release are
 * elided in this listing (possibly a wakeup of waiters) -- confirm.
 */
502 objcache_dtor(struct objcache *oc, void *obj)
504 struct magazinedepot *depot;
507 depot = &oc->depot[myclusterid];
508 lwkt_gettoken(&ilock, &depot->token);
509 ++depot->unallocated_objects;
512 lwkt_reltoken(&ilock);
513 oc->dtor(obj, oc->private);
514 oc->free(obj, oc->allocator_args);
518 * Utility routine for objects that don't require any de-construction.
521 null_dtor(void *obj, void *private)
527 * De-construct and de-allocate objects in a magazine.
528 * Returns the number of objects freed.
529 * Does not de-allocate the magazine itself.
/*
 * mag_purge: empty one magazine.
 *
 * Objects are popped from the top of the magazine until rounds reaches
 * zero; each one is destructed with oc->dtor() and released with
 * oc->free().  The magazine itself is left allocated.
 *
 * NOTE(review): the counter and the return statement are elided in this
 * listing; callers such as maglist_purge() use the result as the number
 * of objects released.
 */
532 mag_purge(struct objcache *oc, struct magazine *mag)
539 while (mag->rounds) {
540 obj = mag->objects[--mag->rounds];
542 oc->dtor(obj, oc->private);
543 oc->free(obj, oc->allocator_args);
552 * De-allocate all magazines in a magazine list.
553 * Returns number of objects de-allocated.
/*
 * maglist_purge: release magazines from the head of maglist.
 *
 * Each magazine is unlinked first, then emptied with mag_purge() and
 * released to the M_OBJMAG malloc type.  The number of objects released
 * is accumulated in ndeleted.  When purgeall is false the loop stops
 * caring after the first magazine that released at least one object.
 *
 * NOTE(review): the purgeall parameter declaration, the statement under
 * the final if and the return statement are elided in this listing;
 * presumably the loop is left there and ndeleted is returned -- confirm.
 */
556 maglist_purge(struct objcache *oc, struct magazinelist *maglist,
559 struct magazine *mag;
562 /* can't use SLIST_FOREACH because blocking releases the depot token */
563 while ((mag = SLIST_FIRST(maglist))) {
564 SLIST_REMOVE_HEAD(maglist, nextmagazine);
565 ndeleted += mag_purge(oc, mag); /* could block! */
566 free(mag, M_OBJMAG); /* could block! */
567 if (!purgeall && ndeleted > 0)
574 * De-allocates all magazines on the full and empty magazine lists.
/*
 * depot_purge: release every magazine on both depot lists.
 *
 * Both the full and the empty list are purged with purgeall set to TRUE,
 * and the number of objects released is credited to unallocated_objects.
 *
 * The function does not take the depot token itself.  In this file
 * objcache_timer() calls it with the token held, while
 * objcache_reclaim() and objcache_destroy() call it without and carry
 * XXX comments about that.
 *
 * NOTE(review): the statement under the final if is elided in this
 * listing; presumably it wakes threads sleeping on the depot -- confirm.
 */
577 depot_purge(struct magazinedepot *depot, struct objcache *oc)
579 depot->unallocated_objects +=
580 maglist_purge(oc, &depot->fullmagazines, TRUE);
581 depot->unallocated_objects +=
582 maglist_purge(oc, &depot->emptymagazines, TRUE);
583 if (depot->unallocated_objects && depot->waiting)
/*
 * objcache_reclaim: empty the two magazines of one per-cpu cache entry
 * and then purge the depot of the current cluster.
 *
 * NOTE(review): the per-cpu array is indexed with myclusterid here,
 * whereas objcache_get(), objcache_put() and objcache_reclaimlist()
 * index it with mycpuid.  Since myclusterid is defined as 0 this always
 * selects entry 0 -- this looks like a bug, confirm.
 *
 * NOTE(review): the counts returned by mag_purge() are discarded here,
 * while objcache_reclaimlist() adds them to depot->unallocated_objects
 * -- confirm whether the limit accounting is intended to be skipped.
 */
589 objcache_reclaim(struct objcache *oc)
591 struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
592 struct magazinedepot *depot = &oc->depot[myclusterid];
594 mag_purge(oc, cache_percpu->loaded_magazine);
595 mag_purge(oc, cache_percpu->previous_magazine);
597 /* XXX need depot token */
598 depot_purge(depot, oc);
603 * Try to free up some memory. Return as soon as some free memory found.
604 * For each object cache on the reclaim list, first try the current per-cpu
605 * cache, then the full magazine depot.
/*
 * objcache_reclaimlist: try to release memory from a list of caches.
 *
 * oclist  - array of caches to try, in order.
 * nlist   - number of entries in oclist.
 * ocflags - not used in the lines visible in this listing.
 *
 * For each cache the loaded magazine of the current cpu is purged; the
 * previous magazine is purged only if the loaded one released nothing
 * (short-circuit evaluation of the || below).  If neither released
 * anything, magazines are purged from the full list of the depot with
 * purgeall set to FALSE.  Whenever objects were released, the count is
 * credited to unallocated_objects under the depot token.
 *
 * NOTE(review): the return statements, the wakeup statements under the
 * two waiting checks, and the assignment of oc from oclist are elided in
 * this listing -- confirm the exit paths against the full file.
 */
608 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
611 struct percpu_objcache *cpucache;
612 struct magazinedepot *depot;
616 for (i = 0; i < nlist; i++) {
618 cpucache = &oc->cache_percpu[mycpuid];
619 depot = &oc->depot[myclusterid];
622 if ((ndel = mag_purge(oc, cpucache->loaded_magazine)) > 0 ||
623 (ndel = mag_purge(oc, cpucache->previous_magazine)) > 0) {
625 lwkt_gettoken(&ilock, &depot->token);
626 depot->unallocated_objects += ndel;
627 if (depot->unallocated_objects && depot->waiting)
629 lwkt_reltoken(&ilock);
/*
 * The depot token is held across maglist_purge(), whose own comment
 * notes that blocking releases the token.
 */
633 lwkt_gettoken(&ilock, &depot->token);
635 maglist_purge(oc, &depot->fullmagazines, FALSE)) > 0) {
636 depot->unallocated_objects += ndel;
637 if (depot->unallocated_objects && depot->waiting)
639 lwkt_reltoken(&ilock);
642 lwkt_reltoken(&ilock);
648 * Destroy an object cache. Must have no existing references.
649 * XXX Not clear this is a useful API function.
/*
 * objcache_destroy: release everything owned by cache oc.
 *
 * The depot of every cluster is purged, then for every cpu both
 * magazines are emptied and released, and finally the name string
 * (M_TEMP, matching the strdup() in objcache_create()) and the cache
 * structure itself (M_OBJCACHE) are released.
 *
 * NOTE(review): no removal of oc from the allobjcaches list is visible
 * here, although objcache_create() inserts it and objcache_timer() walks
 * that list -- confirm, otherwise the timer can visit released memory.
 */
652 objcache_destroy(struct objcache *oc)
654 struct percpu_objcache *cache_percpu;
655 int clusterid, cpuid;
657 /* XXX need depot token? */
658 for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++)
659 depot_purge(&oc->depot[clusterid], oc);
661 for (cpuid = 0; cpuid < ncpus; cpuid++) {
662 cache_percpu = &oc->cache_percpu[cpuid];
664 mag_purge(oc, cache_percpu->loaded_magazine);
665 free(cache_percpu->loaded_magazine, M_OBJMAG);
667 mag_purge(oc, cache_percpu->previous_magazine);
668 free(cache_percpu->previous_magazine, M_OBJMAG);
671 free(oc->name, M_TEMP);
672 free(oc, M_OBJCACHE);
677 * Populate the per-cluster depot with elements from a linear block
678 * of memory. Must be called individually for each cluster.
679 * Populated depots should not be destroyed.
/*
 * objcache_populate_linear: load the depot of the current cluster with
 * objects carved out of one contiguous memory block.
 *
 * oc    - cache to populate.
 * base  - start of the block.
 * nelts - number of objects in the block.
 * size  - size of one object in bytes; the block end is computed as
 *         base + nelts * size using int multiplication.
 *
 * Work is done under the depot token.
 *
 * NOTE(review): the loop header and the pointer advance are elided in
 * this listing.
 *
 * NOTE(review): each new magazine is linked onto fullmagazines as soon
 * as it is allocated, so the last one stays on that list even if the
 * block ends before it is filled -- confirm this is acceptable to
 * consumers of the full list.
 */
682 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
685 char *end = (char *)base + (nelts * size);
686 struct magazinedepot *depot = &oc->depot[myclusterid];
/*
 * The sentinel has rounds and capacity both zero, so MAGAZINE_FULL() is
 * true for it and the first pass allocates a real magazine.
 */
688 struct magazine sentinelfullmag = { 0, 0 };
689 struct magazine *emptymag = &sentinelfullmag;
691 lwkt_gettoken(&ilock, &depot->token);
693 if (MAGAZINE_FULL(emptymag)) {
694 emptymag = mag_alloc(depot->magcapacity);
695 SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
698 emptymag->objects[emptymag->rounds++] = p;
/* The object budget of the depot is raised by the number of elements. */
701 depot->unallocated_objects += nelts;
702 if (depot->unallocated_objects && depot->waiting)
704 lwkt_reltoken(&ilock);
710 * Check depot contention once a minute.
711 * 2 contested locks per second allowed.
713 static int objcache_rebalance_period;
714 static const int objcache_contention_rate = 120;
715 static struct callout objcache_callout;
717 #define MAXMAGSIZE 512
720 * Check depot contention and increase magazine size if necessary.
/*
 * objcache_timer: periodic callout that reacts to depot contention.
 *
 * Under objcachelist_token every cache on allobjcaches is visited.  For
 * the depot of the current cluster, if magcapacity is below MAXMAGSIZE
 * and the contested counter exceeds objcache_contention_rate, the depot
 * is purged and magcapacity is doubled under the depot token, and a
 * message is printed.  The contested counter is then reset and the
 * callout is re-armed for objcache_rebalance_period ticks.
 *
 * dummy - unused callout argument.
 *
 * NOTE(review): depot->contested is read and reset without the depot
 * token, although the struct comment says all depot fields are
 * protected by it -- confirm this is intentional.
 *
 * NOTE(review): in the code visible here depot->magcapacity is consumed
 * only by objcache_populate_linear(); the per-cpu magazines allocated in
 * objcache_create() keep their original capacity -- confirm how the
 * larger size is meant to take effect.
 */
723 objcache_timer(void *dummy)
726 struct magazinedepot *depot;
727 lwkt_tokref olock, dlock;
729 lwkt_gettoken(&olock, &objcachelist_token);
730 SLIST_FOREACH(oc, &allobjcaches, oc_next) {
731 depot = &oc->depot[myclusterid];
732 if (depot->magcapacity < MAXMAGSIZE) {
733 if (depot->contested > objcache_contention_rate) {
734 lwkt_gettoken(&dlock, &depot->token);
735 depot_purge(depot, oc);
736 depot->magcapacity *= 2;
737 lwkt_reltoken(&dlock);
738 printf("objcache_timer: increasing cache %s"
739 " magsize to %d, contested %d times\n",
740 oc->name, depot->magcapacity,
743 depot->contested = 0;
746 lwkt_reltoken(&olock);
748 callout_reset(&objcache_callout, objcache_rebalance_period,
749 objcache_timer, NULL);
757 lwkt_token_init(&objcachelist_token);
759 callout_init(&objcache_callout);
760 objcache_rebalance_period = 60 * hz;
761 callout_reset(&objcache_callout, objcache_rebalance_period,
762 objcache_timer, NULL);
765 SYSINIT(objcache, SI_SUB_CPU, SI_ORDER_ANY, objcache_init, 0);