2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * $DragonFly: src/sys/kern/kern_objcache.c,v 1.4 2005/07/13 16:06:04 dillon Exp $
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/thread.h>
44 #include <sys/thread2.h>
/*
 * malloc types used by this file: M_OBJCACHE for the objcache structure
 * itself (see objcache_create), M_OBJMAG for each magazine (see mag_alloc).
 */
46 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
47 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
/* Magazine capacity objcache_create() falls back to when it is passed 0. */
49 #define INITIAL_MAG_CAPACITY 256
/*
 * NOTE(review): only the list-linkage member of struct magazine is visible
 * in this view; the struct header and its other members are not shown.
 */
54 SLIST_ENTRY(magazine) nextmagazine;
/* List head type used for the depot's full and empty magazine lists. */
58 SLIST_HEAD(magazinelist, magazine);
61 * per-cluster cache of magazines
62 * All fields in this structure are protected by the token.
/*
 * Depot of magazines for one cluster.  It holds the list of full and the
 * list of empty magazines, the token guarding them, and the number of
 * objects that may still be obtained from the back-end allocator.
 */
64 struct magazinedepot {
66 * The per-cpu object caches only exchanges completely full or
67 * completely empty magazines with the depot layer, so only have
68 * to cache these two types of magazines.
/* objcache_get() takes its replacement magazine from this list */
70 struct magazinelist fullmagazines;
/* objcache_put() takes its replacement magazine from this list */
71 struct magazinelist emptymagazines;
74 /* protect this structure */
75 struct lwkt_token token;
77 /* magazines not yet allocated towards limit */
/*
 * Set from cluster_limit in objcache_create() (-1 when no limit was
 * requested), decremented by objcache_get() before each back-end
 * allocation and incremented again whenever an object is freed.
 */
78 int unallocated_objects;
80 /* infrequently used fields */
81 int waiting; /* waiting for another cpu to
82 * return a full magazine to
84 int contested; /* depot contention count */
88 * per-cpu object cache
89 * All fields in this structure are protected by crit_enter().
/*
 * Per-cpu front end of an object cache: two magazines plus counters.
 * objcache_get() and objcache_put() operate on the entry selected by
 * mycpuid.
 */
91 struct percpu_objcache {
92 struct magazine *loaded_magazine; /* active magazine */
93 struct magazine *previous_magazine; /* backup magazine */
/* counters maintained by objcache_get() and objcache_put() */
96 int gets_cumulative; /* total calls to get */
97 int gets_null; /* objcache_get returned NULL */
98 int puts_cumulative; /* total calls to put */
99 int puts_othercluster; /* returned to other cluster */
101 /* infrequently used fields */
102 int waiting; /* waiting for a thread on this cpu to
103 * return an obj to the per-cpu cache */
106 /* only until we have NUMA cluster topology information XXX */
/*
 * Single-cluster placeholders: there is one depot, the current cluster id
 * is always 0, and every object is reported as belonging to cluster 0
 * (CLUSTER_OF ignores its argument).
 */
107 #define MAXCLUSTERS 1
108 #define myclusterid 0
109 #define CLUSTER_OF(obj) 0
112 * Two-level object cache consisting of NUMA cluster-level depots of
113 * fully loaded or completely empty magazines and cpu-level caches of
114 * individual objects.
/*
 * Members of the object cache structure.
 * NOTE(review): the struct header and some members that other code in this
 * file assigns (for example name and private) are not visible in this view.
 */
119 /* object constructor and destructor from blank storage */
120 objcache_ctor_fn *ctor;
121 objcache_dtor_fn *dtor;
124 /* interface to underlying allocator */
125 objcache_alloc_fn *alloc;
126 objcache_free_fn *free;
/* passed to both alloc and free on every call */
127 void *allocator_args;
/* linkage on the global allobjcaches list */
129 SLIST_ENTRY(objcache) oc_next;
131 /* NUMA-cluster level caches */
132 struct magazinedepot depot[MAXCLUSTERS];
/* trailing array; objcache_create() sizes the allocation for ncpus entries */
134 struct percpu_objcache cache_percpu[]; /* per-cpu caches */
/*
 * Global list of every object cache.  objcache_create() inserts into it and
 * objcache_timer() walks it, both while holding objcachelist_token.
 */
137 static struct lwkt_token objcachelist_token;
138 static SLIST_HEAD(objcachelist, objcache) allobjcaches;
/*
 * Allocate a zero-filled magazine with room for 'capacity' object pointers
 * and record that capacity in it.
 * NOTE(review): the return statement is not visible in this view; callers
 * in this file use the result as the new magazine.
 */
140 static struct magazine *
141 mag_alloc(int capacity)
143 struct magazine *mag;
/* size = header plus 'capacity' slots of the trailing objects[] array */
145 mag = malloc(__offsetof(struct magazine, objects[capacity]),
146 M_OBJMAG, M_INTWAIT | M_ZERO);
147 mag->capacity = capacity;
153 * Create an object cache.
/*
 * objcache_create: allocate and initialize an object cache and link it
 * onto the global allobjcaches list.
 *
 *   name           - copied with strdup() into oc->name
 *   cluster_limit  - object limit for the depot; 0 means no limit, any
 *                    other value is raised to at least
 *                    mag_capacity * ncpus * 3
 *   mag_capacity   - rounds per magazine; 0 selects INITIAL_MAG_CAPACITY
 *   ctor, dtor     - object constructor / destructor
 *   private        - stored in oc->private; handed to ctor and dtor
 *   alloc, free    - back-end allocator hooks
 *   allocator_args - stored in oc->allocator_args; handed to alloc and free
 *
 * NOTE(review): the return type line, several local declarations and the
 * assignments of ctor/dtor/alloc/free are not visible in this view.
 */
156 objcache_create(char *name, int cluster_limit, int mag_capacity,
157 objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *private,
158 objcache_alloc_fn *alloc, objcache_free_fn *free,
159 void *allocator_args)
162 struct magazinedepot *depot;
166 /* allocate object cache structure */
/* one allocation covers the fixed part plus ncpus per-cpu entries */
167 oc = malloc(__offsetof(struct objcache, cache_percpu[ncpus]),
168 M_OBJCACHE, M_WAITOK | M_ZERO);
/* the name copy is charged to M_TEMP; objcache_destroy() frees it with the same type */
169 oc->name = strdup(name, M_TEMP);
172 oc->private = private;
174 oc->allocator_args = allocator_args;
176 /* initialize depots */
177 depot = &oc->depot[0];
179 lwkt_token_init(&depot->token);
180 SLIST_INIT(&depot->fullmagazines);
181 SLIST_INIT(&depot->emptymagazines);
183 if (mag_capacity == 0)
184 mag_capacity = INITIAL_MAG_CAPACITY;
185 depot->magcapacity = mag_capacity;
188 * The cluster_limit must be sufficient to have three magazines per
191 if (cluster_limit == 0) {
/* objcache_get() allows an allocation for any non-zero count, so -1 removes the limit */
192 depot->unallocated_objects = -1;
/* otherwise raise the limit to at least three magazines' worth per cpu */
194 if (cluster_limit < mag_capacity * ncpus * 3)
195 cluster_limit = mag_capacity * ncpus * 3;
196 depot->unallocated_objects = cluster_limit;
200 /* initialize per-cpu caches */
201 for (cpuid = 0; cpuid < ncpus; cpuid++) {
202 struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
/* both magazines come back zero-filled from mag_alloc(), i.e. with no rounds */
204 cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
205 cache_percpu->previous_magazine = mag_alloc(mag_capacity);
/* publish the new cache on the global list */
207 lwkt_gettoken(&olock, &objcachelist_token);
208 SLIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
209 lwkt_reltoken(&olock);
/*
 * Magazine occupancy predicates.  'mag' is a pointer to a magazine; it is
 * parenthesized so that any pointer-valued expression (e.g. &m or a
 * conditional expression) can be passed.  MAGAZINE_FULL evaluates its
 * argument twice, so do not pass an expression with side effects.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two magazine pointers.  Both arguments must be modifiable
 * lvalues of type struct magazine *.  The temporary has an unusual name so
 * that it cannot capture an argument that happens to be called 't'.
 */
#define swap(x, y)	({ struct magazine *swap_tmp_ = (x); (x) = (y); (y) = swap_tmp_; })
221 * Get an object from the object cache.
/*
 * objcache_get: hand out one object from the cache.
 *
 * Sources are tried in this order:
 *   1. the current cpu's loaded magazine;
 *   2. its previous magazine (swapped in as the loaded one);
 *   3. a full magazine taken from the cluster depot;
 *   4. a new object from oc->alloc / oc->ctor, while the depot's
 *      unallocated_objects count is non-zero;
 *   5. sleeping on the depot, when ocflags has M_WAITOK without M_NULLOK.
 * When none of these applies, gets_null is incremented and
 * gets_cumulative is decremented again.
 *
 * NOTE(review): the return type, labels, jumps, return statements and the
 * critical-section calls are not visible in this view.
 */
224 objcache_get(struct objcache *oc, int ocflags)
226 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
227 struct magazine *loadedmag;
228 struct magazine *emptymag;
230 struct magazinedepot *depot;
234 ++cpucache->gets_cumulative;
238 * Loaded magazine has an object. This is the hot path.
239 * It is lock-free and uses a critical section to block
240 * out interrupt handlers on the same processor.
242 loadedmag = cpucache->loaded_magazine;
243 if (MAGAZINE_NOTEMPTY(loadedmag)) {
/* take the most recently stored round */
244 obj = loadedmag->objects[--loadedmag->rounds];
249 /* Previous magazine has an object. */
250 if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
/* the previous magazine becomes the loaded one, then a round is taken from it */
251 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
252 loadedmag = cpucache->loaded_magazine;
253 obj = loadedmag->objects[--loadedmag->rounds];
259 * Both magazines empty. Get a full magazine from the depot and
260 * move one of the empty ones to the depot. Do this even if we
261 * block on the token to avoid a non-optimal corner case.
263 * Obtain the depot token.
265 depot = &oc->depot[myclusterid];
/*
 * NOTE(review): two acquisition sequences appear back to back here (a
 * trytoken falling back to gettoken, then a plain gettoken).  They look
 * like alternative branches whose selecting lines are not visible in this
 * view -- confirm against the full file.
 */
267 if (!lwkt_trytoken(&ilock, &depot->token)) {
268 lwkt_gettoken(&ilock, &depot->token);
272 lwkt_gettoken(&ilock, &depot->token);
275 /* Check if depot has a full magazine. */
276 if (!SLIST_EMPTY(&depot->fullmagazines)) {
/*
 * Rotate: the old previous magazine leaves the cpu, the old loaded
 * magazine becomes previous, and the depot's first full magazine
 * becomes the loaded one.
 */
277 emptymag = cpucache->previous_magazine;
278 cpucache->previous_magazine = cpucache->loaded_magazine;
279 cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
280 SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
283 * Return emptymag to the depot. Due to blocking it may
284 * not be entirely empty.
286 if (MAGAZINE_EMPTY(emptymag)) {
287 SLIST_INSERT_HEAD(&depot->emptymagazines,
288 emptymag, nextmagazine);
291 * NOTE: magazine is not necessarily entirely full
293 SLIST_INSERT_HEAD(&depot->fullmagazines,
294 emptymag, nextmagazine);
298 lwkt_reltoken(&ilock);
303 * The depot does not have any non-empty magazines. If we have
304 * not hit our object limit we can allocate a new object using
305 * the back-end allocator.
307 * note: unallocated_objects can be initialized to -1, which has
308 * the effect of removing any allocation limits.
/* any non-zero count, including the -1 "no limit" value, permits an allocation */
310 if (depot->unallocated_objects) {
/* reserve the allocation and drop the token before calling the allocator and constructor */
311 --depot->unallocated_objects;
312 lwkt_reltoken(&ilock);
315 obj = oc->alloc(oc->allocator_args, ocflags);
/*
 * NOTE(review): the statement controlled by the ctor test is not visible
 * in this view.  The lines after it free the object and give the
 * reserved allocation back to the depot, presumably the failed
 * construction path -- confirm.
 */
317 if (oc->ctor(obj, oc->private, ocflags))
319 oc->free(obj, oc->allocator_args);
320 lwkt_gettoken(&ilock, &depot->token);
321 ++depot->unallocated_objects;
324 lwkt_reltoken(&ilock);
330 * makes debugging easier when gets_cumulative does
331 * not include gets_null.
333 ++cpucache->gets_null;
334 --cpucache->gets_cumulative;
341 * Otherwise block if allowed to.
/* true only when M_WAITOK is set and M_NULLOK is clear */
343 if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
/* sleeps on the depot address, the same address objcache_put() passes to wakeup() */
346 tsleep(depot, PCATCH, "objcache_get", 0);
349 lwkt_reltoken(&ilock);
356 ++cpucache->gets_null;
357 --cpucache->gets_cumulative;
359 lwkt_reltoken(&ilock);
364 * Wrapper for malloc allocation routines.
/*
 * Back-end alloc hook built on the kernel malloc().  allocator_args points
 * to a struct objcache_malloc_args that supplies the object size and the
 * malloc type; ocflags is masked with OC_MFLAGS before being passed on.
 */
367 objcache_malloc_alloc(void *allocator_args, int ocflags)
369 struct objcache_malloc_args *alloc_args = allocator_args;
371 return (malloc(alloc_args->objsize, alloc_args->mtype,
372 ocflags & OC_MFLAGS));
/*
 * Back-end free hook matching objcache_malloc_alloc(): releases obj with
 * the malloc type taken from the struct objcache_malloc_args that
 * allocator_args points to.
 */
376 objcache_malloc_free(void *obj, void *allocator_args)
378 struct objcache_malloc_args *alloc_args = allocator_args;
380 free(obj, alloc_args->mtype);
384 * Wrapper for allocation policies that pre-allocate at initialization time
385 * and don't do run-time allocation.
/*
 * Alloc hook for caches that do no run-time allocation.
 * NOTE(review): the body is not visible in this view.
 */
388 objcache_nop_alloc(void *allocator_args, int ocflags)
/*
 * Free hook counterpart of objcache_nop_alloc().
 * NOTE(review): the body is not visible in this view.
 */
394 objcache_nop_free(void *obj, void *allocator_args)
399 * Return an object to the object cache.
/*
 * objcache_put: give an object back to the cache.
 *
 * Destinations are tried in this order:
 *   1. the current cpu's loaded magazine, if it has a free slot;
 *   2. its previous magazine (swapped in as the loaded one);
 *   3. after exchanging a magazine with the depot for an empty one;
 *   4. otherwise the object is destroyed with oc->dtor and released with
 *      oc->free, and the depot's unallocated_objects count is incremented.
 * Whenever an object is stored and cpucache->waiting is set, wakeup() is
 * called on the depot address.
 *
 * NOTE(review): the return type, labels, jumps, return statements and the
 * critical-section calls are not visible in this view.
 */
402 objcache_put(struct objcache *oc, void *obj)
404 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
405 struct magazine *loadedmag;
406 struct magazinedepot *depot;
410 ++cpucache->puts_cumulative;
/* with the current placeholder macros both sides are 0, so this test is never true */
412 if (CLUSTER_OF(obj) != myclusterid) {
414 /* use lazy IPI to send object to owning cluster XXX todo */
415 ++cpucache->puts_othercluster;
423 * Free slot available in loaded magazine. This is the hot path.
424 * It is lock-free and uses a critical section to block out interrupt
425 * handlers on the same processor.
427 loadedmag = cpucache->loaded_magazine;
428 if (!MAGAZINE_FULL(loadedmag)) {
/* store the object in the next free slot */
429 loadedmag->objects[loadedmag->rounds++] = obj;
430 if (cpucache->waiting)
431 wakeup(&oc->depot[myclusterid]);
437 * Current magazine full, but previous magazine has room. XXX
439 if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
/* the previous magazine becomes the loaded one, then the object is stored in it */
440 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
441 loadedmag = cpucache->loaded_magazine;
442 loadedmag->objects[loadedmag->rounds++] = obj;
443 if (cpucache->waiting)
444 wakeup(&oc->depot[myclusterid]);
450 * Both magazines full. Get an empty magazine from the depot and
451 * move a full loaded magazine to the depot. Even though the
452 * magazine may wind up with space available after we block on
453 * the token, we still cycle it through to avoid the non-optimal
456 * Obtain the depot token.
458 depot = &oc->depot[myclusterid];
/*
 * NOTE(review): two acquisition sequences appear back to back here (a
 * trytoken falling back to gettoken, then a plain gettoken).  They look
 * like alternative branches whose selecting lines are not visible in this
 * view -- confirm against the full file.
 */
460 if (!lwkt_trytoken(&ilock, &depot->token)) {
461 lwkt_gettoken(&ilock, &depot->token);
465 lwkt_gettoken(&ilock, &depot->token);
469 * If an empty magazine is available in the depot, cycle it
472 if (!SLIST_EMPTY(&depot->emptymagazines)) {
/*
 * Rotate: the old previous magazine leaves the cpu (held in loadedmag),
 * the old loaded magazine becomes previous, and the depot's first empty
 * magazine becomes the loaded one.
 */
473 loadedmag = cpucache->previous_magazine;
474 cpucache->previous_magazine = cpucache->loaded_magazine;
475 cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
476 SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
479 * Return loadedmag to the depot. Due to blocking it may
480 * not be entirely full and could even be empty.
482 if (MAGAZINE_EMPTY(loadedmag)) {
483 SLIST_INSERT_HEAD(&depot->emptymagazines,
484 loadedmag, nextmagazine);
486 SLIST_INSERT_HEAD(&depot->fullmagazines,
487 loadedmag, nextmagazine);
491 lwkt_reltoken(&ilock);
496 * An empty mag is not available. This is a corner case which can
497 * occur due to cpus holding partially full magazines. Do not try
498 * to allocate a mag, just free the object.
/* the object is about to be released, so one more may be allocated later */
500 ++depot->unallocated_objects;
503 lwkt_reltoken(&ilock);
/* destructor and back-end free run after the token has been released */
505 oc->dtor(obj, oc->private);
506 oc->free(obj, oc->allocator_args);
510 * The object is being put back into the cache, but the caller has
511 * indicated that the object is not in any shape to be reused and should
512 * be dtor'd immediately.
/*
 * objcache_dtor: dispose of an object instead of caching it.  The depot's
 * unallocated_objects count is incremented under the depot token, then the
 * object is destroyed with oc->dtor and released with oc->free.
 */
515 objcache_dtor(struct objcache *oc, void *obj)
517 struct magazinedepot *depot;
520 depot = &oc->depot[myclusterid];
/*
 * NOTE(review): two acquisition sequences appear back to back here; they
 * look like alternative branches whose selecting lines are not visible in
 * this view -- confirm against the full file.
 */
522 if (!lwkt_trytoken(&ilock, &depot->token)) {
523 lwkt_gettoken(&ilock, &depot->token);
527 lwkt_gettoken(&ilock, &depot->token);
529 ++depot->unallocated_objects;
532 lwkt_reltoken(&ilock);
/* destructor and back-end free run after the token has been released */
533 oc->dtor(obj, oc->private);
534 oc->free(obj, oc->allocator_args);
538 * Utility routine for objects that don't require any de-construction.
/*
 * Destructor for object types that need no de-construction.
 * NOTE(review): the body is not visible in this view.
 */
541 null_dtor(void *obj, void *private)
547 * De-construct and de-allocate objects in a magazine.
548 * Returns the number of objects freed.
549 * Does not de-allocate the magazine itself.
/*
 * mag_purge: empty a magazine, running oc->dtor and oc->free on every
 * object it holds.  The magazine itself is left allocated with rounds == 0.
 * NOTE(review): the counter and return statement are not visible in this
 * view; callers use the return value as the number of objects freed.
 */
552 mag_purge(struct objcache *oc, struct magazine *mag)
559 while (mag->rounds) {
/* objects are removed from the top of the magazine downwards */
560 obj = mag->objects[--mag->rounds];
562 oc->dtor(obj, oc->private);
563 oc->free(obj, oc->allocator_args);
572 * De-allocate all magazines in a magazine list.
573 * Returns number of objects de-allocated.
/*
 * maglist_purge: remove magazines from 'maglist' one at a time, purging
 * each with mag_purge() and then freeing the magazine itself.  ndeleted
 * accumulates the per-magazine counts returned by mag_purge().
 * NOTE(review): the declaration of the purgeall parameter, the statement
 * controlled by the final test and the return are not visible in this view;
 * presumably the loop stops early once something was freed and purgeall is
 * false -- confirm.
 */
576 maglist_purge(struct objcache *oc, struct magazinelist *maglist,
579 struct magazine *mag;
582 /* can't use SLIST_FOREACH because blocking releases the depot token */
583 while ((mag = SLIST_FIRST(maglist))) {
/* unlink first, so the magazine is off the list before anything below can block */
584 SLIST_REMOVE_HEAD(maglist, nextmagazine);
585 ndeleted += mag_purge(oc, mag); /* could block! */
586 free(mag, M_OBJMAG); /* could block! */
587 if (!purgeall && ndeleted > 0)
594 * De-allocates all magazines on the full and empty magazine lists.
/*
 * depot_purge: purge every magazine on the depot's full and empty lists
 * and add the number of objects freed back to unallocated_objects.
 * NOTE(review): the statement controlled by the final test is not visible
 * in this view.
 */
597 depot_purge(struct magazinedepot *depot, struct objcache *oc)
599 depot->unallocated_objects +=
600 maglist_purge(oc, &depot->fullmagazines, TRUE);
601 depot->unallocated_objects +=
602 maglist_purge(oc, &depot->emptymagazines, TRUE);
603 if (depot->unallocated_objects && depot->waiting)
/*
 * objcache_reclaim: purge both magazines of one per-cpu cache entry and
 * then every magazine in the depot.
 *
 * NOTE(review): cache_percpu[] is indexed with myclusterid here, while
 * objcache_get(), objcache_put() and objcache_reclaimlist() index it with
 * mycpuid.  myclusterid is defined as 0, so this always selects entry 0
 * rather than the current cpu's entry -- looks like a bug, confirm.
 * NOTE(review): the counts returned by the two mag_purge() calls are
 * discarded, whereas objcache_reclaimlist() adds them to
 * depot->unallocated_objects -- confirm whether that is intended.
 */
609 objcache_reclaim(struct objcache *oc)
611 struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
612 struct magazinedepot *depot = &oc->depot[myclusterid];
614 mag_purge(oc, cache_percpu->loaded_magazine);
615 mag_purge(oc, cache_percpu->previous_magazine);
617 /* XXX need depot token */
618 depot_purge(depot, oc);
623 * Try to free up some memory. Return as soon as some free memory found.
624 * For each object cache on the reclaim list, first try the current per-cpu
625 * cache, then the full magazine depot.
/*
 * objcache_reclaimlist: walk oclist[0..nlist-1] trying to free objects.
 * For each cache the current cpu's loaded magazine is purged first, the
 * previous magazine only if that freed nothing, and the depot's full
 * magazine list (one magazine's worth, purgeall == FALSE) only if neither
 * freed anything.  Every count of freed objects is added to the depot's
 * unallocated_objects under the depot token.
 * NOTE(review): ocflags is not referenced in the visible lines, and the
 * return statements and the statements controlled by the "waiting" tests
 * are not visible in this view.
 */
628 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
631 struct percpu_objcache *cpucache;
632 struct magazinedepot *depot;
636 for (i = 0; i < nlist; i++) {
638 cpucache = &oc->cache_percpu[mycpuid];
639 depot = &oc->depot[myclusterid];
/* || short-circuits: the previous magazine is left alone if the loaded one freed something */
642 if ((ndel = mag_purge(oc, cpucache->loaded_magazine)) > 0 ||
643 (ndel = mag_purge(oc, cpucache->previous_magazine)) > 0) {
645 lwkt_gettoken(&ilock, &depot->token);
646 depot->unallocated_objects += ndel;
647 if (depot->unallocated_objects && depot->waiting)
649 lwkt_reltoken(&ilock);
/* nothing came from the per-cpu magazines: try the depot's full magazines */
653 lwkt_gettoken(&ilock, &depot->token);
655 maglist_purge(oc, &depot->fullmagazines, FALSE)) > 0) {
656 depot->unallocated_objects += ndel;
657 if (depot->unallocated_objects && depot->waiting)
659 lwkt_reltoken(&ilock);
662 lwkt_reltoken(&ilock);
668 * Destroy an object cache. Must have no existing references.
669 * XXX Not clear this is a useful API function.
/*
 * objcache_destroy: tear down an object cache.  Every depot is purged,
 * then both magazines of every cpu are purged and freed, and finally the
 * name copy and the cache structure itself are freed.
 * NOTE(review): no removal of oc from the global allobjcaches list is
 * visible in this view, although objcache_timer() walks that list --
 * confirm against the full file.
 */
672 objcache_destroy(struct objcache *oc)
674 struct percpu_objcache *cache_percpu;
675 int clusterid, cpuid;
677 /* XXX need depot token? */
678 for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++)
679 depot_purge(&oc->depot[clusterid], oc);
681 for (cpuid = 0; cpuid < ncpus; cpuid++) {
682 cache_percpu = &oc->cache_percpu[cpuid];
/* mag_purge() releases the objects; the magazine itself is freed separately */
684 mag_purge(oc, cache_percpu->loaded_magazine);
685 free(cache_percpu->loaded_magazine, M_OBJMAG);
687 mag_purge(oc, cache_percpu->previous_magazine);
688 free(cache_percpu->previous_magazine, M_OBJMAG);
/* M_TEMP matches the type used by strdup() in objcache_create() */
691 free(oc->name, M_TEMP);
692 free(oc, M_OBJCACHE);
697 * Populate the per-cluster depot with elements from a linear block
698 * of memory. Must be called individually for each cluster.
699 * Populated depots should not be destroyed.
/*
 * objcache_populate_linear: load the current cluster's depot with objects
 * carved out of the block starting at 'base' ('nelts' elements of 'size'
 * bytes each; 'end' is computed as base + nelts * size).  Objects are
 * stored into magazines that sit on the depot's full list, and nelts is
 * added to the depot's unallocated_objects count.
 * NOTE(review): the return type, the loop header that steps p through the
 * block and the statement controlled by the final test are not visible in
 * this view.
 */
702 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
705 char *end = (char *)base + (nelts * size);
706 struct magazinedepot *depot = &oc->depot[myclusterid];
/*
 * The sentinel has rounds == capacity == 0, so MAGAZINE_FULL() is true for
 * it and the first object forces a real magazine to be allocated.
 */
708 struct magazine sentinelfullmag = { 0, 0 };
709 struct magazine *emptymag = &sentinelfullmag;
711 lwkt_gettoken(&ilock, &depot->token);
713 if (MAGAZINE_FULL(emptymag)) {
/* the new magazine is linked onto the full list before it is filled */
714 emptymag = mag_alloc(depot->magcapacity);
715 SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
718 emptymag->objects[emptymag->rounds++] = p;
721 depot->unallocated_objects += nelts;
722 if (depot->unallocated_objects && depot->waiting)
724 lwkt_reltoken(&ilock);
730 * Check depot contention once a minute.
731 * 2 contested locks per second allowed.
/* Interval between objcache_timer() runs, in ticks; set to 60 * hz at init. */
733 static int objcache_rebalance_period;
/* Contested-count threshold per interval above which objcache_timer() grows a depot's magazines. */
734 static const int objcache_contention_rate = 120;
/* Callout that re-arms objcache_timer(). */
735 static struct callout objcache_callout;
/* objcache_timer() only doubles a depot's magcapacity while it is below this value. */
737 #define MAXMAGSIZE 512
740 * Check depot contention and increase magazine size if necessary.
/*
 * objcache_timer: periodic callout handler.  Under objcachelist_token it
 * walks every object cache; for a depot whose magcapacity is still below
 * MAXMAGSIZE and whose contested count exceeds objcache_contention_rate,
 * it purges the depot, doubles magcapacity and logs the change.  The
 * handler then re-arms itself for another objcache_rebalance_period.
 * NOTE(review): the code that increments depot->contested is not visible
 * in this view, and neither is the last printf argument.  The purge before
 * doubling presumably discards magazines of the old capacity -- confirm.
 */
743 objcache_timer(void *dummy)
746 struct magazinedepot *depot;
747 lwkt_tokref olock, dlock;
749 lwkt_gettoken(&olock, &objcachelist_token);
750 SLIST_FOREACH(oc, &allobjcaches, oc_next) {
751 depot = &oc->depot[myclusterid];
752 if (depot->magcapacity < MAXMAGSIZE) {
753 if (depot->contested > objcache_contention_rate) {
/* the depot token is held only around the purge and the capacity change */
754 lwkt_gettoken(&dlock, &depot->token);
755 depot_purge(depot, oc);
756 depot->magcapacity *= 2;
757 lwkt_reltoken(&dlock);
758 printf("objcache_timer: increasing cache %s"
759 " magsize to %d, contested %d times\n",
760 oc->name, depot->magcapacity,
/* start a fresh count for the next interval */
763 depot->contested = 0;
766 lwkt_reltoken(&olock);
768 callout_reset(&objcache_callout, objcache_rebalance_period,
769 objcache_timer, NULL);
/*
 * Subsystem initialization: set up the token guarding the global cache
 * list and schedule the first objcache_timer() run 60 * hz ticks from now.
 * NOTE(review): the function header is not visible in this view; the
 * SYSINIT line below names the routine objcache_init.
 */
777 lwkt_token_init(&objcachelist_token);
779 callout_init(&objcache_callout);
780 objcache_rebalance_period = 60 * hz;
781 callout_reset(&objcache_callout, objcache_rebalance_period,
782 objcache_timer, NULL);
/* Register objcache_init to run at the SI_SUB_CPU stage of system startup. */
785 SYSINIT(objcache, SI_SUB_CPU, SI_ORDER_ANY, objcache_init, 0);