2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * $DragonFly: src/sys/kern/kern_objcache.c,v 1.2 2005/06/08 22:22:59 dillon Exp $
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/thread.h>
44 #include <sys/thread2.h>
/* malloc type charged for the objcache structure (see objcache_create/objcache_destroy). */
46 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
/* malloc type charged for magazines (see mag_alloc and the matching frees). */
47 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
/* Magazine capacity objcache_create() falls back to when mag_capacity is 0. */
49 #define INITIAL_MAG_CAPACITY 256
54 SLIST_ENTRY(magazine) nextmagazine;
/* Head type for singly-linked lists of magazines, chained through nextmagazine. */
58 SLIST_HEAD(magazinelist, magazine);
61 * per-cluster cache of magazines
62 * All fields in this structure are protected by the token.
/*
 * Depot shared by the cpus of one cluster: a list of full magazines, a
 * list of empty magazines, the token guarding them, the remaining
 * object allowance, and waiter/contention bookkeeping.
 */
64 struct magazinedepot {
66 * The per-cpu object caches only exchanges completely full or
67 * completely empty magazines with the depot layer, so only have
68 * to cache these two types of magazines.
70 struct magazinelist fullmagazines;
71 struct magazinelist emptymagazines;
74 /* protect this structure */
75 struct lwkt_token token;
77 /* magazines not yet allocated towards limit */
/*
 * The unit is objects, not magazines: objcache_get() decrements it once
 * per back-end allocation, the free paths increment it once per object
 * released, and objcache_create() stores -1 here when no limit applies.
 */
78 int unallocated_objects;
80 /* infrequently used fields */
81 int waiting; /* waiting for another cpu to
82 * return a full magazine to
84 int contested; /* depot contention count */
88 * per-cpu object cache
89 * All fields in this structure are protected by crit_enter().
/*
 * State kept for each cpu: the magazine currently being drawn from or
 * filled, a backup magazine, running statistics, and a waiter flag.
 * objcache_create() allocates one of these per cpu (ncpus entries).
 */
91 struct percpu_objcache {
92 struct magazine *loaded_magazine; /* active magazine */
93 struct magazine *previous_magazine; /* backup magazine */
96 int gets_cumulative; /* total calls to get */
97 int gets_null; /* objcache_get returned NULL */
98 int puts_cumulative; /* total calls to put */
99 int puts_othercluster; /* returned to other cluster */
101 /* infrequently used fields */
102 int waiting; /* waiting for a thread on this cpu to
103 * return an obj to the per-cpu cache */
106 /* only until we have NUMA cluster topology information XXX */
/*
 * Single-cluster placeholders: there is exactly one depot, and both the
 * current cluster id and the cluster of any object evaluate to 0.
 */
107 #define MAXCLUSTERS 1
108 #define myclusterid 0
109 #define CLUSTER_OF(obj) 0
112 * Two-level object cache consisting of NUMA cluster-level depots of
113 * fully loaded or completely empty magazines and cpu-level caches of
114 * individual objects.
/*
 * Object cache members: the ctor/dtor hooks, the back-end allocator
 * hooks with their argument, the linkage for the global cache list,
 * one depot per cluster, and a trailing array of per-cpu caches that
 * objcache_create() sizes with ncpus.
 */
119 /* object constructor and destructor from blank storage */
120 objcache_ctor_fn *ctor;
121 objcache_dtor_fn *dtor;
124 /* interface to underlying allocator */
125 objcache_alloc_fn *alloc;
126 objcache_free_fn *free;
127 void *allocator_args;
/* links this cache onto allobjcaches */
129 SLIST_ENTRY(objcache) oc_next;
131 /* NUMA-cluster level caches */
132 struct magazinedepot depot[MAXCLUSTERS];
134 struct percpu_objcache cache_percpu[]; /* per-cpu caches */
/*
 * Global list of every object cache (inserted by objcache_create(),
 * walked by objcache_timer()) and the token held around both accesses.
 */
137 static struct lwkt_token objcachelist_token;
138 static SLIST_HEAD(objcachelist, objcache) allobjcaches;
/*
 * Allocate a zero-filled magazine with room for 'capacity' entries in
 * its trailing objects[] array, charged to M_OBJMAG, and record the
 * capacity in it.
 */
140 static struct magazine *
141 mag_alloc(int capacity)
143 struct magazine *mag;
/* the size runs up to objects[capacity], i.e. header plus 'capacity' slots */
145 mag = malloc(__offsetof(struct magazine, objects[capacity]),
146 M_OBJMAG, M_INTWAIT | M_ZERO);
147 mag->capacity = capacity;
153 * Create an object cache.
/*
 * Build a new object cache.
 *
 * name            copied with strdup() into M_TEMP storage
 * cluster_limit   0 stores -1 in unallocated_objects (no limit, see the
 *                 note in objcache_get()); a non-zero value is raised to
 *                 at least mag_capacity * ncpus * 3 before being stored
 * mag_capacity    magazine size; 0 selects INITIAL_MAG_CAPACITY
 * ctor, dtor      object constructor / destructor hooks
 * private         argument handed to the ctor and dtor hooks
 * alloc, free     back-end allocator hooks
 * allocator_args  argument handed to the back-end allocator hooks
 *
 * Every cpu receives a loaded and a previous magazine of mag_capacity
 * entries, and the finished cache is linked onto allobjcaches.
 */
156 objcache_create(char *name, int cluster_limit, int mag_capacity,
157 objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *private,
158 objcache_alloc_fn *alloc, objcache_free_fn *free,
159 void *allocator_args)
162 struct magazinedepot *depot;
166 /* allocate object cache structure */
/* sized up to cache_percpu[ncpus]: one per-cpu slot for each cpu */
167 oc = malloc(__offsetof(struct objcache, cache_percpu[ncpus]),
168 M_OBJCACHE, M_WAITOK | M_ZERO);
/* released again with free(oc->name, M_TEMP) in objcache_destroy() */
169 oc->name = strdup(name, M_TEMP);
172 oc->private = private;
174 oc->allocator_args = allocator_args;
176 /* initialize depots */
/* only depot[0] is set up here; MAXCLUSTERS is 1 */
177 depot = &oc->depot[0];
179 lwkt_token_init(&depot->token);
180 SLIST_INIT(&depot->fullmagazines);
181 SLIST_INIT(&depot->emptymagazines);
183 if (mag_capacity == 0)
184 mag_capacity = INITIAL_MAG_CAPACITY;
185 depot->magcapacity = mag_capacity;
188 * The cluster_limit must be sufficient to have three magazines per
191 if (cluster_limit == 0) {
192 depot->unallocated_objects = -1;
194 if (cluster_limit < mag_capacity * ncpus * 3)
195 cluster_limit = mag_capacity * ncpus * 3;
196 depot->unallocated_objects = cluster_limit;
200 /* initialize per-cpu caches */
201 for (cpuid = 0; cpuid < ncpus; cpuid++) {
202 struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
204 cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
205 cache_percpu->previous_magazine = mag_alloc(mag_capacity);
/* publish the cache on the global list while holding the list token */
207 lwkt_gettoken(&olock, &objcachelist_token);
208 SLIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
209 lwkt_reltoken(&olock);
/*
 * Magazine fill-state predicates.  'mag' is any expression of type
 * struct magazine *; it is parenthesized in the expansion so that
 * arguments such as &m or p + 1 bind correctly.  MAGAZINE_FULL
 * evaluates its argument twice, so do not pass expressions with side
 * effects.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two struct magazine * lvalues.  Still a statement expression,
 * so existing "swap(a, b);" uses are unaffected.  The temporary has a
 * reserved-looking name so that a caller variable called 't' cannot be
 * captured, and both arguments are parenthesized.
 */
#define swap(x, y)	({ struct magazine *swap_tmp_ = (x); (x) = (y); (y) = swap_tmp_; })
221 * Get an object from the object cache.
/*
 * Obtain an object from 'oc'.  The visible code tries, in order:
 *   1. the loaded per-cpu magazine;
 *   2. the previous per-cpu magazine, swapped in as the loaded one;
 *   3. a full magazine from the cluster depot, rotated into the per-cpu
 *      cache while the depot token is held;
 *   4. the back-end allocator plus ctor, provided the depot's
 *      unallocated_objects allowance is non-zero.
 * Failing all of these it sleeps on the depot when ocflags contains
 * M_WAITOK without M_NULLOK.  gets_cumulative counts every call and
 * gets_null is bumped on the paths that come up empty.
 *
 * ocflags is passed through to the alloc and ctor hooks.
 */
224 objcache_get(struct objcache *oc, int ocflags)
226 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
227 struct magazine *loadedmag;
228 struct magazine *emptymag;
230 struct magazinedepot *depot;
234 ++cpucache->gets_cumulative;
238 * Loaded magazine has an object. This is the hot path.
239 * It is lock-free and uses a critical section to block
240 * out interrupt handlers on the same processor.
242 loadedmag = cpucache->loaded_magazine;
243 if (MAGAZINE_NOTEMPTY(loadedmag)) {
244 obj = loadedmag->objects[--loadedmag->rounds];
249 /* Previous magazine has an object. */
250 if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
251 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
252 loadedmag = cpucache->loaded_magazine;
253 obj = loadedmag->objects[--loadedmag->rounds];
259 * Both magazines empty. Get a full magazine from the depot and
260 * move one of the empty ones to the depot. Do this even if we
261 * block on the token to avoid a non-optimal corner case.
263 * Obtain the depot token.
265 depot = &oc->depot[myclusterid];
/* non-blocking attempt first; only block for the token if that fails */
266 if (!lwkt_trytoken(&ilock, &depot->token)) {
267 lwkt_gettoken(&ilock, &depot->token);
271 /* Check if depot has a full magazine. */
272 if (!SLIST_EMPTY(&depot->fullmagazines)) {
/*
 * Rotate: the old previous magazine leaves the cpu, the old loaded one
 * becomes previous, and the depot's first full magazine becomes loaded.
 */
273 emptymag = cpucache->previous_magazine;
274 cpucache->previous_magazine = cpucache->loaded_magazine;
275 cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
276 SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
279 * Return emptymag to the depot. Due to blocking it may
280 * not be entirely empty.
282 if (MAGAZINE_EMPTY(emptymag)) {
283 SLIST_INSERT_HEAD(&depot->emptymagazines,
284 emptymag, nextmagazine);
287 * NOTE: magazine is not necessarily entirely full
289 SLIST_INSERT_HEAD(&depot->fullmagazines,
290 emptymag, nextmagazine);
294 lwkt_reltoken(&ilock);
299 * The depot does not have any non-empty magazines. If we have
300 * not hit our object limit we can allocate a new object using
301 * the back-end allocator.
303 * note: unallocated_objects can be initialized to -1, which has
304 * the effect of removing any allocation limits.
306 if (depot->unallocated_objects) {
/* charge the allowance while the token is held, then drop the token before calling the back end */
307 --depot->unallocated_objects;
308 lwkt_reltoken(&ilock);
311 obj = oc->alloc(oc->allocator_args, ocflags);
/*
 * NOTE(review): the statement controlled by the ctor test is not
 * visible here.  The lines after it free the object and give the
 * allowance back under the token, presumably the ctor-failure path;
 * confirm which ctor result means success.
 */
313 if (oc->ctor(obj, oc->private, ocflags))
315 oc->free(obj, oc->allocator_args);
316 lwkt_gettoken(&ilock, &depot->token);
317 ++depot->unallocated_objects;
320 lwkt_reltoken(&ilock);
325 ++cpucache->gets_null;
332 * Otherwise block if allowed to.
334 if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
/* sleeps on the depot address, the same channel objcache_put() passes to wakeup() */
337 tsleep(depot, PCATCH, "objcache_get", 0);
340 lwkt_reltoken(&ilock);
343 ++cpucache->gets_null;
349 * Wrapper for malloc allocation routines.
/*
 * Back-end alloc hook built on malloc().  allocator_args points to a
 * struct objcache_malloc_args providing the object size and malloc
 * type; only the OC_MFLAGS bits of ocflags are forwarded to malloc().
 * Returns whatever malloc() returns.
 */
352 objcache_malloc_alloc(void *allocator_args, int ocflags)
354 struct objcache_malloc_args *alloc_args = allocator_args;
356 return (malloc(alloc_args->objsize, alloc_args->mtype,
357 ocflags & OC_MFLAGS));
/*
 * Back-end free hook paired with objcache_malloc_alloc(): releases obj
 * against the malloc type stored in the struct objcache_malloc_args
 * that allocator_args points to.
 */
361 objcache_malloc_free(void *obj, void *allocator_args)
363 struct objcache_malloc_args *alloc_args = allocator_args;
365 free(obj, alloc_args->mtype);
369 * Wrapper for allocation policies that pre-allocate at initialization time
370 * and don't do run-time allocation.
/*
 * Alloc hook with the back-end allocator signature for caches that are
 * filled up front.  NOTE(review): the body is not visible here;
 * presumably it allocates nothing and returns NULL -- confirm.
 */
373 objcache_nop_alloc(void *allocator_args, int ocflags)
/*
 * Free hook counterpart of objcache_nop_alloc().  NOTE(review): the
 * body is not visible here; presumably it does nothing -- confirm.
 */
379 objcache_nop_free(void *obj, void *allocator_args)
384 * Return an object to the object cache.
/*
 * Hand 'obj' back to 'oc'.  The visible code tries, in order:
 *   1. a free slot in the loaded per-cpu magazine;
 *   2. a free slot in the previous magazine, swapped in as loaded;
 *   3. rotating an empty magazine in from the cluster depot while the
 *      depot token is held;
 *   4. otherwise crediting unallocated_objects, dropping the token and
 *      running the dtor and back-end free on the object.
 * After storing into a per-cpu magazine, sleepers on the depot are woken
 * when cpucache->waiting is set.  puts_cumulative counts every call.
 */
387 objcache_put(struct objcache *oc, void *obj)
389 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
390 struct magazine *loadedmag;
391 struct magazinedepot *depot;
395 ++cpucache->puts_cumulative;
/* cannot be true while CLUSTER_OF() and myclusterid are both the constant 0 */
397 if (CLUSTER_OF(obj) != myclusterid) {
399 /* use lazy IPI to send object to owning cluster XXX todo */
400 ++cpucache->puts_othercluster;
407 * Free slot available in loaded magazine. This is the hot path.
408 * It is lock-free and uses a critical section to block out interrupt
409 * handlers on the same processor.
411 loadedmag = cpucache->loaded_magazine;
412 if (!MAGAZINE_FULL(loadedmag)) {
413 loadedmag->objects[loadedmag->rounds++] = obj;
414 if (cpucache->waiting)
415 wakeup(&oc->depot[myclusterid]);
421 * Current magazine full, but previous magazine has room. XXX
423 if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
424 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
425 loadedmag = cpucache->loaded_magazine;
426 loadedmag->objects[loadedmag->rounds++] = obj;
427 if (cpucache->waiting)
428 wakeup(&oc->depot[myclusterid]);
434 * Both magazines full. Get an empty magazine from the depot and
435 * move a full loaded magazine to the depot. Even though the
436 * magazine may wind up with space available after we block on
437 * the token, we still cycle it through to avoid the non-optimal
440 * Obtain the depot token.
442 depot = &oc->depot[myclusterid];
/* non-blocking attempt first; only block for the token if that fails */
443 if (!lwkt_trytoken(&ilock, &depot->token)) {
444 lwkt_gettoken(&ilock, &depot->token);
449 * If an empty magazine is available in the depot, cycle it
452 if (!SLIST_EMPTY(&depot->emptymagazines)) {
/*
 * Rotate: the old previous magazine leaves the cpu (held in loadedmag),
 * the old loaded one becomes previous, and the depot's first empty
 * magazine becomes loaded.
 */
453 loadedmag = cpucache->previous_magazine;
454 cpucache->previous_magazine = cpucache->loaded_magazine;
455 cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
456 SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
459 * Return loadedmag to the depot. Due to blocking it may
460 * not be entirely full and could even be empty.
462 if (MAGAZINE_EMPTY(loadedmag)) {
463 SLIST_INSERT_HEAD(&depot->emptymagazines,
464 loadedmag, nextmagazine);
466 SLIST_INSERT_HEAD(&depot->fullmagazines,
467 loadedmag, nextmagazine);
471 lwkt_reltoken(&ilock);
476 * An empty mag is not available. This is a corner case which can
477 * occur due to cpus holding partially full magazines. Do not try
478 * to allocate a mag, just free the object.
/* the allowance is credited under the token; dtor and free run after it is released */
480 ++depot->unallocated_objects;
483 lwkt_reltoken(&ilock);
485 oc->dtor(obj, oc->private);
486 oc->free(obj, oc->allocator_args);
490 * The object is being put back into the cache, but the caller has
491 * indicated that the object is not in any shape to be reused and should
492 * be dtor'd immediately.
/*
 * Dispose of 'obj' without caching it: credit the current cluster
 * depot's unallocated_objects under the depot token, then, with the
 * token released, run the dtor hook followed by the back-end free.
 */
495 objcache_dtor(struct objcache *oc, void *obj)
497 struct magazinedepot *depot;
500 depot = &oc->depot[myclusterid];
/* non-blocking attempt first; only block for the token if that fails */
501 if (!lwkt_trytoken(&ilock, &depot->token)) {
502 lwkt_gettoken(&ilock, &depot->token);
505 ++depot->unallocated_objects;
508 lwkt_reltoken(&ilock);
509 oc->dtor(obj, oc->private);
510 oc->free(obj, oc->allocator_args);
514 * Utility routine for objects that don't require any de-construction.
/*
 * Destructor hook taking the same (obj, private) arguments the cache
 * passes to oc->dtor.  NOTE(review): the body is not visible here;
 * presumably empty -- confirm.
 */
517 null_dtor(void *obj, void *private)
523 * De-construct and de-allocate objects in a magazine.
524 * Returns the number of objects freed.
525 * Does not de-allocate the magazine itself.
/*
 * Drain 'mag': objects are taken from the top slot downwards until
 * rounds reaches 0, and each one is passed to the dtor hook and then to
 * the back-end free hook of 'oc'.  Callers use the result as a count of
 * objects released.
 */
528 mag_purge(struct objcache *oc, struct magazine *mag)
535 while (mag->rounds) {
536 obj = mag->objects[--mag->rounds];
538 oc->dtor(obj, oc->private);
539 oc->free(obj, oc->allocator_args);
548 * De-allocate all magazines in a magazine list.
549 * Returns number of objects de-allocated.
/*
 * Repeatedly detach the first magazine of 'maglist', purge its objects
 * with mag_purge() and free the magazine itself, accumulating the
 * number of objects released in ndeleted.  With purgeall false the
 * test at the bottom of the loop fires once anything has been released
 * (the statement it controls is not visible here; presumably it leaves
 * the loop -- confirm).
 */
552 maglist_purge(struct objcache *oc, struct magazinelist *maglist,
555 struct magazine *mag;
558 /* can't use SLIST_FOREACH because blocking releases the depot token */
559 while ((mag = SLIST_FIRST(maglist))) {
/* unlink before purging so the list head stays valid if the calls below block */
560 SLIST_REMOVE_HEAD(maglist, nextmagazine);
561 ndeleted += mag_purge(oc, mag); /* could block! */
562 free(mag, M_OBJMAG); /* could block! */
563 if (!purgeall && ndeleted > 0)
570 * De-allocates all magazines on the full and empty magazine lists.
/*
 * Purge both the full and the empty magazine list of 'depot' completely
 * (purgeall = TRUE) and add the number of objects released to
 * unallocated_objects.  The final test checks for a non-zero allowance
 * together with a waiter; the statement it controls is not visible here.
 */
573 depot_purge(struct magazinedepot *depot, struct objcache *oc)
575 depot->unallocated_objects +=
576 maglist_purge(oc, &depot->fullmagazines, TRUE);
577 depot->unallocated_objects +=
578 maglist_purge(oc, &depot->emptymagazines, TRUE);
579 if (depot->unallocated_objects && depot->waiting)
/*
 * Release cached objects of 'oc': purge the loaded and previous
 * magazines of one per-cpu cache, then purge the current cluster's depot.
 *
 * NOTE(review): cache_percpu[] is indexed with myclusterid here, while
 * objcache_get(), objcache_put() and objcache_reclaimlist() index it
 * with mycpuid -- this looks unintended; confirm.
 * NOTE(review): the counts returned by the two mag_purge() calls are
 * discarded, so unallocated_objects is not credited for them (compare
 * objcache_reclaimlist()) -- confirm whether that is intended.
 */
585 objcache_reclaim(struct objcache *oc)
587 struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
588 struct magazinedepot *depot = &oc->depot[myclusterid];
590 mag_purge(oc, cache_percpu->loaded_magazine);
591 mag_purge(oc, cache_percpu->previous_magazine);
593 /* XXX need depot token */
594 depot_purge(depot, oc);
599 * Try to free up some memory. Return as soon as some free memory is found.
600 * For each object cache on the reclaim list, first try the current per-cpu
601 * cache, then the full magazine depot.
/*
 * Walk the nlist entries of oclist[].  For each cache, purge this cpu's
 * loaded magazine and, only if that released nothing, its previous
 * magazine; when either released objects, the count is credited to the
 * depot's unallocated_objects under the depot token.  Otherwise the
 * depot token is taken and full magazines are purged with
 * purgeall = FALSE, crediting whatever was released.
 *
 * ocflags is not referenced in the visible lines.
 */
604 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
607 struct percpu_objcache *cpucache;
608 struct magazinedepot *depot;
612 for (i = 0; i < nlist; i++) {
614 cpucache = &oc->cache_percpu[mycpuid];
615 depot = &oc->depot[myclusterid];
/* short-circuit: the previous magazine is only purged when the loaded one yielded nothing */
618 if ((ndel = mag_purge(oc, cpucache->loaded_magazine)) > 0 ||
619 (ndel = mag_purge(oc, cpucache->previous_magazine)) > 0) {
621 lwkt_gettoken(&ilock, &depot->token);
622 depot->unallocated_objects += ndel;
623 if (depot->unallocated_objects && depot->waiting)
625 lwkt_reltoken(&ilock);
/* nothing in the per-cpu cache: fall back to the depot's full magazines */
629 lwkt_gettoken(&ilock, &depot->token);
631 maglist_purge(oc, &depot->fullmagazines, FALSE)) > 0) {
632 depot->unallocated_objects += ndel;
633 if (depot->unallocated_objects && depot->waiting)
635 lwkt_reltoken(&ilock);
638 lwkt_reltoken(&ilock);
644 * Destroy an object cache. Must have no existing references.
645 * XXX Not clear this is a useful API function.
/*
 * Tear down 'oc': purge every cluster depot, purge and free both
 * magazines of every cpu, then free the name string and the cache
 * structure itself.
 *
 * NOTE(review): no visible line removes 'oc' from allobjcaches, the
 * list objcache_timer() walks -- confirm the cache is unlinked before
 * it is freed.
 */
648 objcache_destroy(struct objcache *oc)
650 struct percpu_objcache *cache_percpu;
651 int clusterid, cpuid;
653 /* XXX need depot token? */
654 for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++)
655 depot_purge(&oc->depot[clusterid], oc);
657 for (cpuid = 0; cpuid < ncpus; cpuid++) {
658 cache_percpu = &oc->cache_percpu[cpuid];
/* objects first, then the magazine that held them */
660 mag_purge(oc, cache_percpu->loaded_magazine);
661 free(cache_percpu->loaded_magazine, M_OBJMAG);
663 mag_purge(oc, cache_percpu->previous_magazine);
664 free(cache_percpu->previous_magazine, M_OBJMAG);
/* matches the strdup(name, M_TEMP) in objcache_create() */
667 free(oc->name, M_TEMP);
668 free(oc, M_OBJCACHE);
673 * Populate the per-cluster depot with elements from a linear block
674 * of memory. Must be called individually for each cluster.
675 * Populated depots should not be destroyed.
/*
 * Load the current cluster's depot from a linear memory block.
 *
 * base   start of the block
 * nelts  number of elements in the block
 * size   size of one element; 'end' is base + nelts * size
 *
 * Under the depot token, element pointers are stored into magazines of
 * depot->magcapacity entries.  A magazine is put on the fullmagazines
 * list as soon as it is allocated, so the last one may be only partly
 * filled.  Afterwards nelts is added to unallocated_objects.  The loop
 * header that advances 'p' is not visible here.
 */
678 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
681 char *end = (char *)base + (nelts * size);
682 struct magazinedepot *depot = &oc->depot[myclusterid];
/* all-zero sentinel: rounds == capacity, so MAGAZINE_FULL() is true and the first element forces an allocation */
684 struct magazine sentinelfullmag = { 0, 0 };
685 struct magazine *emptymag = &sentinelfullmag;
687 lwkt_gettoken(&ilock, &depot->token);
689 if (MAGAZINE_FULL(emptymag)) {
690 emptymag = mag_alloc(depot->magcapacity);
691 SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
694 emptymag->objects[emptymag->rounds++] = p;
697 depot->unallocated_objects += nelts;
698 if (depot->unallocated_objects && depot->waiting)
700 lwkt_reltoken(&ilock);
706 * Check depot contention once a minute.
707 * 2 contested locks per second allowed.
/* interval passed to callout_reset() for objcache_timer(); set to 60 * hz at init */
709 static int objcache_rebalance_period;
/* objcache_timer() grows a depot's magazine size when 'contested' exceeds this per period */
710 static const int objcache_contention_rate = 120;
/* callout that runs objcache_timer() */
711 static struct callout objcache_callout;
/* objcache_timer() only doubles magcapacity while it is below this value */
713 #define MAXMAGSIZE 512
716 * Check depot contention and increase magazine size if necessary.
/*
 * Callout handler.  With the cache-list token held, visit every cache
 * on allobjcaches; where the current cluster depot's magcapacity is
 * still below MAXMAGSIZE and its contested count exceeds
 * objcache_contention_rate, purge the depot and double magcapacity
 * under the depot token, then log the change.  The contested count is
 * cleared, and the handler re-arms itself for another
 * objcache_rebalance_period.
 *
 * dummy is not referenced in the visible lines.
 */
719 objcache_timer(void *dummy)
722 struct magazinedepot *depot;
723 lwkt_tokref olock, dlock;
725 lwkt_gettoken(&olock, &objcachelist_token);
726 SLIST_FOREACH(oc, &allobjcaches, oc_next) {
727 depot = &oc->depot[myclusterid];
728 if (depot->magcapacity < MAXMAGSIZE) {
729 if (depot->contested > objcache_contention_rate) {
/* the depot is emptied before magcapacity changes */
730 lwkt_gettoken(&dlock, &depot->token);
731 depot_purge(depot, oc);
732 depot->magcapacity *= 2;
733 lwkt_reltoken(&dlock);
734 printf("objcache_timer: increasing cache %s"
735 " magsize to %d, contested %d times\n",
736 oc->name, depot->magcapacity,
739 depot->contested = 0;
742 lwkt_reltoken(&olock);
744 callout_reset(&objcache_callout, objcache_rebalance_period,
745 objcache_timer, NULL);
/*
 * Visible body of the routine registered as objcache_init by the
 * SYSINIT line below: initialize the cache-list token, initialize the
 * callout, set the rebalance period to 60 * hz and arm objcache_timer().
 */
753 lwkt_token_init(&objcachelist_token);
755 callout_init(&objcache_callout);
/* presumably one minute expressed in ticks, matching the once-a-minute contention check -- confirm hz units */
756 objcache_rebalance_period = 60 * hz;
757 callout_reset(&objcache_callout, objcache_rebalance_period,
758 objcache_timer, NULL);
761 SYSINIT(objcache, SI_SUB_CPU, SI_ORDER_ANY, objcache_init, 0);