2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * $DragonFly: src/sys/kern/kern_objcache.c,v 1.1 2005/06/07 19:07:11 hsu Exp $
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/thread.h>
44 #include <sys/thread2.h>
46 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
47 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
49 #define INITIAL_MAG_CAPACITY 5
54 SLIST_ENTRY(magazine) nextmagazine;
58 SLIST_HEAD(magazinelist, magazine);
61 * per-cluster cache of magazines
62 * All fields in this structure are protected by the token.
64 struct magazinedepot {
66 * The per-cpu object caches only exchanges completely full or
67 * completely empty magazines with the depot layer, so only have
68 * to cache these two types of magazines.
70 struct magazinelist fullmagazines;
71 struct magazinelist emptymagazines;
74 struct lwkt_token token; /* protects all fields in this struct */
76 int cluster_balance; /* outstanding objects */
77 int cluster_limit; /* new obj creation limit */
80 int emptymagazines_cumulative;
82 /* infrequently used fields */
83 int waiting; /* waiting for another cpu to
84 * return a full magazine to
86 int contested; /* depot contention count */
90 * per-cpu object cache
91 * All fields in this structure are protected by crit_enter().
93 struct percpu_objcache {
94 struct magazine *loaded_magazine; /* active magazine */
95 struct magazine *previous_magazine; /* backup magazine */
98 int gets_cumulative; /* total calls to get */
99 int gets_null; /* objcache_get returned NULL */
100 int puts_cumulative; /* total calls to put */
101 int puts_othercluster; /* returned to other cluster */
103 /* infrequently used fields */
104 int waiting; /* waiting for a thread on this cpu to
105 * return an obj to the per-cpu cache */
108 /* only until we have NUMA cluster topology information XXX */
109 #define MAXCLUSTERS 1
110 #define myclusterid 0
111 #define CLUSTER_OF(obj) 0
114 * Two-level object cache consisting of NUMA cluster-level depots of
115 * fully loaded or completely empty magazines and cpu-level caches of
116 * individual objects.
121 /* object constructor and destructor from blank storage */
122 objcache_ctor_fn *ctor;
123 objcache_dtor_fn *dtor;
126 /* interface to underlying allocator */
127 objcache_alloc_fn *alloc;
128 objcache_free_fn *free;
129 void *allocator_args;
131 SLIST_ENTRY(objcache) oc_next;
133 /* NUMA-cluster level caches */
134 struct magazinedepot depot[MAXCLUSTERS];
136 struct percpu_objcache cache_percpu[]; /* per-cpu caches */
139 static struct lwkt_token objcachelist_token;
140 static SLIST_HEAD(objcachelist, objcache) allobjcaches;
142 static struct magazine *
143 mag_alloc(int capacity)
145 struct magazine *mag;
147 mag = malloc(sizeof(struct magazine) + capacity * sizeof(void *),
148 M_OBJMAG, M_INTWAIT);
149 mag->capacity = capacity;
155 * Create an object cache.
158 objcache_create(char *name, int cluster_limit, int mag_capacity,
159 objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *private,
160 objcache_alloc_fn *alloc, objcache_free_fn *free,
161 void *allocator_args)
164 struct magazinedepot *depot;
168 /* allocate object cache structure */
169 oc = malloc(sizeof(struct objcache) +
170 ncpus * sizeof(struct percpu_objcache), M_OBJCACHE, M_WAITOK);
171 oc->name = strdup(name, M_TEMP);
174 oc->private = private;
176 oc->allocator_args = allocator_args;
178 /* initialize depots */
179 depot = &oc->depot[0];
180 SLIST_INIT(&depot->fullmagazines);
181 SLIST_INIT(&depot->emptymagazines);
182 depot->cluster_limit = cluster_limit;
183 depot->cluster_balance = 0;
184 depot->emptymagazines_cumulative = 0;
185 lwkt_token_init(&depot->token);
186 if (mag_capacity == 0)
187 mag_capacity = INITIAL_MAG_CAPACITY;
188 depot->magcapacity = mag_capacity;
190 depot->contested = 0;
192 /* initialize per-cpu caches */
193 for (cpuid = 0; cpuid < ncpus; cpuid++) {
194 struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
196 cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
197 cache_percpu->previous_magazine = mag_alloc(mag_capacity);
198 cache_percpu->gets_cumulative = 0;
199 cache_percpu->gets_null = 0;
200 cache_percpu->puts_cumulative = 0;
201 cache_percpu->puts_othercluster = 0;
204 lwkt_gettoken(&olock, &objcachelist_token);
205 SLIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
206 lwkt_reltoken(&olock);
211 #define MAGAZINE_EMPTY(mag) (mag->rounds == 0)
212 #define MAGAZINE_FULL(mag) (mag->rounds == mag->capacity)
214 #define swap(x, y) ({ struct magazine *t = x; x = y; y = t; })
217 * Get an object from the object cache.
220 objcache_get(struct objcache *oc, int ocflags)
222 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
223 struct magazine *loadedmag;
225 struct magazinedepot *depot;
229 ++cpucache->gets_cumulative;
233 * Loaded magazine has an object. This is the hot path.
234 * It is lock-free and uses a critical section to block
235 * out interrupt handlers on the same processor.
237 loadedmag = cpucache->loaded_magazine;
238 if (!MAGAZINE_EMPTY(loadedmag)) {
239 alloc: obj = loadedmag->objects[--loadedmag->rounds];
244 /* Previous magazine has an object. */
245 if (!MAGAZINE_EMPTY(cpucache->previous_magazine)) {
246 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
247 loadedmag = cpucache->loaded_magazine;
252 * Both magazines empty. Get a full magazine from the depot.
255 /* Obtain the depot token. */
256 depot = &oc->depot[myclusterid];
257 if (!lwkt_trytoken(&ilock, &depot->token)) {
258 lwkt_gettoken(&ilock, &depot->token);
260 if (!MAGAZINE_EMPTY(cpucache->loaded_magazine) ||
261 !MAGAZINE_EMPTY(cpucache->previous_magazine)) {
262 lwkt_reltoken(&ilock);
267 /* Check if depot has a full magazine. */
268 if (!SLIST_EMPTY(&depot->fullmagazines)) {
269 if (cpucache->previous_magazine->capacity == depot->magcapacity)
270 SLIST_INSERT_HEAD(&depot->emptymagazines,
271 cpucache->previous_magazine,
274 free(cpucache->previous_magazine, M_OBJMAG);
275 cpucache->previous_magazine = cpucache->loaded_magazine;
276 cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
277 loadedmag = cpucache->loaded_magazine;
278 SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
279 lwkt_reltoken(&ilock);
287 /* Check object allocation limit. */
288 if (depot->cluster_balance >= depot->cluster_limit) {
289 if (ocflags & M_NULLOK)
291 /* Wait until someone frees an existing object. */
292 if (ocflags & M_WAITOK) {
295 tsleep(depot, PCATCH, "objcache_get", 0);
298 lwkt_reltoken(&ilock);
304 /* Allocate a new object using the back-end allocator. */
305 obj = oc->alloc(oc->allocator_args, ocflags);
307 if (oc->ctor(obj, oc->private, ocflags)) {
308 ++depot->cluster_balance;
309 lwkt_reltoken(&ilock);
310 return (obj); /* common case */
312 oc->free(obj, oc->allocator_args);
316 ++cpucache->gets_null;
318 lwkt_reltoken(&ilock);
323 * Wrapper for malloc allocation routines.
326 objcache_malloc_alloc(void *allocator_args, int ocflags)
328 struct objcache_malloc_args *alloc_args = allocator_args;
330 return (malloc(alloc_args->objsize, alloc_args->mtype,
331 ocflags & OC_MFLAGS));
335 objcache_malloc_free(void *obj, void *allocator_args)
337 struct objcache_malloc_args *alloc_args = allocator_args;
339 free(obj, alloc_args->mtype);
343 * Wrapper for allocation policies that pre-allocate at initialization time
344 * and don't do run-time allocation.
347 objcache_nop_alloc(void *allocator_args, int ocflags)
353 objcache_nop_free(void *obj, void *allocator_args)
359 * Return an object to the object cache.
362 objcache_put(struct objcache *oc, void *obj)
364 struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
365 struct magazine *loadedmag;
366 struct magazinedepot *depot;
368 struct magazine *emptymag;
371 ++cpucache->puts_cumulative;
373 if (CLUSTER_OF(obj) != myclusterid) {
375 /* use lazy IPI to send object to owning cluster XXX todo */
376 ++cpucache->puts_othercluster;
383 * Free slot available in loaded magazine. This is the hot path.
384 * It is lock-free and uses a critical section to block out interrupt
385 * handlers on the same processor.
387 loadedmag = cpucache->loaded_magazine;
388 if (!MAGAZINE_FULL(loadedmag)) {
389 free: loadedmag->objects[loadedmag->rounds++] = obj;
390 if (cpucache->waiting)
391 wakeup(&oc->depot[myclusterid]);
396 /* Current magazine full, but previous magazine empty. */
397 if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
398 swap(cpucache->loaded_magazine, cpucache->previous_magazine);
399 loadedmag = cpucache->loaded_magazine;
404 * Both magazines full. Get an empty magazine from the depot.
407 /* Obtain the depot token. */
408 depot = &oc->depot[myclusterid];
409 if (!lwkt_trytoken(&ilock, &depot->token)) {
411 lwkt_gettoken(&ilock, &depot->token);
414 if (!MAGAZINE_FULL(cpucache->loaded_magazine) ||
415 !MAGAZINE_FULL(cpucache->previous_magazine)) {
416 lwkt_reltoken(&ilock);
421 /* Check if depot has empty magazine. */
422 if (!SLIST_EMPTY(&depot->emptymagazines)) {
423 emptymag = SLIST_FIRST(&depot->emptymagazines);
424 SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
425 haveemptymag: if (cpucache->previous_magazine->capacity == depot->magcapacity)
426 SLIST_INSERT_HEAD(&depot->fullmagazines,
427 cpucache->previous_magazine, nextmagazine);
429 free(cpucache->previous_magazine, M_OBJMAG);
430 cpucache->previous_magazine = cpucache->loaded_magazine;
431 cpucache->loaded_magazine = emptymag;
432 loadedmag = cpucache->loaded_magazine;
433 lwkt_reltoken(&ilock);
437 /* Allocate a new empty magazine. */
438 if (depot->cluster_balance < depot->cluster_limit + depot->magcapacity){
439 emptymag = mag_alloc(depot->magcapacity);
440 ++depot->emptymagazines_cumulative;
444 --depot->cluster_balance;
445 KKASSERT(depot->cluster_balance >= 0);
448 lwkt_reltoken(&ilock);
451 oc->dtor(obj, oc->private);
452 oc->free(obj, oc->allocator_args);
457 * Utility routine for objects that don't require any de-construction.
460 null_dtor(void *obj, void *private)
466 * De-construct and de-allocate objects in a magazine.
467 * Returns the number of objects freed.
468 * Does not de-allocate the magazine itself.
471 mag_purge(struct objcache *oc, struct magazine *mag)
476 for (i = 0; i < mag->rounds; i++) {
477 obj = mag->objects[i];
478 oc->dtor(obj, oc->private);
479 oc->free(obj, oc->allocator_args);
482 return (mag->rounds);
486 * De-allocate all magazines in a magazine list.
487 * Returns number of objects de-allocated.
490 maglist_purge(struct objcache *oc, struct magazinelist *maglist,
493 struct magazine *mag;
496 /* can't use SLIST_FOREACH because blocking releases the depot token */
497 while ((mag = SLIST_FIRST(maglist))) {
498 SLIST_REMOVE_HEAD(maglist, nextmagazine);
499 ndeleted += mag_purge(oc, mag); /* could block! */
500 free(mag, M_OBJMAG); /* could block! */
501 if (!purgeall && ndeleted > 0)
508 * De-allocates all magazines on the full and empty magazine lists.
511 depot_purge(struct magazinedepot *depot, struct objcache *oc)
513 depot->cluster_balance -= maglist_purge(oc, &depot->fullmagazines,
515 maglist_purge(oc, &depot->emptymagazines, TRUE);
520 objcache_reclaim(struct objcache *oc)
522 struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
523 struct magazinedepot *depot = &oc->depot[myclusterid];
525 mag_purge(oc, cache_percpu->loaded_magazine);
526 mag_purge(oc, cache_percpu->previous_magazine);
528 depot_purge(depot, oc);
533 * Try to free up some memory. Return as soon as some free memory found.
534 * For each object cache on the reclaim list, first try the current per-cpu
535 * cache, then the full magazine depot.
538 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
541 struct percpu_objcache *cpucache;
542 struct magazinedepot *depot;
546 for (i = 0; i < nlist; i++) {
548 cpucache = &oc->cache_percpu[mycpuid];
549 depot = &oc->depot[myclusterid];
552 if ((ndel = mag_purge(oc, cpucache->loaded_magazine)) > 0 ||
553 (ndel = mag_purge(oc, cpucache->previous_magazine)) > 0) {
555 lwkt_gettoken(&ilock, &depot->token);
556 depot->cluster_balance -= ndel;
557 lwkt_reltoken(&ilock);
561 lwkt_gettoken(&ilock, &depot->token);
563 maglist_purge(oc, &depot->fullmagazines, FALSE)) > 0) {
564 depot->cluster_balance -= ndel;
565 lwkt_reltoken(&ilock);
568 lwkt_reltoken(&ilock);
574 * Destroy an object cache. Must have no existing references.
575 * XXX Not clear this is a useful API function.
578 objcache_destroy(struct objcache *oc)
580 struct percpu_objcache *cache_percpu;
581 int clusterid, cpuid;
583 for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++)
584 depot_purge(&oc->depot[clusterid], oc);
586 for (cpuid = 0; cpuid < ncpus; cpuid++) {
587 cache_percpu = &oc->cache_percpu[cpuid];
589 mag_purge(oc, cache_percpu->loaded_magazine);
590 free(cache_percpu->loaded_magazine, M_OBJMAG);
592 mag_purge(oc, cache_percpu->previous_magazine);
593 free(cache_percpu->previous_magazine, M_OBJMAG);
596 free(oc->name, M_TEMP);
597 free(oc, M_OBJCACHE);
601 * Populate the per-cluster depot with elements from a linear block
602 * of memory. Must be called for individually for each cluster.
603 * Populated depots should not be destroyed.
606 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
609 char *end = (char *)base + (nelts * size);
610 struct magazinedepot *depot = &oc->depot[myclusterid];
612 struct magazine sentinelfullmag = { 0, 0 };
613 struct magazine *emptymag = &sentinelfullmag;
615 lwkt_gettoken(&ilock, &depot->token);
617 if (MAGAZINE_FULL(emptymag)) {
618 emptymag = mag_alloc(depot->magcapacity);
619 ++depot->emptymagazines_cumulative;
620 SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
623 emptymag->objects[emptymag->rounds++] = p;
626 depot->cluster_balance += nelts;
627 lwkt_reltoken(&ilock);
632 * Check depot contention once a minute.
633 * 2 contested locks per second allowed.
635 static int objcache_rebalance_period;
636 static const int objcache_contention_rate = 120;
637 static struct callout objcache_callout;
639 #define MAXMAGSIZE 512
642 * Check depot contention and increase magazine size if necessary.
645 objcache_timer(void *dummy)
648 struct magazinedepot *depot;
649 lwkt_tokref olock, dlock;
651 lwkt_gettoken(&olock, &objcachelist_token);
652 SLIST_FOREACH(oc, &allobjcaches, oc_next) {
653 depot = &oc->depot[myclusterid];
654 if (depot->magcapacity < MAXMAGSIZE) {
655 if (depot->contested > objcache_contention_rate) {
656 lwkt_gettoken(&dlock, &depot->token);
657 depot_purge(depot, oc);
658 depot->magcapacity *= 2;
659 lwkt_reltoken(&dlock);
660 printf("objcache_timer: increasing cache %s"
661 " magsize to %d, contested %d times\n",
662 oc->name, depot->magcapacity,
665 depot->contested = 0;
668 lwkt_reltoken(&olock);
670 callout_reset(&objcache_callout, objcache_rebalance_period,
671 objcache_timer, NULL);
677 lwkt_token_init(&objcachelist_token);
678 objcache_rebalance_period = 60 * hz;
679 callout_init(&objcache_callout);
680 callout_reset(&objcache_callout, objcache_rebalance_period,
681 objcache_timer, NULL);
683 SYSINIT(objcache, SI_SUB_CPU, SI_ORDER_ANY, objcache_init, 0);