2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/kern/kern_ccms.c,v 1.4 2007/04/30 07:18:53 dillon Exp $
37 * The Cache Coherency Management System (CCMS)
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
44 #include <sys/objcache.h>
46 #include <sys/sysctl.h>
48 #include <machine/limits.h>
/*
 * Per-call scan context passed through RB_SCAN callbacks, plus the
 * module-scope state for the CCMS subsystem.
 *
 * NOTE(review): this excerpt elides the struct body; from the callers
 * below it at least carries a 'lock', 'ds', 'coll_cst' (collision CST)
 * and two preallocated 'cst1'/'cst2' split buffers — confirm against
 * the full source.
 */
50 struct ccms_lock_scan_info {
/* Forward declarations for the RB-tree compare and match callbacks. */
58 static int ccms_cst_cmp(ccms_cst_t b1, ccms_cst_t b2);
59 static int ccms_lock_scan_cmp(ccms_cst_t b1, void *arg);
60 static int ccms_lock_undo_cmp(ccms_cst_t b1, void *arg);
61 static int ccms_dataspace_destroy_match(ccms_cst_t cst, void *arg);
62 static int ccms_lock_get_match(struct ccms_cst *cst, void *arg);
63 static int ccms_lock_undo_match(struct ccms_cst *cst, void *arg);
64 static int ccms_lock_redo_match(struct ccms_cst *cst, void *arg);
65 static int ccms_lock_put_match(struct ccms_cst *cst, void *arg);
/*
 * Generate the RB-tree implementation keyed on the inclusive
 * [beg_offset, end_offset] range of each CST.
 */
67 RB_GENERATE3(ccms_rb_tree, ccms_cst, rbnode, ccms_cst_cmp,
68 off_t, beg_offset, end_offset);
69 static MALLOC_DEFINE(M_CCMS, "CCMS", "Cache Coherency Management System");
/*
 * ccms_enable: master on/off switch (values >= 9 also turn on verbose
 * kprintf debugging below); exported via the kern.ccms_enable sysctl.
 */
71 static int ccms_enable;
72 SYSCTL_INT(_kern, OID_AUTO, ccms_enable, CTLFLAG_RW, &ccms_enable, 0, "");
/* Object cache from which all ccms_cst structures are allocated. */
74 static struct objcache *ccms_oc;
/*
 * Boot-time initialization: create the objcache used for ccms_cst
 * allocations, then register via SYSINIT to run at SI_BOOT2_MACHDEP.
 * NOTE(review): the ccmsinit() definition line itself is elided from
 * this excerpt; only its body statement and the SYSINIT hook appear.
 */
77 * Initialize the CCMS subsystem
82 ccms_oc = objcache_create_simple(M_CCMS, sizeof(struct ccms_cst));
84 SYSINIT(ccms, SI_BOOT2_MACHDEP, SI_ORDER_ANY, ccmsinit, NULL);
/*
 * Initialize a dataspace: seed its RB tree with a single CST spanning
 * the full signed 64-bit offset range in CCMS_STATE_INVALID, so later
 * scans never face an uncovered gap.
 */
87 * Initialize a new CCMS dataspace.  Create a new RB tree with a single
88 * element covering the entire 64 bit offset range.  This simplifies
89 * algorithms enormously by removing a number of special cases.
92 ccms_dataspace_init(ccms_dataspace_t ds)
/* Allocate and zero the initial all-covering CST. */
99 cst = objcache_get(ccms_oc, M_WAITOK);
100 bzero(cst, sizeof(*cst));
101 cst->beg_offset = LLONG_MIN;
102 cst->end_offset = LLONG_MAX;
103 cst->state = CCMS_STATE_INVALID;
104 RB_INSERT(ccms_rb_tree, &ds->tree, cst);
/*
 * Destroy a dataspace: sweep the whole tree (NULL scan-cmp matches
 * every node) and hand each CST to the destroy-match callback, which
 * removes and frees it.
 */
108 * Destroy a CCMS dataspace.
111 ccms_dataspace_destroy(ccms_dataspace_t ds)
113 RB_SCAN(ccms_rb_tree, &ds->tree, NULL,
114 ccms_dataspace_destroy_match, ds);
/*
 * RB_SCAN callback for ccms_dataspace_destroy(): unlink the CST from
 * the dataspace tree and return it to the objcache.
 * NOTE(review): the return statement is elided from this excerpt.
 */
119 ccms_dataspace_destroy_match(ccms_cst_t cst, void *arg)
121 ccms_dataspace_t ds = arg;
123 RB_REMOVE(ccms_rb_tree, &ds->tree, cst);
124 objcache_put(ccms_oc, cst);
/*
 * Acquire a CCMS range lock on [lock->beg_offset, lock->end_offset].
 * No-op when the ccms_enable sysctl is 0.  Two spare CSTs are taken
 * up-front (M_WAITOK) so the scan callbacks can split boundary CSTs
 * without sleeping; unused spares are returned afterwards.
 * NOTE(review): several statements (early-return body, tsleep timeout
 * argument, conditional puts, return) are elided from this excerpt.
 */
132 ccms_lock_get(ccms_dataspace_t ds, ccms_lock_t lock)
134 struct ccms_lock_scan_info info;
/* Subsystem disabled: skip all range-lock accounting. */
136 if (ccms_enable == 0) {
142 * Partition the CST space so the precise range is covered and
143 * attempt to obtain the requested local lock (ltype) at the same
/* Preallocate the two potential split CSTs before scanning. */
149 info.coll_cst = NULL;
150 info.cst1 = objcache_get(ccms_oc, M_WAITOK);
151 info.cst2 = objcache_get(ccms_oc, M_WAITOK);
153 RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
154 ccms_lock_get_match, &info);
157 * If a collision occured, undo the fragments we were able to obtain,
158 * block, and try again.
160 while (info.coll_cst != NULL) {
/* Back out the partial acquisition up to the collision point. */
161 RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_undo_cmp,
162 ccms_lock_undo_match, &info);
/* Sleep on the colliding CST until its holder wakes us. */
163 info.coll_cst->blocked = 1;
164 tsleep(info.coll_cst, 0,
165 ((lock->ltype == CCMS_LTYPE_SHARED) ? "rngsh" : "rngex"),
/* Retry the acquisition over the already-partitioned range. */
167 info.coll_cst = NULL;
168 RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
169 ccms_lock_redo_match, &info);
/* Release any preallocated split CSTs the scan did not consume. */
176 objcache_put(ccms_oc, info.cst1);
178 objcache_put(ccms_oc, info.cst2);
/*
 * Convenience wrapper: derive the lock range and type from a uio.
 * Reads take a shared lock, writes a modifying lock.  The inclusive
 * end offset is clamped to OFF_MAX-equivalent (0x7FFF...LL) if the
 * offset+resid computation overflows (seek near end of range).
 */
184 * Obtain a CCMS lock, initialize the lock structure from the uio.
187 ccms_lock_get_uio(ccms_dataspace_t ds, ccms_lock_t lock, struct uio *uio)
192 if (uio->uio_rw == UIO_READ)
193 ltype = CCMS_LTYPE_SHARED;
195 ltype = CCMS_LTYPE_MODIFYING;
198 * Calculate the ending offset (byte inclusive), make sure a seek
199 * overflow does not blow us up.
201 eoff = uio->uio_offset + uio->uio_resid - 1;
202 if (eoff < uio->uio_offset)
203 eoff = 0x7FFFFFFFFFFFFFFFLL;
204 ccms_lock_init(lock, uio->uio_offset, eoff, ltype);
205 return(ccms_lock_get(ds, lock));
/*
 * RB_SCAN callback for the initial acquisition pass.  For each CST
 * overlapping the lock range: split the CST at the lock's left and/or
 * right edge (consuming the preallocated info->cst1/cst2), then try to
 * take the local ltype on the fully-covered CST.  On conflict the CST
 * is recorded in info->coll_cst and acquisition continues so the undo
 * pass knows how far we got.
 * NOTE(review): many statements (ref-count bumps, ncst field copies,
 * break/return statements) are elided from this excerpt; sharecount
 * appears to be >0 for shared holders and <0 for exclusive holders —
 * confirm against the full source.
 */
210 ccms_lock_get_match(ccms_cst_t cst, void *arg)
212 struct ccms_lock_scan_info *info = arg;
213 ccms_lock_t lock = info->lock;
217 * If the lock's left edge is within the CST we must split the CST
218 * into two pieces [cst][ncst].  lrefs must be bumped on the CST
219 * containing the left edge.
221 * NOTE! cst->beg_offset may not be modified.  This allows us to avoid
222 * having to manipulate the cst's position in the tree.
224 if (lock->beg_offset > cst->beg_offset) {
227 KKASSERT(ncst != NULL);
/* Shrink cst to [beg, lock->beg-1]; ncst takes [lock->beg, old end]. */
229 cst->end_offset = lock->beg_offset - 1;
231 ncst->beg_offset = lock->beg_offset;
233 RB_INSERT(ccms_rb_tree, &info->ds->tree, ncst);
236 * ncst becomes our 'matching' cst.
239 } else if (lock->beg_offset == cst->beg_offset) {
244 * If the lock's right edge is within the CST we must split the CST
245 * into two pieces [cst][ncst].  rrefs must be bumped on the CST
246 * containing the right edge.
248 * NOTE! cst->beg_offset may not be modified.  This allows us to avoid
249 * having to manipulate the cst's position in the tree.
251 if (lock->end_offset < cst->end_offset) {
254 KKASSERT(ncst != NULL);
/* Shrink cst to end at the lock's right edge; ncst takes the rest. */
256 cst->end_offset = lock->end_offset;
258 ncst->beg_offset = lock->end_offset + 1;
260 RB_INSERT(ccms_rb_tree, &info->ds->tree, ncst);
261 /* cst remains our 'matching' cst */
262 } else if (lock->end_offset == cst->end_offset) {
267 * The lock covers the CST, so increment the CST's coverage count.
268 * Then attempt to obtain the shared/exclusive ltype.
/* Only attempt the ltype while no collision has been recorded. */
272 if (info->coll_cst == NULL) {
273 switch(lock->ltype) {
274 case CCMS_LTYPE_SHARED:
/* Negative sharecount = exclusively held: collide. */
275 if (cst->sharecount < 0) {
276 info->coll_cst = cst;
279 if (ccms_enable >= 9) {
280 kprintf("CST SHARE %d %lld-%lld\n", cst->sharecount,
281 cst->beg_offset, cst->end_offset);
285 case CCMS_LTYPE_EXCLUSIVE:
/* Any existing holder (shared or exclusive) collides. */
286 if (cst->sharecount != 0) {
287 info->coll_cst = cst;
290 if (ccms_enable >= 9) {
291 kprintf("CST EXCLS %d %lld-%lld\n", cst->sharecount,
292 cst->beg_offset, cst->end_offset);
296 case CCMS_LTYPE_MODIFYING:
297 if (cst->sharecount != 0) {
298 info->coll_cst = cst;
302 if (ccms_enable >= 9) {
303 kprintf("CST MODXL %d %lld-%lld\n", cst->sharecount,
304 cst->beg_offset, cst->end_offset);
/*
 * RB_SCAN callback backing out the ltype taken by the partial
 * acquisition pass, per lock type.  Runs atomically with the
 * scan/redo code, hence the assertions that nothing can be blocked
 * on a CST whose count is dropping to zero.
 * NOTE(review): the actual count-adjustment statements between the
 * paired KKASSERTs are elided from this excerpt.
 */
314 * Undo a partially resolved ccms_ltype rangelock.  This is atomic with
315 * the scan/redo code so there should not be any blocked locks when
316 * transitioning to 0.
320 ccms_lock_undo_match(ccms_cst_t cst, void *arg)
322 struct ccms_lock_scan_info *info = arg;
323 ccms_lock_t lock = info->lock;
325 switch(lock->ltype) {
326 case CCMS_LTYPE_SHARED:
327 KKASSERT(cst->sharecount > 0);
329 KKASSERT(cst->sharecount || cst->blocked == 0);
331 case CCMS_LTYPE_EXCLUSIVE:
332 KKASSERT(cst->sharecount < 0);
334 KKASSERT(cst->sharecount || cst->blocked == 0);
336 case CCMS_LTYPE_MODIFYING:
337 KKASSERT(cst->sharecount < 0 && cst->modifycount > 0);
340 KKASSERT(cst->sharecount || cst->blocked == 0);
/*
 * RB_SCAN callback for the retry pass after sleeping on a collision.
 * The range is already partitioned, so no CST splitting is needed;
 * only the local ltype acquisition is re-attempted, with the same
 * per-type collision rules as ccms_lock_get_match().
 * NOTE(review): count-adjustment and break/return statements are
 * elided from this excerpt.
 */
347 * Redo the local lock request for a range which has already been
352 ccms_lock_redo_match(ccms_cst_t cst, void *arg)
354 struct ccms_lock_scan_info *info = arg;
355 ccms_lock_t lock = info->lock;
/* Stop attempting once a new collision has been recorded. */
357 if (info->coll_cst == NULL) {
358 switch(lock->ltype) {
359 case CCMS_LTYPE_SHARED:
360 if (cst->sharecount < 0) {
361 info->coll_cst = cst;
363 if (ccms_enable >= 9) {
364 kprintf("CST SHARE %d %lld-%lld\n", cst->sharecount,
365 cst->beg_offset, cst->end_offset);
370 case CCMS_LTYPE_EXCLUSIVE:
371 if (cst->sharecount != 0) {
372 info->coll_cst = cst;
375 if (ccms_enable >= 9) {
376 kprintf("CST EXCLS %d %lld-%lld\n", cst->sharecount,
377 cst->beg_offset, cst->end_offset);
381 case CCMS_LTYPE_MODIFYING:
382 if (cst->sharecount != 0) {
383 info->coll_cst = cst;
387 if (ccms_enable >= 9) {
388 kprintf("CST MODXL %d %lld-%lld\n", cst->sharecount,
389 cst->beg_offset, cst->end_offset);
/*
 * Release a previously obtained CCMS range lock: scan the covered
 * CSTs with the put-match callback (which drops counts, wakes
 * blocked sleepers, and merges mergeable neighbors), then free any
 * CSTs freed back into info.cst1/cst2 by the merge.
 * NOTE(review): the early return when lock->ds == NULL and the info
 * initialization statements are elided from this excerpt.
 */
399 * Release a CCMS lock
402 ccms_lock_put(ccms_dataspace_t ds, ccms_lock_t lock)
404 struct ccms_lock_scan_info info;
/* Lock was never obtained (e.g. ccms disabled): nothing to release. */
406 if (lock->ds == NULL)
415 RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
416 ccms_lock_put_match, &info);
419 objcache_put(ccms_oc, info.cst1);
421 objcache_put(ccms_oc, info.cst2);
/*
 * RB_SCAN callback releasing the lock on each covered CST: undo the
 * local shared/exclusive/modifying hold (waking any sleeper blocked
 * on the CST when the count reaches 0), then drop edge reference
 * counts and opportunistically merge the CST with its left/right
 * neighbor when state, sharecount, and xrefs allow.
 * NOTE(review): the count-adjustment, wakeup, lrefs/rrefs decrement,
 * and several condition lines are elided from this excerpt.
 */
427 ccms_lock_put_match(ccms_cst_t cst, void *arg)
429 struct ccms_lock_scan_info *info = arg;
430 ccms_lock_t lock = info->lock;
434 * Undo the local shared/exclusive rangelock.
436 switch(lock->ltype) {
437 case CCMS_LTYPE_SHARED:
438 KKASSERT(cst->sharecount > 0);
440 if (ccms_enable >= 9) {
441 kprintf("CST UNSHR %d %lld-%lld (%d)\n", cst->sharecount,
442 cst->beg_offset, cst->end_offset, cst->blocked);
/* Last holder gone: wake anything sleeping on this CST. */
444 if (cst->blocked && cst->sharecount == 0) {
449 case CCMS_LTYPE_EXCLUSIVE:
450 KKASSERT(cst->sharecount < 0);
452 if (ccms_enable >= 9) {
453 kprintf("CST UNEXC %d %lld-%lld (%d)\n", cst->sharecount,
454 cst->beg_offset, cst->end_offset, cst->blocked);
456 if (cst->blocked && cst->sharecount == 0) {
461 case CCMS_LTYPE_MODIFYING:
462 KKASSERT(cst->sharecount < 0 && cst->modifycount > 0);
465 if (ccms_enable >= 9) {
466 kprintf("CST UNMOD %d %lld-%lld (%d)\n", cst->sharecount,
467 cst->beg_offset, cst->end_offset, cst->blocked);
469 if (cst->blocked && cst->sharecount == 0) {
477 * Decrement the lock coverage count on the CST.  Decrement the left and
478 * right edge counts as appropriate.
480 * When lrefs or rrefs drops to zero we check the adjacent entry to
481 * determine whether a merge is possible.  If the appropriate refs field
482 * (rrefs for the entry to our left, lrefs for the entry to our right)
483 * is 0, then all covering locks must cover both entries and the xrefs
484 * field must match.  We can then merge the entries if they have
485 * compatible cache states.
487 * However, because we are cleaning up the shared/exclusive count at
488 * the same time, the sharecount field may be temporarily out of
489 * sync, so require that the sharecount field also match before doing
492 * When merging an element which is being blocked on, the blocking
493 * thread(s) will be woken up.
495 * If the dataspace has too many CSTs we may be able to merge the
496 * entries even if their cache states are not the same, by dropping
497 * both to a compatible (lower) cache state and performing the appropriate
498 * management operations.  XXX
/* Left-edge release: try to merge with the predecessor CST. */
501 if (lock->beg_offset == cst->beg_offset) {
503 if (cst->lrefs == 0) {
504 if ((ocst = RB_PREV(ccms_rb_tree, &info->ds->tree, cst)) != NULL &&
506 ocst->state == cst->state &&
507 ocst->sharecount == cst->sharecount
509 KKASSERT(ocst->xrefs == cst->xrefs);
510 KKASSERT(ocst->end_offset + 1 == cst->beg_offset);
/* Absorb ocst's range into cst and free ocst. */
511 RB_REMOVE(ccms_rb_tree, &info->ds->tree, ocst);
512 cst->beg_offset = ocst->beg_offset;
513 cst->lrefs = ocst->lrefs;
514 if (ccms_enable >= 9) {
515 kprintf("MERGELEFT %p %lld-%lld (%d)\n",
516 ocst, cst->beg_offset, cst->end_offset,
523 objcache_put(ccms_oc, ocst);
/* Right-edge release: try to merge with the successor CST. */
527 if (lock->end_offset == cst->end_offset) {
529 if (cst->rrefs == 0) {
530 if ((ocst = RB_NEXT(ccms_rb_tree, &info->ds->tree, cst)) != NULL &&
532 ocst->state == cst->state &&
533 ocst->sharecount == cst->sharecount
535 KKASSERT(ocst->xrefs == cst->xrefs);
536 KKASSERT(cst->end_offset + 1 == ocst->beg_offset);
537 RB_REMOVE(ccms_rb_tree, &info->ds->tree, ocst);
538 cst->end_offset = ocst->end_offset;
539 cst->rrefs = ocst->rrefs;
540 if (ccms_enable >= 9) {
541 kprintf("MERGERIGHT %p %lld-%lld\n",
542 ocst, cst->beg_offset, cst->end_offset);
544 objcache_put(ccms_oc, ocst);
/*
 * RB-tree compare for insert/remove: orders CSTs by their inclusive
 * offset ranges; overlapping ranges compare equal.
 * NOTE(review): the return statements are elided from this excerpt.
 */
553 * RB tree compare function for insertions and deletions.  This function
557 ccms_cst_cmp(ccms_cst_t b1, ccms_cst_t b2)
559 if (b1->end_offset < b2->beg_offset)
561 if (b1->beg_offset > b2->end_offset)
/*
 * RB_SCAN compare: positions a tree CST relative to the lock's range
 * so the scan visits exactly the overlapping CSTs.
 * NOTE(review): the return statements are elided from this excerpt.
 */
567 * RB tree scanning compare function.  This function compares the CST
568 * from the tree against the supplied ccms_lock and returns the CST's
569 * placement relative to the lock.
572 ccms_lock_scan_cmp(ccms_cst_t cst, void *arg)
574 struct ccms_lock_scan_info *info = arg;
575 ccms_lock_t lock = info->lock;
577 if (cst->end_offset < lock->beg_offset)
579 if (cst->beg_offset > lock->end_offset)
585 * This function works like ccms_lock_scan_cmp but terminates at the
586 * collision point rather then at the lock's ending offset. Only
587 * the CSTs that were already partially resolved are returned by the scan.
590 ccms_lock_undo_cmp(ccms_cst_t cst, void *arg)
592 struct ccms_lock_scan_info *info = arg;
593 ccms_lock_t lock = info->lock;
595 if (cst->end_offset < lock->beg_offset)
597 if (cst->beg_offset >= info->coll_cst->beg_offset)