/*
 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_ccms.c,v 1.4 2007/04/30 07:18:53 dillon Exp $
 */
/*
 * The Cache Coherency Management System (CCMS)
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/objcache.h>
#include <sys/ccms.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <machine/limits.h>

struct ccms_lock_scan_info {
    ccms_dataspace_t ds;
    ccms_lock_t lock;
    ccms_cst_t cst1;
    ccms_cst_t cst2;
    ccms_cst_t coll_cst;
};

static int ccms_cst_cmp(ccms_cst_t b1, ccms_cst_t b2);
static int ccms_lock_scan_cmp(ccms_cst_t b1, void *arg);
static int ccms_lock_undo_cmp(ccms_cst_t b1, void *arg);
static int ccms_dataspace_destroy_match(ccms_cst_t cst, void *arg);
static int ccms_lock_get_match(struct ccms_cst *cst, void *arg);
static int ccms_lock_undo_match(struct ccms_cst *cst, void *arg);
static int ccms_lock_redo_match(struct ccms_cst *cst, void *arg);
static int ccms_lock_put_match(struct ccms_cst *cst, void *arg);

RB_GENERATE3(ccms_rb_tree, ccms_cst, rbnode, ccms_cst_cmp,
             off_t, beg_offset, end_offset);
static MALLOC_DEFINE(M_CCMS, "CCMS", "Cache Coherency Management System");

static int ccms_enable;
SYSCTL_INT(_kern, OID_AUTO, ccms_enable, CTLFLAG_RW, &ccms_enable, 0, "");

static struct objcache *ccms_oc;

/*
 * Initialize the CCMS subsystem
 */
static void
ccmsinit(void *dummy)
{
    ccms_oc = objcache_create_simple(M_CCMS, sizeof(struct ccms_cst));
}
SYSINIT(ccms, SI_BOOT2_MACHDEP, SI_ORDER_ANY, ccmsinit, NULL);

/*
 * Initialize a new CCMS dataspace. Create a new RB tree with a single
 * element covering the entire 64 bit offset range. This simplifies
 * algorithms enormously by removing a number of special cases.
 */
void
ccms_dataspace_init(ccms_dataspace_t ds)
{
    ccms_cst_t cst;

    RB_INIT(&ds->tree);
    ds->info = NULL;
    ds->chain = NULL;
    cst = objcache_get(ccms_oc, M_WAITOK);
    bzero(cst, sizeof(*cst));
    cst->beg_offset = LLONG_MIN;
    cst->end_offset = LLONG_MAX;
    cst->state = CCMS_STATE_INVALID;
    RB_INSERT(ccms_rb_tree, &ds->tree, cst);
}
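
/*
 * Example usage (a minimal sketch, not part of the original file): a
 * consumer such as a filesystem would typically embed a ccms_dataspace
 * in its per-object structure and bracket the object's lifetime with
 * ccms_dataspace_init()/ccms_dataspace_destroy(). The structure and
 * function names below (myfs_node, node_ccms, myfs_node_init,
 * myfs_node_reclaim) are hypothetical, and the block is compiled out.
 */
#if 0
struct myfs_node {
    struct ccms_dataspace node_ccms;    /* hypothetical embedding */
    /* ... other per-node fields ... */
};

static void
myfs_node_init(struct myfs_node *node)
{
    ccms_dataspace_init(&node->node_ccms);
}

static void
myfs_node_reclaim(struct myfs_node *node)
{
    ccms_dataspace_destroy(&node->node_ccms);
}
#endif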

/*
 * Destroy a CCMS dataspace.
 */
void
ccms_dataspace_destroy(ccms_dataspace_t ds)
{
    RB_SCAN(ccms_rb_tree, &ds->tree, NULL,
            ccms_dataspace_destroy_match, ds);
}

static
int
ccms_dataspace_destroy_match(ccms_cst_t cst, void *arg)
{
    ccms_dataspace_t ds = arg;

    RB_REMOVE(ccms_rb_tree, &ds->tree, cst);
    objcache_put(ccms_oc, cst);
    return(0);
}

/*
 * Obtain a CCMS lock
 */
int
ccms_lock_get(ccms_dataspace_t ds, ccms_lock_t lock)
{
    struct ccms_lock_scan_info info;

    if (ccms_enable == 0) {
        lock->ds = NULL;
        return(0);
    }

    /*
     * Partition the CST space so the precise range is covered and
     * attempt to obtain the requested local lock (ltype) at the same
     * time.
     */
    lock->ds = ds;
    info.lock = lock;
    info.ds = ds;
    info.coll_cst = NULL;
    info.cst1 = objcache_get(ccms_oc, M_WAITOK);
    info.cst2 = objcache_get(ccms_oc, M_WAITOK);

    RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
            ccms_lock_get_match, &info);

    /*
     * If a collision occurred, undo the fragments we were able to obtain,
     * block, and try again.
     */
    while (info.coll_cst != NULL) {
        RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_undo_cmp,
                ccms_lock_undo_match, &info);
        info.coll_cst->blocked = 1;
        tsleep(info.coll_cst, 0,
               ((lock->ltype == CCMS_LTYPE_SHARED) ? "rngsh" : "rngex"),
               hz);
        info.coll_cst = NULL;
        RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
                ccms_lock_redo_match, &info);
    }

    /*
     * Cleanup
     */
    if (info.cst1)
        objcache_put(ccms_oc, info.cst1);
    if (info.cst2)
        objcache_put(ccms_oc, info.cst2);

    return(0);
}

/*
 * Obtain a CCMS lock, initialize the lock structure from the uio.
 */
int
ccms_lock_get_uio(ccms_dataspace_t ds, ccms_lock_t lock, struct uio *uio)
{
    ccms_ltype_t ltype;
    off_t eoff;

    if (uio->uio_rw == UIO_READ)
        ltype = CCMS_LTYPE_SHARED;
    else
        ltype = CCMS_LTYPE_MODIFYING;

    /*
     * Calculate the ending offset (byte inclusive), make sure a seek
     * overflow does not blow us up.
     */
    eoff = uio->uio_offset + uio->uio_resid - 1;
    if (eoff < uio->uio_offset)
        eoff = 0x7FFFFFFFFFFFFFFFLL;
    ccms_lock_init(lock, uio->uio_offset, eoff, ltype);
    return(ccms_lock_get(ds, lock));
}
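
/*
 * Example usage (a minimal sketch, not part of the original file): an I/O
 * path would typically wrap the data transfer in a CCMS range lock derived
 * from the uio, pairing ccms_lock_get_uio() with ccms_lock_put(). The
 * myfs_node structure, node_ccms field and myfs_doread() helper are
 * hypothetical; the lock structure normally lives on the stack. The block
 * is compiled out.
 */
#if 0
static int
myfs_read(struct myfs_node *node, struct uio *uio)
{
    struct ccms_lock lock;
    int error;

    /* UIO_READ yields a shared range lock, UIO_WRITE a modifying one */
    ccms_lock_get_uio(&node->node_ccms, &lock, uio);
    error = myfs_doread(node, uio);     /* hypothetical data transfer */
    ccms_lock_put(&node->node_ccms, &lock);
    return(error);
}
#endif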

static
int
ccms_lock_get_match(ccms_cst_t cst, void *arg)
{
    struct ccms_lock_scan_info *info = arg;
    ccms_lock_t lock = info->lock;
    ccms_cst_t ncst;

    /*
     * If the lock's left edge is within the CST we must split the CST
     * into two pieces [cst][ncst]. lrefs must be bumped on the CST
     * containing the left edge.
     *
     * NOTE! cst->beg_offset may not be modified. This allows us to avoid
     * having to manipulate the cst's position in the tree.
     */
    if (lock->beg_offset > cst->beg_offset) {
        ncst = info->cst1;
        info->cst1 = NULL;
        KKASSERT(ncst != NULL);
        *ncst = *cst;
        cst->end_offset = lock->beg_offset - 1;
        cst->rrefs = 0;
        ncst->beg_offset = lock->beg_offset;
        ncst->lrefs = 1;
        RB_INSERT(ccms_rb_tree, &info->ds->tree, ncst);

        /*
         * ncst becomes our 'matching' cst.
         */
        cst = ncst;
    } else if (lock->beg_offset == cst->beg_offset) {
        ++cst->lrefs;
    }

    /*
     * If the lock's right edge is within the CST we must split the CST
     * into two pieces [cst][ncst]. rrefs must be bumped on the CST
     * containing the right edge.
     *
     * NOTE! cst->beg_offset may not be modified. This allows us to avoid
     * having to manipulate the cst's position in the tree.
     */
    if (lock->end_offset < cst->end_offset) {
        ncst = info->cst2;
        info->cst2 = NULL;
        KKASSERT(ncst != NULL);
        *ncst = *cst;
        cst->end_offset = lock->end_offset;
        cst->rrefs = 1;
        ncst->beg_offset = lock->end_offset + 1;
        ncst->lrefs = 0;
        RB_INSERT(ccms_rb_tree, &info->ds->tree, ncst);
        /* cst remains our 'matching' cst */
    } else if (lock->end_offset == cst->end_offset) {
        ++cst->rrefs;
    }

    /*
     * The lock covers the CST, so increment the CST's coverage count.
     * Then attempt to obtain the shared/exclusive ltype.
     */
    ++cst->xrefs;

    if (info->coll_cst == NULL) {
        switch(lock->ltype) {
        case CCMS_LTYPE_SHARED:
            if (cst->sharecount < 0) {
                info->coll_cst = cst;
            } else {
                ++cst->sharecount;
                if (ccms_enable >= 9) {
                    kprintf("CST SHARE %d %lld-%lld\n",
                            cst->sharecount,
                            (long long)cst->beg_offset,
                            (long long)cst->end_offset);
                }
            }
            break;
        case CCMS_LTYPE_EXCLUSIVE:
            if (cst->sharecount != 0) {
                info->coll_cst = cst;
            } else {
                --cst->sharecount;
                if (ccms_enable >= 9) {
                    kprintf("CST EXCLS %d %lld-%lld\n",
                            cst->sharecount,
                            (long long)cst->beg_offset,
                            (long long)cst->end_offset);
                }
            }
            break;
        case CCMS_LTYPE_MODIFYING:
            if (cst->sharecount != 0) {
                info->coll_cst = cst;
            } else {
                --cst->sharecount;
                ++cst->modifycount;
                if (ccms_enable >= 9) {
                    kprintf("CST MODXL %d %lld-%lld\n",
                            cst->sharecount,
                            (long long)cst->beg_offset,
                            (long long)cst->end_offset);
                }
            }
            break;
        }
    }
    return(0);
}
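
/*
 * Worked example (illustrative, assumed offsets): if a single CST covers
 * [0, LLONG_MAX] and a lock request arrives for [4096, 8191], the left
 * edge test splits the CST into [0, 4095] and a new CST [4096, LLONG_MAX]
 * (lrefs bumped on the latter), and the right edge test then splits again
 * into [4096, 8191] and [8192, LLONG_MAX] (rrefs bumped on the former).
 * The lock ends up exactly covered by the middle CST, whose xrefs and
 * sharecount are adjusted according to the requested ltype.
 */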

/*
 * Undo a partially resolved ccms_ltype rangelock. This is atomic with
 * the scan/redo code so there should not be any blocked locks when
 * transitioning to 0.
 */
static
int
ccms_lock_undo_match(ccms_cst_t cst, void *arg)
{
    struct ccms_lock_scan_info *info = arg;
    ccms_lock_t lock = info->lock;

    switch(lock->ltype) {
    case CCMS_LTYPE_SHARED:
        KKASSERT(cst->sharecount > 0);
        --cst->sharecount;
        KKASSERT(cst->sharecount || cst->blocked == 0);
        break;
    case CCMS_LTYPE_EXCLUSIVE:
        KKASSERT(cst->sharecount < 0);
        ++cst->sharecount;
        KKASSERT(cst->sharecount || cst->blocked == 0);
        break;
    case CCMS_LTYPE_MODIFYING:
        KKASSERT(cst->sharecount < 0 && cst->modifycount > 0);
        ++cst->sharecount;
        --cst->modifycount;
        KKASSERT(cst->sharecount || cst->blocked == 0);
        break;
    }
    return(0);
}

/*
 * Redo the local lock request for a range which has already been
 * partitioned.
 */
static
int
ccms_lock_redo_match(ccms_cst_t cst, void *arg)
{
    struct ccms_lock_scan_info *info = arg;
    ccms_lock_t lock = info->lock;

    if (info->coll_cst == NULL) {
        switch(lock->ltype) {
        case CCMS_LTYPE_SHARED:
            if (cst->sharecount < 0) {
                info->coll_cst = cst;
            } else {
                if (ccms_enable >= 9) {
                    kprintf("CST SHARE %d %lld-%lld\n",
                            cst->sharecount,
                            (long long)cst->beg_offset,
                            (long long)cst->end_offset);
                }
                ++cst->sharecount;
            }
            break;
        case CCMS_LTYPE_EXCLUSIVE:
            if (cst->sharecount != 0) {
                info->coll_cst = cst;
            } else {
                --cst->sharecount;
                if (ccms_enable >= 9) {
                    kprintf("CST EXCLS %d %lld-%lld\n",
                            cst->sharecount,
                            (long long)cst->beg_offset,
                            (long long)cst->end_offset);
                }
            }
            break;
        case CCMS_LTYPE_MODIFYING:
            if (cst->sharecount != 0) {
                info->coll_cst = cst;
            } else {
                --cst->sharecount;
                ++cst->modifycount;
                if (ccms_enable >= 9) {
                    kprintf("CST MODXL %d %lld-%lld\n",
                            cst->sharecount,
                            (long long)cst->beg_offset,
                            (long long)cst->end_offset);
                }
            }
            break;
        }
    }
    return(0);
}

/*
 * Release a CCMS lock
 */
int
ccms_lock_put(ccms_dataspace_t ds, ccms_lock_t lock)
{
    struct ccms_lock_scan_info info;

    if (lock->ds == NULL)
        return(0);

    lock->ds = NULL;
    info.lock = lock;
    info.ds = ds;
    info.cst1 = NULL;
    info.cst2 = NULL;

    RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
            ccms_lock_put_match, &info);

    if (info.cst1)
        objcache_put(ccms_oc, info.cst1);
    if (info.cst2)
        objcache_put(ccms_oc, info.cst2);
    return(0);
}

static
int
ccms_lock_put_match(ccms_cst_t cst, void *arg)
{
    struct ccms_lock_scan_info *info = arg;
    ccms_lock_t lock = info->lock;
    ccms_cst_t ocst;

    /*
     * Undo the local shared/exclusive rangelock.
     */
    switch(lock->ltype) {
    case CCMS_LTYPE_SHARED:
        KKASSERT(cst->sharecount > 0);
        --cst->sharecount;
        if (ccms_enable >= 9) {
            kprintf("CST UNSHR %d %lld-%lld (%d)\n", cst->sharecount,
                    (long long)cst->beg_offset,
                    (long long)cst->end_offset,
                    cst->blocked);
        }
        if (cst->blocked && cst->sharecount == 0) {
            cst->blocked = 0;
            wakeup(cst);
        }
        break;
    case CCMS_LTYPE_EXCLUSIVE:
        KKASSERT(cst->sharecount < 0);
        ++cst->sharecount;
        if (ccms_enable >= 9) {
            kprintf("CST UNEXC %d %lld-%lld (%d)\n", cst->sharecount,
                    (long long)cst->beg_offset,
                    (long long)cst->end_offset,
                    cst->blocked);
        }
        if (cst->blocked && cst->sharecount == 0) {
            cst->blocked = 0;
            wakeup(cst);
        }
        break;
    case CCMS_LTYPE_MODIFYING:
        KKASSERT(cst->sharecount < 0 && cst->modifycount > 0);
        ++cst->sharecount;
        --cst->modifycount;
        if (ccms_enable >= 9) {
            kprintf("CST UNMOD %d %lld-%lld (%d)\n", cst->sharecount,
                    (long long)cst->beg_offset,
                    (long long)cst->end_offset,
                    cst->blocked);
        }
        if (cst->blocked && cst->sharecount == 0) {
            cst->blocked = 0;
            wakeup(cst);
        }
        break;
    }

    /*
     * Decrement the lock coverage count on the CST. Decrement the left and
     * right edge counts as appropriate.
     *
     * When lrefs or rrefs drops to zero we check the adjacent entry to
     * determine whether a merge is possible. If the appropriate refs field
     * (rrefs for the entry to our left, lrefs for the entry to our right)
     * is 0, then all covering locks must cover both entries and the xrefs
     * field must match. We can then merge the entries if they have
     * compatible cache states.
     *
     * However, because we are cleaning up the shared/exclusive count at
     * the same time, the sharecount field may be temporarily out of
     * sync, so require that the sharecount field also match before doing
     * a merge.
     *
     * When merging an element which is being blocked on, the blocking
     * thread(s) will be woken up.
     *
     * If the dataspace has too many CSTs we may be able to merge the
     * entries even if their cache states are not the same, by dropping
     * both to a compatible (lower) cache state and performing the appropriate
     * management operations. XXX
     */
    --cst->xrefs;
    if (lock->beg_offset == cst->beg_offset) {
        --cst->lrefs;
        if (cst->lrefs == 0) {
            if ((ocst = RB_PREV(ccms_rb_tree, &info->ds->tree, cst)) != NULL &&
                ocst->rrefs == 0 &&
                ocst->state == cst->state &&
                ocst->sharecount == cst->sharecount
            ) {
                KKASSERT(ocst->xrefs == cst->xrefs);
                KKASSERT(ocst->end_offset + 1 == cst->beg_offset);
                RB_REMOVE(ccms_rb_tree, &info->ds->tree, ocst);
                cst->beg_offset = ocst->beg_offset;
                cst->lrefs = ocst->lrefs;
                if (ccms_enable >= 9) {
                    kprintf("MERGELEFT %p %lld-%lld (%d)\n",
                            ocst,
                            (long long)cst->beg_offset,
                            (long long)cst->end_offset,
                            cst->blocked);
                }
                if (ocst->blocked) {
                    ocst->blocked = 0;
                    wakeup(ocst);
                }
                objcache_put(ccms_oc, ocst);
            }
        }
    }
    if (lock->end_offset == cst->end_offset) {
        --cst->rrefs;
        if (cst->rrefs == 0) {
            if ((ocst = RB_NEXT(ccms_rb_tree, &info->ds->tree, cst)) != NULL &&
                ocst->lrefs == 0 &&
                ocst->state == cst->state &&
                ocst->sharecount == cst->sharecount
            ) {
                KKASSERT(ocst->xrefs == cst->xrefs);
                KKASSERT(cst->end_offset + 1 == ocst->beg_offset);
                RB_REMOVE(ccms_rb_tree, &info->ds->tree, ocst);
                cst->end_offset = ocst->end_offset;
                cst->rrefs = ocst->rrefs;
                if (ccms_enable >= 9) {
                    kprintf("MERGERIGHT %p %lld-%lld\n",
                            ocst,
                            (long long)cst->beg_offset,
                            (long long)cst->end_offset);
                }
                objcache_put(ccms_oc, ocst);
            }
        }
    }
    return(0);
}
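
/*
 * Worked example (illustrative, assumed offsets): continuing the split
 * example above, releasing the [4096, 8191] lock drops lrefs and rrefs
 * on the middle CST to zero. If the neighbouring CSTs [0, 4095] and
 * [8192, LLONG_MAX] have matching state, sharecount and xrefs, they are
 * merged back in, eventually restoring the single [0, LLONG_MAX] CST the
 * dataspace started with.
 */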


/*
 * RB tree compare function for insertions and deletions. This function
 * compares two CSTs.
 */
static int
ccms_cst_cmp(ccms_cst_t b1, ccms_cst_t b2)
{
    if (b1->end_offset < b2->beg_offset)
        return(-1);
    if (b1->beg_offset > b2->end_offset)
        return(1);
    return(0);
}
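
/*
 * Example (illustrative values): with b1 = [0, 4095] and b2 = [4096, 8191]
 * the ranges are disjoint and b1 sorts before b2 (-1 is returned). With
 * b1 = [0, 8191] and b2 = [4096, 8191] the ranges overlap and 0 is
 * returned, i.e. overlapping CSTs compare as "equal", which is what allows
 * the RB tree to be indexed by byte ranges rather than single keys.
 */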

/*
 * RB tree scanning compare function. This function compares the CST
 * from the tree against the supplied ccms_lock and returns the CST's
 * placement relative to the lock.
 */
static int
ccms_lock_scan_cmp(ccms_cst_t cst, void *arg)
{
    struct ccms_lock_scan_info *info = arg;
    ccms_lock_t lock = info->lock;

    if (cst->end_offset < lock->beg_offset)
        return(-1);
    if (cst->beg_offset > lock->end_offset)
        return(1);
    return(0);
}

/*
 * This function works like ccms_lock_scan_cmp but terminates at the
 * collision point rather than at the lock's ending offset. Only
 * the CSTs that were already partially resolved are returned by the scan.
 */
static int
ccms_lock_undo_cmp(ccms_cst_t cst, void *arg)
{
    struct ccms_lock_scan_info *info = arg;
    ccms_lock_t lock = info->lock;

    if (cst->end_offset < lock->beg_offset)
        return(-1);
    if (cst->beg_offset >= info->coll_cst->beg_offset)
        return(1);
    return(0);
}