/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_ccms.c,v 1.1 2006/08/23 06:45:39 dillon Exp $
 */
/*
 * The Cache Coherency Management System (CCMS)
 */
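
/*
 * Typical call pattern (a sketch, not taken from any in-tree consumer):
 * embed a ccms_dataspace in the object being protected, initialize it once,
 * then bracket each I/O with a lock/unlock pair on a stack-local ccms_lock.
 * The names "space" and "lock" below are hypothetical.
 *
 *	struct ccms_dataspace space;	(embedded in the protected object)
 *	struct ccms_lock lock;		(typically on the caller's stack)
 *
 *	ccms_dataspace_init(&space);
 *	...
 *	ccms_lock_get_uio(&space, &lock, uio);
 *	... perform the I/O described by uio ...
 *	ccms_lock_put(&space, &lock);
 *	...
 *	ccms_dataspace_destroy(&space);
 */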

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/objcache.h>
#include <sys/ccms.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <machine/limits.h>

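/*
 * Context passed to the RB_SCAN callbacks while acquiring or releasing a
 * lock.  cst1 and cst2 are CSTs preallocated by ccms_lock_get() and
 * consumed by ccms_lock_get_match() when a left-edge (cst1) or right-edge
 * (cst2) split is required; coll_cst records the first CST the requested
 * lock type collided with, if any.
 */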
struct ccms_lock_scan_info {
	ccms_dataspace_t ds;
	ccms_lock_t lock;
	ccms_cst_t cst1;
	ccms_cst_t cst2;
	ccms_cst_t coll_cst;
};

static int ccms_cst_cmp(ccms_cst_t b1, ccms_cst_t b2);
static int ccms_lock_scan_cmp(ccms_cst_t b1, void *arg);
static int ccms_lock_undo_cmp(ccms_cst_t b1, void *arg);
static int ccms_dataspace_destroy_match(ccms_cst_t cst, void *arg);
static int ccms_lock_get_match(struct ccms_cst *cst, void *arg);
static int ccms_lock_undo_match(struct ccms_cst *cst, void *arg);
static int ccms_lock_redo_match(struct ccms_cst *cst, void *arg);
static int ccms_lock_put_match(struct ccms_cst *cst, void *arg);

RB_GENERATE3(ccms_rb_tree, ccms_cst, rbnode, ccms_cst_cmp,
	     off_t, beg_offset, end_offset);
static MALLOC_DEFINE(M_CCMS, "CCMS", "Cache Coherency Management System");

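/*
 * ccms_enable gates the whole subsystem: while it is 0, ccms_lock_get()
 * returns immediately without touching the CST tree.  Values >= 9 also
 * enable the verbose debugging printfs in the lock paths.
 */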
static int ccms_enable;
SYSCTL_INT(_kern, OID_AUTO, ccms_enable, CTLFLAG_RW, &ccms_enable, 0, "");

static struct objcache *ccms_oc;

/*
 * Initialize the CCMS subsystem
 */
static void
ccmsinit(void *dummy)
{
	ccms_oc = objcache_create_simple(M_CCMS, sizeof(struct ccms_cst));
	printf("CCMSINIT\n");
}
SYSINIT(ccms, SI_SUB_OBJCACHE, SI_ORDER_ANY, ccmsinit, NULL);

/*
 * Initialize a new CCMS dataspace.  Create a new RB tree with a single
 * element covering the entire 64-bit offset range.  This simplifies
 * algorithms enormously by removing a number of special cases.
 */
void
ccms_dataspace_init(ccms_dataspace_t ds)
{
	ccms_cst_t cst;

	RB_INIT(&ds->tree);
	ds->info = NULL;
	ds->chain = NULL;
	cst = objcache_get(ccms_oc, M_WAITOK);
	bzero(cst, sizeof(*cst));
	cst->beg_offset = LLONG_MIN;
	cst->end_offset = LLONG_MAX;
	cst->state = CCMS_STATE_INVALID;
	RB_INSERT(ccms_rb_tree, &ds->tree, cst);
}

/*
 * Destroy a CCMS dataspace.
 */
void
ccms_dataspace_destroy(ccms_dataspace_t ds)
{
	RB_SCAN(ccms_rb_tree, &ds->tree, NULL,
		ccms_dataspace_destroy_match, ds);
}

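/*
 * RB_SCAN callback for ccms_dataspace_destroy(): detach each CST from the
 * tree and return it to the objcache.
 */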
static
int
ccms_dataspace_destroy_match(ccms_cst_t cst, void *arg)
{
	ccms_dataspace_t ds = arg;

	RB_REMOVE(ccms_rb_tree, &ds->tree, cst);
	objcache_put(ccms_oc, cst);
	return(0);
}

/*
 * Obtain a CCMS lock
 */
int
ccms_lock_get(ccms_dataspace_t ds, ccms_lock_t lock)
{
	struct ccms_lock_scan_info info;

	if (ccms_enable == 0) {
		lock->ds = NULL;
		return(0);
	}

	/*
	 * Partition the CST space so the precise range is covered and
	 * attempt to obtain the requested local lock (ltype) at the same
	 * time.
	 */
	lock->ds = ds;
	info.lock = lock;
	info.ds = ds;
	info.coll_cst = NULL;
	info.cst1 = objcache_get(ccms_oc, M_WAITOK);
	info.cst2 = objcache_get(ccms_oc, M_WAITOK);

	RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
		ccms_lock_get_match, &info);

	/*
	 * If a collision occurred, undo the fragments we were able to obtain,
	 * block, and try again.
	 */
	while (info.coll_cst != NULL) {
		RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_undo_cmp,
			ccms_lock_undo_match, &info);
		info.coll_cst->blocked = 1;
		tsleep(info.coll_cst, 0,
		       ((lock->ltype == CCMS_LTYPE_SHARED) ? "rngsh" : "rngex"),
		       hz);
		info.coll_cst = NULL;
		RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
			ccms_lock_redo_match, &info);
	}

	/*
	 * Cleanup
	 */
	if (info.cst1)
		objcache_put(ccms_oc, info.cst1);
	if (info.cst2)
		objcache_put(ccms_oc, info.cst2);

	return(0);
}

/*
 * Obtain a CCMS lock, initialize the lock structure from the uio.
 */
int
ccms_lock_get_uio(ccms_dataspace_t ds, ccms_lock_t lock, struct uio *uio)
{
	ccms_ltype_t ltype;
	off_t eoff;

	if (uio->uio_rw == UIO_READ)
		ltype = CCMS_LTYPE_SHARED;
	else
		ltype = CCMS_LTYPE_MODIFYING;

	/*
	 * Calculate the ending offset (byte inclusive), make sure a seek
	 * overflow does not blow us up.
	 */
	eoff = uio->uio_offset + uio->uio_resid - 1;
	if (eoff < uio->uio_offset)
		eoff = 0x7FFFFFFFFFFFFFFFLL;	/* i.e. LLONG_MAX */
	ccms_lock_init(lock, uio->uio_offset, eoff, ltype);
	return(ccms_lock_get(ds, lock));
}

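/*
 * RB_SCAN callback for ccms_lock_get().  Each CST overlapping the lock is
 * split as needed so the CST boundaries line up with the lock's range,
 * then the requested local lock type is applied.  sharecount uses a signed
 * convention: positive values count shared holders, negative values count
 * exclusive/modifying holders, and zero means no local lock is held.  A
 * collision is recorded in info->coll_cst rather than handled here so the
 * scan can finish partitioning the range first.
 */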
static
int
ccms_lock_get_match(ccms_cst_t cst, void *arg)
{
	struct ccms_lock_scan_info *info = arg;
	ccms_lock_t lock = info->lock;
	ccms_cst_t ncst;

	/*
	 * If the lock's left edge is within the CST we must split the CST
	 * into two pieces [cst][ncst].  lrefs must be bumped on the CST
	 * containing the left edge.
	 *
	 * NOTE! cst->beg_offset may not be modified.  This allows us to avoid
	 * having to manipulate the cst's position in the tree.
	 */
	if (lock->beg_offset > cst->beg_offset) {
		ncst = info->cst1;
		info->cst1 = NULL;
		KKASSERT(ncst != NULL);
		*ncst = *cst;
		cst->end_offset = lock->beg_offset - 1;
		cst->rrefs = 0;
		ncst->beg_offset = lock->beg_offset;
		ncst->lrefs = 1;
		RB_INSERT(ccms_rb_tree, &info->ds->tree, ncst);

		/*
		 * ncst becomes our 'matching' cst.
		 */
		cst = ncst;
	} else if (lock->beg_offset == cst->beg_offset) {
		++cst->lrefs;
	}

	/*
	 * If the lock's right edge is within the CST we must split the CST
	 * into two pieces [cst][ncst].  rrefs must be bumped on the CST
	 * containing the right edge.
	 *
	 * NOTE! cst->beg_offset may not be modified.  This allows us to avoid
	 * having to manipulate the cst's position in the tree.
	 */
	if (lock->end_offset < cst->end_offset) {
		ncst = info->cst2;
		info->cst2 = NULL;
		KKASSERT(ncst != NULL);
		*ncst = *cst;
		cst->end_offset = lock->end_offset;
		cst->rrefs = 1;
		ncst->beg_offset = lock->end_offset + 1;
		ncst->lrefs = 0;
		RB_INSERT(ccms_rb_tree, &info->ds->tree, ncst);
		/* cst remains our 'matching' cst */
	} else if (lock->end_offset == cst->end_offset) {
		++cst->rrefs;
	}

	/*
	 * The lock covers the CST, so increment the CST's coverage count.
	 * Then attempt to obtain the shared/exclusive ltype.
	 */
	++cst->xrefs;

	if (info->coll_cst == NULL) {
		switch(lock->ltype) {
		case CCMS_LTYPE_SHARED:
			if (cst->sharecount < 0) {
				info->coll_cst = cst;
			} else {
				++cst->sharecount;
				if (ccms_enable >= 9) {
					printf("CST SHARE %d %lld-%lld\n", cst->sharecount,
					       cst->beg_offset, cst->end_offset);
				}
			}
			break;
		case CCMS_LTYPE_EXCLUSIVE:
			if (cst->sharecount != 0) {
				info->coll_cst = cst;
			} else {
				--cst->sharecount;
				if (ccms_enable >= 9) {
					printf("CST EXCLS %d %lld-%lld\n", cst->sharecount,
					       cst->beg_offset, cst->end_offset);
				}
			}
			break;
		case CCMS_LTYPE_MODIFYING:
			if (cst->sharecount != 0) {
				info->coll_cst = cst;
			} else {
				--cst->sharecount;
				++cst->modifycount;
				if (ccms_enable >= 9) {
					printf("CST MODXL %d %lld-%lld\n", cst->sharecount,
					       cst->beg_offset, cst->end_offset);
				}
			}
			break;
		}
	}
	return(0);
}

/*
 * Undo a partially resolved ccms_ltype rangelock.  This is atomic with
 * the scan/redo code so there should not be any blocked locks when
 * transitioning to 0.
 */
static
int
ccms_lock_undo_match(ccms_cst_t cst, void *arg)
{
	struct ccms_lock_scan_info *info = arg;
	ccms_lock_t lock = info->lock;

	switch(lock->ltype) {
	case CCMS_LTYPE_SHARED:
		KKASSERT(cst->sharecount > 0);
		--cst->sharecount;
		KKASSERT(cst->sharecount || cst->blocked == 0);
		break;
	case CCMS_LTYPE_EXCLUSIVE:
		KKASSERT(cst->sharecount < 0);
		++cst->sharecount;
		KKASSERT(cst->sharecount || cst->blocked == 0);
		break;
	case CCMS_LTYPE_MODIFYING:
		KKASSERT(cst->sharecount < 0 && cst->modifycount > 0);
		++cst->sharecount;
		--cst->modifycount;
		KKASSERT(cst->sharecount || cst->blocked == 0);
		break;
	}
	return(0);
}

/*
 * Redo the local lock request for a range which has already been
 * partitioned.
 */
static
int
ccms_lock_redo_match(ccms_cst_t cst, void *arg)
{
	struct ccms_lock_scan_info *info = arg;
	ccms_lock_t lock = info->lock;

	if (info->coll_cst == NULL) {
		switch(lock->ltype) {
		case CCMS_LTYPE_SHARED:
			if (cst->sharecount < 0) {
				info->coll_cst = cst;
			} else {
				if (ccms_enable >= 9) {
					printf("CST SHARE %d %lld-%lld\n", cst->sharecount,
					       cst->beg_offset, cst->end_offset);
				}
				++cst->sharecount;
			}
			break;
		case CCMS_LTYPE_EXCLUSIVE:
			if (cst->sharecount != 0) {
				info->coll_cst = cst;
			} else {
				--cst->sharecount;
				if (ccms_enable >= 9) {
					printf("CST EXCLS %d %lld-%lld\n", cst->sharecount,
					       cst->beg_offset, cst->end_offset);
				}
			}
			break;
		case CCMS_LTYPE_MODIFYING:
			if (cst->sharecount != 0) {
				info->coll_cst = cst;
			} else {
				--cst->sharecount;
				++cst->modifycount;
				if (ccms_enable >= 9) {
					printf("CST MODXL %d %lld-%lld\n", cst->sharecount,
					       cst->beg_offset, cst->end_offset);
				}
			}
			break;
		}
	}
	return(0);
}

/*
 * Release a CCMS lock
 */
int
ccms_lock_put(ccms_dataspace_t ds, ccms_lock_t lock)
{
	struct ccms_lock_scan_info info;

	if (lock->ds == NULL)
		return(0);

	lock->ds = NULL;
	info.lock = lock;
	info.ds = ds;
	info.cst1 = NULL;
	info.cst2 = NULL;

	RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
		ccms_lock_put_match, &info);

	if (info.cst1)
		objcache_put(ccms_oc, info.cst1);
	if (info.cst2)
		objcache_put(ccms_oc, info.cst2);
	return(0);
}

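/*
 * RB_SCAN callback for ccms_lock_put().  Undo the local lock on each CST
 * covered by the lock, wake up anyone blocked on a CST whose count returns
 * to zero, and try to merge the CST with its neighbors once its edge
 * reference counts drop to zero.
 */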
static
int
ccms_lock_put_match(ccms_cst_t cst, void *arg)
{
	struct ccms_lock_scan_info *info = arg;
	ccms_lock_t lock = info->lock;
	ccms_cst_t ocst;

	/*
	 * Undo the local shared/exclusive rangelock.
	 */
	switch(lock->ltype) {
	case CCMS_LTYPE_SHARED:
		KKASSERT(cst->sharecount > 0);
		--cst->sharecount;
		if (ccms_enable >= 9) {
			printf("CST UNSHR %d %lld-%lld (%d)\n", cst->sharecount,
			       cst->beg_offset, cst->end_offset, cst->blocked);
		}
		if (cst->blocked && cst->sharecount == 0) {
			cst->blocked = 0;
			wakeup(cst);
		}
		break;
	case CCMS_LTYPE_EXCLUSIVE:
		KKASSERT(cst->sharecount < 0);
		++cst->sharecount;
		if (ccms_enable >= 9) {
			printf("CST UNEXC %d %lld-%lld (%d)\n", cst->sharecount,
			       cst->beg_offset, cst->end_offset, cst->blocked);
		}
		if (cst->blocked && cst->sharecount == 0) {
			cst->blocked = 0;
			wakeup(cst);
		}
		break;
	case CCMS_LTYPE_MODIFYING:
		KKASSERT(cst->sharecount < 0 && cst->modifycount > 0);
		++cst->sharecount;
		--cst->modifycount;
		if (ccms_enable >= 9) {
			printf("CST UNMOD %d %lld-%lld (%d)\n", cst->sharecount,
			       cst->beg_offset, cst->end_offset, cst->blocked);
		}
		if (cst->blocked && cst->sharecount == 0) {
			cst->blocked = 0;
			wakeup(cst);
		}
		break;
	}

	/*
	 * Decrement the lock coverage count on the CST.  Decrement the left
	 * and right edge counts as appropriate.
	 *
	 * When lrefs or rrefs drops to zero we check the adjacent entry to
	 * determine whether a merge is possible.  If the appropriate refs
	 * field (rrefs for the entry to our left, lrefs for the entry to our
	 * right) is 0, then all covering locks must cover both entries and
	 * the xrefs field must match.  We can then merge the entries if they
	 * have compatible cache states.
	 *
	 * However, because we are cleaning up the shared/exclusive count at
	 * the same time, the sharecount field may be temporarily out of
	 * sync, so require that the sharecount field also match before doing
	 * a merge.
	 *
	 * When merging an element which is being blocked on, the blocking
	 * thread(s) will be woken up.
	 *
	 * If the dataspace has too many CSTs we may be able to merge the
	 * entries even if their cache states are not the same, by dropping
	 * both to a compatible (lower) cache state and performing the
	 * appropriate management operations.  XXX
	 */
	--cst->xrefs;
	if (lock->beg_offset == cst->beg_offset) {
		--cst->lrefs;
		if (cst->lrefs == 0) {
			if ((ocst = RB_PREV(ccms_rb_tree, &info->ds->tree, cst)) != NULL &&
			    ocst->rrefs == 0 &&
			    ocst->state == cst->state &&
			    ocst->sharecount == cst->sharecount
			) {
				KKASSERT(ocst->xrefs == cst->xrefs);
				KKASSERT(ocst->end_offset + 1 == cst->beg_offset);
				RB_REMOVE(ccms_rb_tree, &info->ds->tree, ocst);
				cst->beg_offset = ocst->beg_offset;
				cst->lrefs = ocst->lrefs;
				if (ccms_enable >= 9) {
					printf("MERGELEFT %p %lld-%lld (%d)\n",
					       ocst, cst->beg_offset, cst->end_offset,
					       cst->blocked);
				}
				if (ocst->blocked) {
					ocst->blocked = 0;
					wakeup(ocst);
				}
				objcache_put(ccms_oc, ocst);
			}
		}
	}
	if (lock->end_offset == cst->end_offset) {
		--cst->rrefs;
		if (cst->rrefs == 0) {
			if ((ocst = RB_NEXT(ccms_rb_tree, &info->ds->tree, cst)) != NULL &&
			    ocst->lrefs == 0 &&
			    ocst->state == cst->state &&
			    ocst->sharecount == cst->sharecount
			) {
				KKASSERT(ocst->xrefs == cst->xrefs);
				KKASSERT(cst->end_offset + 1 == ocst->beg_offset);
				RB_REMOVE(ccms_rb_tree, &info->ds->tree, ocst);
				cst->end_offset = ocst->end_offset;
				cst->rrefs = ocst->rrefs;
				if (ccms_enable >= 9) {
					printf("MERGERIGHT %p %lld-%lld\n",
					       ocst, cst->beg_offset, cst->end_offset);
				}
				objcache_put(ccms_oc, ocst);
			}
		}
	}
	return(0);
}


/*
 * RB tree compare function for insertions and deletions.  This function
 * compares two CSTs.
 */
static int
ccms_cst_cmp(ccms_cst_t b1, ccms_cst_t b2)
{
	if (b1->end_offset < b2->beg_offset)
		return(-1);
	if (b1->beg_offset > b2->end_offset)
		return(1);
	return(0);
}

/*
 * RB tree scanning compare function.  This function compares the CST
 * from the tree against the supplied ccms_lock and returns the CST's
 * placement relative to the lock.
 */
static int
ccms_lock_scan_cmp(ccms_cst_t cst, void *arg)
{
	struct ccms_lock_scan_info *info = arg;
	ccms_lock_t lock = info->lock;

	if (cst->end_offset < lock->beg_offset)
		return(-1);
	if (cst->beg_offset > lock->end_offset)
		return(1);
	return(0);
}

/*
 * This function works like ccms_lock_scan_cmp but terminates at the
 * collision point rather than at the lock's ending offset.  Only
 * the CSTs that were already partially resolved are returned by the scan.
 */
static int
ccms_lock_undo_cmp(ccms_cst_t cst, void *arg)
{
	struct ccms_lock_scan_info *info = arg;
	ccms_lock_t lock = info->lock;

	if (cst->end_offset < lock->beg_offset)
		return(-1);
	if (cst->beg_offset >= info->coll_cst->beg_offset)
		return(1);
	return(0);
}