2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Scooter Morris at Genentech Inc.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
37 * $FreeBSD: src/sys/kern/kern_lockf.c,v 1.25 1999/11/16 16:28:56 phk Exp $
40 #include "opt_debug_lockf.h"
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
47 #include <sys/unistd.h>
48 #include <sys/vnode.h>
49 #include <sys/malloc.h>
50 #include <sys/fcntl.h>
52 #include <sys/lockf.h>
55 * This variable controls the maximum number of processes that will
56 * be checked in doing deadlock detection.
58 static int maxlockdepth = MAXDEPTH;
61 #include <sys/kernel.h>
62 #include <sys/sysctl.h>
64 #include <ufs/ufs/quota.h>
65 #include <ufs/ufs/inode.h>
68 static int lockf_debug = 0;
69 SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
72 static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
74 #define NOLOCKF (struct lockf *)0
77 static int lf_clearlock __P((struct lockf *));
78 static int lf_findoverlap __P((struct lockf *,
79 struct lockf *, int, struct lockf ***, struct lockf **));
81 lf_getblock __P((struct lockf *));
82 static int lf_getlock __P((struct lockf *, struct flock *));
83 static int lf_setlock __P((struct lockf *));
84 static void lf_split __P((struct lockf *, struct lockf *));
85 static void lf_wakelock __P((struct lockf *));
88 * Advisory record locking support
91 lf_advlock(ap, head, size)
92 struct vop_advlock_args /* {
102 register struct flock *fl = ap->a_fl;
103 register struct lockf *lock;
108 * Convert the flock structure into a start and end.
110 switch (fl->l_whence) {
115 * Caller is responsible for adding any necessary offset
116 * when SEEK_CUR is used.
122 start = size + fl->l_start;
133 end = start + fl->l_len - 1;
138 * Avoid the common case of unlocking when inode has no locks.
140 if (*head == (struct lockf *)0) {
141 if (ap->a_op != F_SETLK) {
142 fl->l_type = F_UNLCK;
147 * Create the lockf structure
149 MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
150 lock->lf_start = start;
152 lock->lf_id = ap->a_id;
153 /* lock->lf_inode = ip; */ /* XXX JH */
154 lock->lf_type = fl->l_type;
155 lock->lf_head = head;
156 lock->lf_next = (struct lockf *)0;
157 TAILQ_INIT(&lock->lf_blkhd);
158 lock->lf_flags = ap->a_flags;
160 * Do the requested operation.
164 return (lf_setlock(lock));
167 error = lf_clearlock(lock);
172 error = lf_getlock(lock, fl);
184 * Set a byte-range lock.
188 register struct lockf *lock;
190 register struct lockf *block;
191 struct lockf **head = lock->lf_head;
192 struct lockf **prev, *overlap, *ltmp;
193 static char lockstr[] = "lockf";
194 int ovcase, priority, needtolink, error;
198 lf_print("lf_setlock", lock);
199 #endif /* LOCKF_DEBUG */
205 if (lock->lf_type == F_WRLCK)
209 * Scan lock list for this file looking for locks that would block us.
211 while ((block = lf_getblock(lock))) {
213 * Free the structure and return if nonblocking.
215 if ((lock->lf_flags & F_WAIT) == 0) {
220 * We are blocked. Since flock style locks cover
221 * the whole file, there is no chance for deadlock.
222 * For byte-range locks we must check for deadlock.
224 * Deadlock detection is done by looking through the
225 * wait channels to see if there are any cycles that
226 * involve us. MAXDEPTH is set just to make sure we
227 * do not go off into neverland.
229 if ((lock->lf_flags & F_POSIX) &&
230 (block->lf_flags & F_POSIX)) {
231 register struct proc *wproc;
232 register struct lockf *waitblock;
235 /* The block is waiting on something */
236 wproc = (struct proc *)block->lf_id;
237 while (wproc->p_wchan &&
238 (wproc->p_wmesg == lockstr) &&
239 (i++ < maxlockdepth)) {
240 waitblock = (struct lockf *)wproc->p_wchan;
241 /* Get the owner of the blocking lock */
242 waitblock = waitblock->lf_next;
243 if ((waitblock->lf_flags & F_POSIX) == 0)
245 wproc = (struct proc *)waitblock->lf_id;
246 if (wproc == (struct proc *)lock->lf_id) {
253 * For flock type locks, we must first remove
254 * any shared locks that we hold before we sleep
255 * waiting for an exclusive lock.
257 if ((lock->lf_flags & F_FLOCK) &&
258 lock->lf_type == F_WRLCK) {
259 lock->lf_type = F_UNLCK;
260 (void) lf_clearlock(lock);
261 lock->lf_type = F_WRLCK;
264 * Add our lock to the blocked list and sleep until we're free.
265 * Remember who blocked us (for deadlock detection).
267 lock->lf_next = block;
268 TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
270 if (lockf_debug & 1) {
271 lf_print("lf_setlock: blocking on", block);
272 lf_printlist("lf_setlock", block);
274 #endif /* LOCKF_DEBUG */
275 error = tsleep((caddr_t)lock, priority, lockstr, 0);
277 * We may have been awakened by a signal and/or by a
278 * debugger continuing us (in which cases we must remove
279 * ourselves from the blocked list) and/or by another
280 * process releasing a lock (in which case we have
281 * already been removed from the blocked list and our
282 * lf_next field set to NOLOCKF).
285 TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
286 lock->lf_next = NOLOCKF;
294 * No blocks!! Add the lock. Note that we will
295 * downgrade or upgrade any overlapping locks this
296 * process already owns.
298 * Skip over locks owned by other processes.
299 * Handle any locks that overlap and are owned by ourselves.
305 ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
307 block = overlap->lf_next;
312 * 2) overlap contains lock
313 * 3) lock contains overlap
314 * 4) overlap starts before lock
315 * 5) overlap ends after lock
318 case 0: /* no overlap */
321 lock->lf_next = overlap;
325 case 1: /* overlap == lock */
327 * If downgrading lock, others may be
328 * able to acquire it.
330 if (lock->lf_type == F_RDLCK &&
331 overlap->lf_type == F_WRLCK)
332 lf_wakelock(overlap);
333 overlap->lf_type = lock->lf_type;
335 lock = overlap; /* for debug output below */
338 case 2: /* overlap contains lock */
340 * Check for common starting point and different types.
342 if (overlap->lf_type == lock->lf_type) {
344 lock = overlap; /* for debug output below */
347 if (overlap->lf_start == lock->lf_start) {
349 lock->lf_next = overlap;
350 overlap->lf_start = lock->lf_end + 1;
352 lf_split(overlap, lock);
353 lf_wakelock(overlap);
356 case 3: /* lock contains overlap */
358 * If downgrading lock, others may be able to
359 * acquire it, otherwise take the list.
361 if (lock->lf_type == F_RDLCK &&
362 overlap->lf_type == F_WRLCK) {
363 lf_wakelock(overlap);
365 while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
366 ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
367 TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
369 TAILQ_INSERT_TAIL(&lock->lf_blkhd,
371 ltmp->lf_next = lock;
375 * Add the new lock if necessary and delete the overlap.
379 lock->lf_next = overlap->lf_next;
380 prev = &lock->lf_next;
383 *prev = overlap->lf_next;
384 free(overlap, M_LOCKF);
387 case 4: /* overlap starts before lock */
389 * Add lock after overlap on the list.
391 lock->lf_next = overlap->lf_next;
392 overlap->lf_next = lock;
393 overlap->lf_end = lock->lf_start - 1;
394 prev = &lock->lf_next;
395 lf_wakelock(overlap);
399 case 5: /* overlap ends after lock */
401 * Add the new lock before overlap.
405 lock->lf_next = overlap;
407 overlap->lf_start = lock->lf_end + 1;
408 lf_wakelock(overlap);
414 if (lockf_debug & 1) {
415 lf_print("lf_setlock: got the lock", lock);
416 lf_printlist("lf_setlock", lock);
418 #endif /* LOCKF_DEBUG */
423 * Remove a byte-range lock on an inode.
425 * Generally, find the lock (or an overlap to that lock)
426 * and remove it (or shrink it), then wakeup anyone we can.
430 register struct lockf *unlock;
432 struct lockf **head = unlock->lf_head;
433 register struct lockf *lf = *head;
434 struct lockf *overlap, **prev;
440 if (unlock->lf_type != F_UNLCK)
441 panic("lf_clearlock: bad type");
443 lf_print("lf_clearlock", unlock);
444 #endif /* LOCKF_DEBUG */
446 while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
448 * Wakeup the list of locks to be retried.
450 lf_wakelock(overlap);
454 case 1: /* overlap == lock */
455 *prev = overlap->lf_next;
456 FREE(overlap, M_LOCKF);
459 case 2: /* overlap contains lock: split it */
460 if (overlap->lf_start == unlock->lf_start) {
461 overlap->lf_start = unlock->lf_end + 1;
464 lf_split(overlap, unlock);
465 overlap->lf_next = unlock->lf_next;
468 case 3: /* lock contains overlap */
469 *prev = overlap->lf_next;
470 lf = overlap->lf_next;
471 free(overlap, M_LOCKF);
474 case 4: /* overlap starts before lock */
475 overlap->lf_end = unlock->lf_start - 1;
476 prev = &overlap->lf_next;
477 lf = overlap->lf_next;
480 case 5: /* overlap ends after lock */
481 overlap->lf_start = unlock->lf_end + 1;
488 lf_printlist("lf_clearlock", unlock);
489 #endif /* LOCKF_DEBUG */
494 * Check whether there is a blocking lock,
495 * and if so return its process identifier.
499 register struct lockf *lock;
500 register struct flock *fl;
502 register struct lockf *block;
506 lf_print("lf_getlock", lock);
507 #endif /* LOCKF_DEBUG */
509 if ((block = lf_getblock(lock))) {
510 fl->l_type = block->lf_type;
511 fl->l_whence = SEEK_SET;
512 fl->l_start = block->lf_start;
513 if (block->lf_end == -1)
516 fl->l_len = block->lf_end - block->lf_start + 1;
517 if (block->lf_flags & F_POSIX)
518 fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
522 fl->l_type = F_UNLCK;
528 * Walk the list of locks for an inode and
529 * return the first blocking lock.
531 static struct lockf *
533 register struct lockf *lock;
535 struct lockf **prev, *overlap, *lf = *(lock->lf_head);
538 prev = lock->lf_head;
539 while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
541 * We've found an overlap, see if it blocks us
543 if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
546 * Nope, point to the next one on the list and
547 * see if it blocks us
549 lf = overlap->lf_next;
555 * Walk the list of locks for an inode to
556 * find an overlapping lock (if any).
558 * NOTE: this returns only the FIRST overlapping lock. There
559 * may be more than one.
562 lf_findoverlap(lf, lock, type, prev, overlap)
563 register struct lockf *lf;
566 struct lockf ***prev;
567 struct lockf **overlap;
576 lf_print("lf_findoverlap: looking for overlap in", lock);
577 #endif /* LOCKF_DEBUG */
578 start = lock->lf_start;
580 while (lf != NOLOCKF) {
581 if (((type & SELF) && lf->lf_id != lock->lf_id) ||
582 ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
583 *prev = &lf->lf_next;
584 *overlap = lf = lf->lf_next;
589 lf_print("\tchecking", lf);
590 #endif /* LOCKF_DEBUG */
592 * OK, check for overlap
597 * 2) overlap contains lock
598 * 3) lock contains overlap
599 * 4) overlap starts before lock
600 * 5) overlap ends after lock
602 if ((lf->lf_end != -1 && start > lf->lf_end) ||
603 (end != -1 && lf->lf_start > end)) {
607 printf("no overlap\n");
608 #endif /* LOCKF_DEBUG */
609 if ((type & SELF) && end != -1 && lf->lf_start > end)
611 *prev = &lf->lf_next;
612 *overlap = lf = lf->lf_next;
615 if ((lf->lf_start == start) && (lf->lf_end == end)) {
619 printf("overlap == lock\n");
620 #endif /* LOCKF_DEBUG */
623 if ((lf->lf_start <= start) &&
625 ((lf->lf_end >= end) || (lf->lf_end == -1))) {
629 printf("overlap contains lock\n");
630 #endif /* LOCKF_DEBUG */
633 if (start <= lf->lf_start &&
635 (lf->lf_end != -1 && end >= lf->lf_end))) {
639 printf("lock contains overlap\n");
640 #endif /* LOCKF_DEBUG */
643 if ((lf->lf_start < start) &&
644 ((lf->lf_end >= start) || (lf->lf_end == -1))) {
648 printf("overlap starts before lock\n");
649 #endif /* LOCKF_DEBUG */
652 if ((lf->lf_start > start) &&
654 ((lf->lf_end > end) || (lf->lf_end == -1))) {
658 printf("overlap ends after lock\n");
659 #endif /* LOCKF_DEBUG */
662 panic("lf_findoverlap: default");
668 * Split a lock and a contained region into
669 * two or three locks as necessary.
672 lf_split(lock1, lock2)
673 register struct lockf *lock1;
674 register struct lockf *lock2;
676 register struct lockf *splitlock;
679 if (lockf_debug & 2) {
680 lf_print("lf_split", lock1);
681 lf_print("splitting from", lock2);
683 #endif /* LOCKF_DEBUG */
685 * Check to see if spliting into only two pieces.
687 if (lock1->lf_start == lock2->lf_start) {
688 lock1->lf_start = lock2->lf_end + 1;
689 lock2->lf_next = lock1;
692 if (lock1->lf_end == lock2->lf_end) {
693 lock1->lf_end = lock2->lf_start - 1;
694 lock2->lf_next = lock1->lf_next;
695 lock1->lf_next = lock2;
699 * Make a new lock consisting of the last part of
700 * the encompassing lock
702 MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
703 bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
704 splitlock->lf_start = lock2->lf_end + 1;
705 TAILQ_INIT(&splitlock->lf_blkhd);
706 lock1->lf_end = lock2->lf_start - 1;
710 splitlock->lf_next = lock1->lf_next;
711 lock2->lf_next = splitlock;
712 lock1->lf_next = lock2;
719 lf_wakelock(listhead)
720 struct lockf *listhead;
722 register struct lockf *wakelock;
724 while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
725 wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
726 TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
727 wakelock->lf_next = NOLOCKF;
730 lf_print("lf_wakelock: awakening", wakelock);
731 #endif /* LOCKF_DEBUG */
732 wakeup((caddr_t)wakelock);
743 register struct lockf *lock;
746 printf("%s: lock %p for ", tag, (void *)lock);
747 if (lock->lf_flags & F_POSIX)
748 printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
750 printf("id %p", (void *)lock->lf_id);
751 /* XXX no %qd in kernel. Truncate. */
752 printf(" in ino %lu on dev <%d, %d>, %s, start %ld, end %ld",
753 (u_long)lock->lf_inode->i_number,
754 major(lock->lf_inode->i_dev),
755 minor(lock->lf_inode->i_dev),
756 lock->lf_type == F_RDLCK ? "shared" :
757 lock->lf_type == F_WRLCK ? "exclusive" :
758 lock->lf_type == F_UNLCK ? "unlock" :
759 "unknown", (long)lock->lf_start, (long)lock->lf_end);
760 if (!TAILQ_EMPTY(&lock->lf_blkhd))
761 printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
767 lf_printlist(tag, lock)
771 register struct lockf *lf, *blk;
773 printf("%s: Lock list for ino %lu on dev <%d, %d>:\n",
774 tag, (u_long)lock->lf_inode->i_number,
775 major(lock->lf_inode->i_dev),
776 minor(lock->lf_inode->i_dev));
777 for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
778 printf("\tlock %p for ",(void *)lf);
779 if (lf->lf_flags & F_POSIX)
781 (long)((struct proc *)lf->lf_id)->p_pid);
783 printf("id %p", (void *)lf->lf_id);
784 /* XXX no %qd in kernel. Truncate. */
785 printf(", %s, start %ld, end %ld",
786 lf->lf_type == F_RDLCK ? "shared" :
787 lf->lf_type == F_WRLCK ? "exclusive" :
788 lf->lf_type == F_UNLCK ? "unlock" :
789 "unknown", (long)lf->lf_start, (long)lf->lf_end);
790 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
791 printf("\n\t\tlock request %p for ", (void *)blk);
792 if (blk->lf_flags & F_POSIX)
794 (long)((struct proc *)blk->lf_id)->p_pid);
796 printf("id %p", (void *)blk->lf_id);
797 /* XXX no %qd in kernel. Truncate. */
798 printf(", %s, start %ld, end %ld",
799 blk->lf_type == F_RDLCK ? "shared" :
800 blk->lf_type == F_WRLCK ? "exclusive" :
801 blk->lf_type == F_UNLCK ? "unlock" :
802 "unknown", (long)blk->lf_start,
804 if (!TAILQ_EMPTY(&blk->lf_blkhd))
805 panic("lf_printlist: bad list");
810 #endif /* LOCKF_DEBUG */