kernel - Fix global counter used in lockf assertions
sys/kern/kern_lockf.c (dragonfly.git)
/*
 * Copyright (c) 2004 Joerg Sonnenberger <joerg@bec.de>.  All rights reserved.
 * Copyright (c) 2006 Matthew Dillon <dillon@backplane.com>.  All rights reserved.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 * $FreeBSD: src/sys/kern/kern_lockf.c,v 1.25 1999/11/16 16:28:56 phk Exp $
 * $DragonFly: src/sys/kern/kern_lockf.c,v 1.37 2007/11/01 22:48:16 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/resourcevar.h>

#include <sys/lockf.h>
#include <machine/limits.h>	/* for LLONG_MAX */
#include <machine/stdarg.h>

#include <sys/spinlock2.h>

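/*
 * Under INVARIANTS, lf_global_counter tracks the number of lockf_range
 * structures currently allocated (bumped in lf_alloc_range(), dropped in
 * lf_destroy_range()).  Ranges can be allocated and freed from any cpu,
 * so the counter is adjusted with atomic_add_int() and only asserted to
 * be non-negative.
 */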
#ifdef INVARIANTS
int lf_global_counter = 0;
#endif

#ifdef LOCKF_DEBUG
int lf_print_ranges = 0;

static void	_lf_print_lock(const struct lockf *);
static void	_lf_printf(const char *, ...);

#define lf_print_lock(lock)	if (lf_print_ranges) _lf_print_lock(lock)
#define lf_printf(ctl, args...)	if (lf_print_ranges) _lf_printf(ctl, args)
#else
#define lf_print_lock(lock)
#define lf_printf(ctl, args...)
#endif

static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

static void	lf_wakeup(struct lockf *, off_t, off_t);
static struct lockf_range *lf_alloc_range(void);
static void	lf_create_range(struct lockf_range *, struct proc *, int, int,
				off_t, off_t);
static void	lf_insert(struct lockf_range_list *list,
				struct lockf_range *elm,
				struct lockf_range *insert_point);
static void	lf_destroy_range(struct lockf_range *);

static int	lf_setlock(struct lockf *, struct proc *, int, int,
			   off_t, off_t);
static int	lf_getlock(struct flock *, struct lockf *, struct proc *,
			   int, int, off_t, off_t);

static int	lf_count_change(struct proc *, int);

/*
 * Return TRUE (non-zero) if the type and posix flags match.
 */
static __inline
int
lf_match(struct lockf_range *range, int type, int flags)
{
	if (range->lf_type != type)
		return(0);
	if ((range->lf_flags ^ flags) & F_POSIX)
		return(0);
	return(1);
}

/*
 * Check whether range and [start, end] overlap.
 */
static __inline
int
lf_overlap(const struct lockf_range *range, off_t start, off_t end)
{
	if (range->lf_start >= start && range->lf_start <= end)
		return(1);
	else if (start >= range->lf_start && start <= range->lf_end)
		return(1);
	else
		return(0);
}


/*
 * Change the POSIX lock accounting for the given process.
 */
void
lf_count_adjust(struct proc *p, int increase)
{
	struct uidinfo *uip;

	KKASSERT(p != NULL);

	uip = p->p_ucred->cr_uidinfo;
	spin_lock(&uip->ui_lock);

	if (increase)
		uip->ui_posixlocks += p->p_numposixlocks;
	else
		uip->ui_posixlocks -= p->p_numposixlocks;

	KASSERT(uip->ui_posixlocks >= 0,
		("Negative number of POSIX locks held by %s user: %d.",
		 increase ? "new" : "old", uip->ui_posixlocks));
	spin_unlock(&uip->ui_lock);
}

static int
lf_count_change(struct proc *owner, int diff)
{
	struct uidinfo *uip;
	int max, ret;

	/* we might actually not have a process context */
	if (owner == NULL)
		return(0);

	uip = owner->p_ucred->cr_uidinfo;

	max = MIN(owner->p_rlimit[RLIMIT_POSIXLOCKS].rlim_cur,
		  maxposixlocksperuid);

	spin_lock(&uip->ui_lock);
	if (diff > 0 && owner->p_ucred->cr_uid != 0 && max != -1 &&
	    uip->ui_posixlocks >= max) {
		ret = 1;
	} else {
		uip->ui_posixlocks += diff;
		owner->p_numposixlocks += diff;
		KASSERT(uip->ui_posixlocks >= 0,
			("Negative number of POSIX locks held by user: %d.",
			 uip->ui_posixlocks));
		KASSERT(owner->p_numposixlocks >= 0,
			("Negative number of POSIX locks held by proc: %d.",
			 owner->p_numposixlocks));
		ret = 0;
	}
	spin_unlock(&uip->ui_lock);
	return ret;
}

/*
 * Advisory record locking support
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf *lock, u_quad_t size)
{
	struct flock *fl = ap->a_fl;
	struct proc *owner;
	off_t start, end;
	int type, flags, error;
	lwkt_token_t token;

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		start = size + fl->l_start;
		break;

	default:
		return(EINVAL);
	}

	flags = ap->a_flags;
	if (start < 0)
		return(EINVAL);
	if (fl->l_len == 0) {
		flags |= F_NOEND;
		end = LLONG_MAX;
	} else if (fl->l_len < 0) {
		return(EINVAL);
	} else {
		end = start + fl->l_len - 1;
		if (end < start)
			return(EINVAL);
	}
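	/*
	 * Illustrative examples of the conversion above:
	 *
	 *	l_whence = SEEK_SET, l_start = 100, l_len = 50
	 *		-> lock range [100, 149]
	 *	l_whence = SEEK_SET, l_start = 100, l_len = 0
	 *		-> lock range [100, LLONG_MAX] with F_NOEND set
	 *		   ("to end of file" semantics)
	 */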

	type = fl->l_type;
	/*
	 * This isn't really correct for flock-style locks,
	 * but the current handling is somewhat broken anyway.
	 */
	owner = (struct proc *)ap->a_id;

	/*
	 * Do the requested operation.
	 */
	token = lwkt_getpooltoken(lock);

	if (lock->init_done == 0) {
		TAILQ_INIT(&lock->lf_range);
		TAILQ_INIT(&lock->lf_blocked);
		lock->init_done = 1;
	}

	switch(ap->a_op) {
	case F_SETLK:
		/*
		 * NOTE: It is possible for both lf_range and lf_blocked to
		 * be empty if we block and get woken up, but another process
		 * then gets in and issues an unlock.  So VMAYHAVELOCKS must
		 * be set after the lf_setlock() operation completes rather
		 * than before.
		 */
		error = lf_setlock(lock, owner, type, flags, start, end);
		vsetflags(ap->a_vp, VMAYHAVELOCKS);
		break;

	case F_UNLCK:
		error = lf_setlock(lock, owner, type, flags, start, end);
		if (TAILQ_EMPTY(&lock->lf_range) &&
		    TAILQ_EMPTY(&lock->lf_blocked)) {
			vclrflags(ap->a_vp, VMAYHAVELOCKS);
		}
		break;

	case F_GETLK:
		error = lf_getlock(fl, lock, owner, type, flags, start, end);
		break;

	default:
		error = EINVAL;
		break;
	}
	lwkt_reltoken(token);
	return(error);
}
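
/*
 * lf_advlock() is normally invoked from a filesystem's VOP_ADVLOCK
 * implementation.  A typical caller looks roughly like the sketch below;
 * the "xxx" names and the inode fields (i_lockf, i_size) are illustrative
 * only and vary per filesystem:
 *
 *	static int
 *	xxx_advlock(struct vop_advlock_args *ap)
 *	{
 *		struct xxx_inode *ip = VTOI(ap->a_vp);
 *
 *		return (lf_advlock(ap, &ip->i_lockf, ip->i_size));
 *	}
 */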

static int
lf_setlock(struct lockf *lock, struct proc *owner, int type, int flags,
	   off_t start, off_t end)
{
	struct lockf_range *range;
	struct lockf_range *brange;
	struct lockf_range *next;
	struct lockf_range *first_match;
	struct lockf_range *last_match;
	struct lockf_range *insert_point;
	struct lockf_range *new_range1;
	struct lockf_range *new_range2;
	int wakeup_needed;
	int double_clip;
	int unlock_override;
	int error = 0;
	int count;
	struct lockf_range_list deadlist;

	new_range1 = NULL;
	new_range2 = NULL;
	count = 0;

restart:
	/*
	 * Preallocate two ranges so we don't have to worry about blocking
	 * in the middle of the lock code.
	 */
	if (new_range1 == NULL)
		new_range1 = lf_alloc_range();
	if (new_range2 == NULL)
		new_range2 = lf_alloc_range();
	first_match = NULL;
	last_match = NULL;
	insert_point = NULL;
	wakeup_needed = 0;

	lf_print_lock(lock);

	/*
	 * Locate the insertion point for the new lock (the first range
	 * with an lf_start >= start).
	 *
	 * Locate the first and last ranges owned by us that overlap
	 * the requested range.
	 */
	TAILQ_FOREACH(range, &lock->lf_range, lf_link) {
		if (insert_point == NULL && range->lf_start >= start)
			insert_point = range;

		/*
		 * Skip non-overlapping locks.  Locks are sorted by lf_start,
		 * so we can terminate the search when lf_start exceeds the
		 * requested range (insert_point is still guaranteed to be
		 * set properly).
		 */
		if (range->lf_end < start)
			continue;
		if (range->lf_start > end) {
			range = NULL;
			break;
		}

		/*
		 * Overlapping lock.  Set first_match and last_match if we
		 * are the owner.
		 */
		if (range->lf_owner == owner) {
			if (first_match == NULL)
				first_match = range;
			last_match = range;
			continue;
		}

		/*
		 * If we aren't the owner check for a conflicting lock.  Only
		 * do so if not unlocking.
		 */
		if (type != F_UNLCK) {
			if (type == F_WRLCK || range->lf_type == F_WRLCK)
				break;
		}
	}

	/*
	 * If a conflicting lock was observed, block or fail as appropriate.
	 * (this code is skipped when unlocking)
	 */
	if (range != NULL) {
		if ((flags & F_WAIT) == 0) {
			error = EAGAIN;
			goto do_cleanup;
		}

		/*
		 * We are blocked.  For POSIX locks we have to check
		 * for deadlocks and return with EDEADLK.  This is done
		 * by checking whether range->lf_owner is already
		 * blocked.
		 *
		 * Since flock-style locks cover the whole file, a
		 * deadlock between those is nearly impossible.
		 * This can only occur if a process tries to lock the
		 * same inode exclusively while holding a shared lock
		 * with another descriptor.
		 * XXX How can we cleanly detect this?
		 * XXX The current mixing of flock & fcntl/lockf is evil.
		 *
		 * Handle existing locks of flock-style like POSIX locks.
		 */
		if (flags & F_POSIX) {
			TAILQ_FOREACH(brange, &lock->lf_blocked, lf_link) {
				if (brange->lf_owner == range->lf_owner) {
					error = EDEADLK;
					goto do_cleanup;
				}
			}
		}

		/*
		 * For flock-style locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((flags & F_POSIX) == 0 && type == F_WRLCK)
			lf_setlock(lock, owner, F_UNLCK, 0, start, end);

		brange = new_range1;
		new_range1 = NULL;
		lf_create_range(brange, owner, type, 0, start, end);
		TAILQ_INSERT_TAIL(&lock->lf_blocked, brange, lf_link);
		error = tsleep(brange, PCATCH, "lockf", 0);

		/*
		 * We may have been awakened by a signal and/or by a
		 * debugger continuing us (in which case we must remove
		 * ourselves from the blocked list) and/or by another
		 * process releasing/downgrading a lock (in which case
		 * we have already been removed from the blocked list
		 * and our lf_flags field is 1).
		 *
		 * Sleep if it looks like we might be livelocking.
		 */
		if (brange->lf_flags == 0)
			TAILQ_REMOVE(&lock->lf_blocked, brange, lf_link);
		if (count == 2)
			tsleep(brange, 0, "lockfz", 2);
		else
			++count;
		lf_destroy_range(brange);

		if (error)
			goto do_cleanup;
		goto restart;
	}

	/*
	 * If there are no overlapping locks owned by us then creating
	 * the new lock is easy.  This is the most common case.
	 */
	if (first_match == NULL) {
		if (type == F_UNLCK)
			goto do_wakeup;
		if (flags & F_POSIX) {
			if (lf_count_change(owner, 1)) {
				error = ENOLCK;
				goto do_cleanup;
			}
		}
		range = new_range1;
		new_range1 = NULL;
		lf_create_range(range, owner, type, flags, start, end);
		lf_insert(&lock->lf_range, range, insert_point);
		goto do_wakeup;
	}

	/*
	 * double_clip - Calculate a special case where TWO locks may have
	 *		 to be added due to the new lock breaking up an
	 *		 existing incompatible lock in the middle.
	 *
	 * unlock_override - Calculate a special case where NO locks
	 *		 need to be created.  This occurs when an unlock
	 *		 does not clip any locks at the front and rear.
	 *
	 * WARNING!  closef() and fdrop() assume that an F_UNLCK of the
	 *	     entire range will always succeed so the unlock_override
	 *	     case is mandatory.
	 */
	double_clip = 0;
	unlock_override = 0;
	if (first_match->lf_start < start) {
		if (first_match == last_match && last_match->lf_end > end)
			double_clip = 1;
	} else if (type == F_UNLCK && last_match->lf_end <= end) {
		unlock_override = 1;
	}
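	/*
	 * Illustrative example: if we own an existing range [0, 99] and the
	 * request is for [40, 59], the request falls strictly inside the
	 * single matching range, so double_clip is set and [0, 99] must be
	 * broken into [0, 39] and [60, 99] around the request.  Conversely,
	 * an F_UNLCK whose range entirely covers every overlapping range we
	 * own sets unlock_override and no new range has to be created.
	 */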

	/*
	 * Figure out the worst case net increase in POSIX locks and account
	 * for it now before we start modifying things.  If neither the
	 * first nor the last lock matches we have an issue.  If there is
	 * only one overlapping range which needs to be clipped on both ends
	 * we wind up having to create up to two new locks, else only one.
	 *
	 * When unlocking the worst case is always 1 new lock if our
	 * unlock request cuts the middle out of an existing lock range.
	 *
	 * count represents the 'cleanup' adjustment needed.  It starts
	 * negative, is incremented whenever we create a new POSIX lock,
	 * and decremented whenever we delete an existing one.  At the
	 * end of the day it had better be <= 0 or we didn't calculate the
	 * worst case properly here.
	 */
	count = 0;
	if ((flags & F_POSIX) && !unlock_override) {
		if (!lf_match(first_match, type, flags) &&
		    !lf_match(last_match, type, flags)
		) {
			if (double_clip && type != F_UNLCK)
				count = -2;
			else
				count = -1;
		}
		if (count && lf_count_change(owner, -count)) {
			error = ENOLCK;
			goto do_cleanup;
		}
	}
	/* else flock style lock which encompasses entire range */

	/*
	 * Create and insert the lock representing the requested range.
	 * Adjust the net POSIX lock count.  We have to move our insertion
	 * point since brange now represents the first record >= start.
	 *
	 * When unlocking, no new lock is inserted but we still clip.
	 */
	if (type != F_UNLCK) {
		brange = new_range1;
		new_range1 = NULL;
		lf_create_range(brange, owner, type, flags, start, end);
		lf_insert(&lock->lf_range, brange, insert_point);
		insert_point = brange;
		if (flags & F_POSIX)
			++count;
	} else {
		brange = NULL;
	}

	/*
	 * Handle the double_clip case.  This is the only case where
	 * we wind up having to add TWO locks.
	 */
	if (double_clip) {
		KKASSERT(first_match == last_match);
		last_match = new_range2;
		new_range2 = NULL;
		lf_create_range(last_match, first_match->lf_owner,
				first_match->lf_type, first_match->lf_flags,
				end + 1, first_match->lf_end);
		first_match->lf_end = start - 1;
		first_match->lf_flags &= ~F_NOEND;

		/*
		 * Figure out where to insert the right side clip.
		 */
		lf_insert(&lock->lf_range, last_match, first_match);
		if (last_match->lf_flags & F_POSIX)
			++count;
	}

	/*
	 * Clip or destroy the locks between first_match and last_match,
	 * inclusive.  Ignore the primary lock we created (brange).  Note
	 * that if double-clipped, first_match and last_match will be
	 * outside our clipping range.  Otherwise first_match and last_match
	 * will be deleted.
	 *
	 * We have already taken care of any double clipping.
	 *
	 * The insert_point may become invalid as we delete records, do not
	 * use that pointer any more.  Also, when removing something other
	 * than 'range' we have to check to see if the item we are removing
	 * is 'next' and adjust 'next' properly.
	 *
	 * NOTE: brange will be NULL if F_UNLCKing.
	 */
	TAILQ_INIT(&deadlist);
	next = first_match;

	while ((range = next) != NULL) {
		next = TAILQ_NEXT(range, lf_link);

		/*
		 * Ignore elements that we do not own and ignore the
		 * primary request range which we just created.
		 */
		if (range->lf_owner != owner || range == brange)
			continue;

		/*
		 * We may have to wakeup a waiter when downgrading a lock.
		 */
		if (type == F_UNLCK)
			wakeup_needed = 1;
		if (type == F_RDLCK && range->lf_type == F_WRLCK)
			wakeup_needed = 1;

		/*
		 * Clip left.  This can only occur on first_match.
		 *
		 * Merge the left clip with brange if possible.  This must
		 * be done specifically, not in the optimized merge heuristic
		 * below, since we may have counted on it in our 'count'
		 * calculation above.
		 */
		if (range->lf_start < start) {
			KKASSERT(range == first_match);
			if (brange &&
			    range->lf_end >= start - 1 &&
			    lf_match(range, type, flags)) {
				range->lf_end = brange->lf_end;
				range->lf_flags |= brange->lf_flags & F_NOEND;
				/*
				 * Removing something other than 'range',
				 * adjust 'next' if necessary.
				 */
				if (next == brange)
					next = TAILQ_NEXT(next, lf_link);
				TAILQ_REMOVE(&lock->lf_range, brange, lf_link);
				if (brange->lf_flags & F_POSIX)
					--count;
				TAILQ_INSERT_TAIL(&deadlist, brange, lf_link);
				brange = range;
			} else if (range->lf_end >= start) {
				range->lf_end = start - 1;
				if (type != F_UNLCK)
					range->lf_flags &= ~F_NOEND;
			}
			if (range == last_match)
				break;
			continue;
		}

		/*
		 * Clip right.  This can only occur on last_match.
		 *
		 * Merge the right clip if possible.  This must be done
		 * specifically, not in the optimized merge heuristic
		 * below, since we may have counted on it in our 'count'
		 * calculation.
		 *
		 * Since we are adjusting lf_start, we have to move the
		 * record to maintain the sorted list.  Since lf_start is
		 * only getting larger we can use the next element as the
		 * insert point (we don't have to backtrack).
		 */
		if (range->lf_end > end) {
			KKASSERT(range == last_match);
			if (brange &&
			    range->lf_start <= end + 1 &&
			    lf_match(range, type, flags)) {
				brange->lf_end = range->lf_end;
				brange->lf_flags |= range->lf_flags & F_NOEND;
				TAILQ_REMOVE(&lock->lf_range, range, lf_link);
				if (range->lf_flags & F_POSIX)
					--count;
				TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
			} else if (range->lf_start <= end) {
				range->lf_start = end + 1;
				TAILQ_REMOVE(&lock->lf_range, range, lf_link);
				lf_insert(&lock->lf_range, range, next);
			}
			/* range == last_match, we are done */
			break;
		}

		/*
		 * The record must be entirely enclosed.  Note that the
		 * record could be first_match or last_match, and will be
		 * deleted.
		 */
		KKASSERT(range->lf_start >= start && range->lf_end <= end);
		TAILQ_REMOVE(&lock->lf_range, range, lf_link);
		if (range->lf_flags & F_POSIX)
			--count;
		TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
		if (range == last_match)
			break;
	}

	/*
	 * Attempt to merge locks adjacent to brange.  For example, we may
	 * have had to clip first_match and/or last_match, and they might
	 * be adjacent.  Or there might simply have been an adjacent lock
	 * already there.
	 *
	 * Don't get fancy, just check adjacent elements in the list if they
	 * happen to be owned by us.
	 *
	 * This case only gets hit if we have a situation where a shared
	 * and exclusive lock are adjacent, and the exclusive lock is
	 * downgraded to shared or the shared lock is upgraded to exclusive.
	 */
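	/*
	 * Illustrative example: suppose we hold a shared POSIX lock on
	 * [0, 49] and an exclusive one on [50, 99].  A new shared request
	 * for [50, 99] replaces the exclusive range with a shared brange,
	 * which is now adjacent to the matching shared range [0, 49]; the
	 * two are merged here into a single [0, 99] range.
	 */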
	if (brange) {
		range = TAILQ_PREV(brange, lockf_range_list, lf_link);
		if (range &&
		    range->lf_owner == owner &&
		    range->lf_end == brange->lf_start - 1 &&
		    lf_match(range, type, flags)
		) {
			/*
			 * Extend range to cover brange and scrap brange.
			 */
			range->lf_end = brange->lf_end;
			range->lf_flags |= brange->lf_flags & F_NOEND;
			TAILQ_REMOVE(&lock->lf_range, brange, lf_link);
			if (brange->lf_flags & F_POSIX)
				--count;
			TAILQ_INSERT_TAIL(&deadlist, brange, lf_link);
			brange = range;
		}
		range = TAILQ_NEXT(brange, lf_link);
		if (range &&
		    range->lf_owner == owner &&
		    range->lf_start == brange->lf_end + 1 &&
		    lf_match(range, type, flags)
		) {
			/*
			 * Extend brange to cover range and scrap range.
			 */
			brange->lf_end = range->lf_end;
			brange->lf_flags |= range->lf_flags & F_NOEND;
			TAILQ_REMOVE(&lock->lf_range, range, lf_link);
			if (range->lf_flags & F_POSIX)
				--count;
			TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
		}
	}

	/*
	 * Destroy deleted elements.  We didn't want to do it in the loop
	 * because the free() might have blocked.
	 *
	 * Adjust the count for any posix locks we thought we might create
	 * but didn't.
	 */
	while ((range = TAILQ_FIRST(&deadlist)) != NULL) {
		TAILQ_REMOVE(&deadlist, range, lf_link);
		lf_destroy_range(range);
	}

	KKASSERT(count <= 0);
	if (count < 0)
		lf_count_change(owner, count);
do_wakeup:
	lf_print_lock(lock);
	if (wakeup_needed)
		lf_wakeup(lock, start, end);
	error = 0;
do_cleanup:
	if (new_range1 != NULL)
		lf_destroy_range(new_range1);
	if (new_range2 != NULL)
		lf_destroy_range(new_range2);
	return(error);
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct flock *fl, struct lockf *lock, struct proc *owner,
	   int type, int flags, off_t start, off_t end)
{
	struct lockf_range *range;

	TAILQ_FOREACH(range, &lock->lf_range, lf_link)
		if (range->lf_owner != owner &&
		    lf_overlap(range, start, end) &&
		    (type == F_WRLCK || range->lf_type == F_WRLCK))
			break;
	if (range == NULL) {
		fl->l_type = F_UNLCK;
		return(0);
	}
	fl->l_type = range->lf_type;
	fl->l_whence = SEEK_SET;
	fl->l_start = range->lf_start;
	if (range->lf_flags & F_NOEND)
		fl->l_len = 0;
	else
		fl->l_len = range->lf_end - range->lf_start + 1;
	if (range->lf_owner != NULL && (range->lf_flags & F_POSIX))
		fl->l_pid = range->lf_owner->p_pid;
	else
		fl->l_pid = -1;
	return(0);
}
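
/*
 * Illustrative F_GETLK result: if another process holds a POSIX write
 * lock on [100, 149] and we query any overlapping range, the flock is
 * filled in with l_type = F_WRLCK, l_whence = SEEK_SET, l_start = 100,
 * l_len = 50 and l_pid set to the owner's pid.  A lock held to EOF
 * (F_NOEND) reports l_len = 0, and flock-style owners report l_pid = -1.
 */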

/*
 * Wakeup pending lock attempts.  Theoretically we can stop as soon as
 * we encounter an exclusive request that covers the whole range (at least
 * insofar as the sleep code above calls lf_wakeup() if it would otherwise
 * exit instead of loop), but for now just wakeup all overlapping
 * requests.  XXX
 */
static void
lf_wakeup(struct lockf *lock, off_t start, off_t end)
{
	struct lockf_range *range, *nrange;

	TAILQ_FOREACH_MUTABLE(range, &lock->lf_blocked, lf_link, nrange) {
		if (lf_overlap(range, start, end) == 0)
			continue;
		TAILQ_REMOVE(&lock->lf_blocked, range, lf_link);
		range->lf_flags = 1;
		wakeup(range);
	}
}

/*
 * Allocate a range structure and initialize it sufficiently such that
 * lf_destroy_range() does not barf.
 */
static struct lockf_range *
lf_alloc_range(void)
{
	struct lockf_range *range;

#ifdef INVARIANTS
	atomic_add_int(&lf_global_counter, 1);
#endif
	range = kmalloc(sizeof(struct lockf_range), M_LOCKF, M_WAITOK);
	range->lf_owner = NULL;
	return(range);
}

static void
lf_insert(struct lockf_range_list *list, struct lockf_range *elm,
	  struct lockf_range *insert_point)
{
	while (insert_point && insert_point->lf_start < elm->lf_start)
		insert_point = TAILQ_NEXT(insert_point, lf_link);
	if (insert_point != NULL)
		TAILQ_INSERT_BEFORE(insert_point, elm, lf_link);
	else
		TAILQ_INSERT_TAIL(list, elm, lf_link);
}

static void
lf_create_range(struct lockf_range *range, struct proc *owner, int type,
		int flags, off_t start, off_t end)
{
	KKASSERT(start <= end);
	range->lf_type = type;
	range->lf_flags = flags;
	range->lf_start = start;
	range->lf_end = end;
	range->lf_owner = owner;

	lf_printf("lf_create_range: %lld..%lld\n",
		  range->lf_start, range->lf_end);
}

static void
lf_destroy_range(struct lockf_range *range)
{
	lf_printf("lf_destroy_range: %lld..%lld\n",
		  range->lf_start, range->lf_end);
	kfree(range, M_LOCKF);
#ifdef INVARIANTS
	atomic_add_int(&lf_global_counter, -1);
	KKASSERT(lf_global_counter >= 0);
#endif
}

#ifdef LOCKF_DEBUG

static void
_lf_printf(const char *ctl, ...)
{
	struct proc *p;
	__va_list va;

	if (lf_print_ranges) {
		if ((p = curproc) != NULL)
			kprintf("pid %d (%s): ", p->p_pid, p->p_comm);
	}
	__va_start(va, ctl);
	kvprintf(ctl, va);
	__va_end(va);
}

static void
_lf_print_lock(const struct lockf *lock)
{
	struct lockf_range *range;

	if (lf_print_ranges == 0)
		return;

	if (TAILQ_EMPTY(&lock->lf_range)) {
		lf_printf("lockf %p: no ranges locked\n", lock);
	} else {
		lf_printf("lockf %p:\n", lock);
	}
	TAILQ_FOREACH(range, &lock->lf_range, lf_link)
		kprintf("\t%lld..%lld type %s owned by %d\n",
			range->lf_start, range->lf_end,
			range->lf_type == F_RDLCK ? "shared" : "exclusive",
			range->lf_flags & F_POSIX ? range->lf_owner->p_pid : -1);
	if (TAILQ_EMPTY(&lock->lf_blocked))
		kprintf("no process waiting for range\n");
	else
		kprintf("blocked locks:");
	TAILQ_FOREACH(range, &lock->lf_blocked, lf_link)
		kprintf("\t%lld..%lld type %s waiting on %p\n",
			range->lf_start, range->lf_end,
			range->lf_type == F_RDLCK ? "shared" : "exclusive",
			range);
}
#endif	/* LOCKF_DEBUG */