Merge from vendor branch OPENSSH:
[dragonfly.git] / sys / kern / kern_lockf.c
1 /*
2  * Copyright (c) 2004 Joerg Sonnenberger <joerg@bec.de>
3  *
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Scooter Morris at Genentech Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *      This product includes software developed by the University of
21  *      California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *      @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
39  * $FreeBSD: src/sys/kern/kern_lockf.c,v 1.25 1999/11/16 16:28:56 phk Exp $
40  * $DragonFly: src/sys/kern/kern_lockf.c,v 1.20 2004/07/10 12:19:27 joerg Exp $
41  */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/proc.h>
48 #include <sys/unistd.h>
49 #include <sys/vnode.h>
50 #include <sys/malloc.h>
51 #include <sys/fcntl.h>
52 #include <sys/resourcevar.h>
53
54 #include <sys/lockf.h>
55 #include <machine/limits.h>     /* for LLONG_MAX */
56 #include <machine/stdarg.h>
57
58 #ifdef INVARIANTS
59 int lf_global_counter = 0;
60 #endif
61
62 #ifdef LOCKF_DEBUG
63 int lf_print_ranges = 0;
64
65 static void _lf_print_lock(const struct lockf *);
66 static void _lf_printf(const char *, ...);
67
68 #define lf_print_lock(lock) if (lf_print_ranges) _lf_print_lock(lock)
69 #define lf_printf(ctl, args...) if (lf_print_ranges) _lf_printf(ctl, args)
70 #else
71 #define lf_print_lock(lock)
72 #define lf_printf(ctl, args...)
73 #endif
74
75 static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
76
77 static void     lf_wakeup(struct lockf *, off_t, off_t);
78 static int      lf_overlap(const struct lockf_range *, off_t, off_t);
79 static int      lf_overlap_left(const struct lockf_range *, off_t, off_t);
80 static int      lf_overlap_right(const struct lockf_range *, off_t, off_t);
81 static int      lf_overlap_left2(const struct lockf_range *, off_t, off_t);
82 static int      lf_overlap_right2(const struct lockf_range *, off_t, off_t);
83 static int      lf_overlap_embedded(const struct lockf_range *, off_t, off_t);
84 static struct lockf_range *lf_alloc_range(void);
85 static void     lf_create_range(struct lockf_range *, struct proc *, int, int,
86                                 off_t, off_t, int);
87 static void     lf_destroy_range(struct lockf_range *, int);
88
89 static int      lf_setlock(struct lockf *, struct proc *, int, int,
90                            off_t, off_t);
91 static int      lf_clearlock(struct lockf *, struct proc *, int, int,
92                              off_t, off_t);
93 static int      lf_getlock(struct flock *, struct lockf *, struct proc *,
94                            int, int, off_t, off_t);
95
96 static int      lf_count_change(struct proc *, int);
97
98 /*
99  * Change the POSIX lock accounting for the given process.
100  */
101 void
102 lf_count_adjust(struct proc *p, int increase)
103 {
104         struct uidinfo *uip;
105
106         KKASSERT(p != NULL);
107
108         uip = p->p_ucred->cr_uidinfo;
109
110         if (increase)
111                 uip->ui_posixlocks += p->p_numposixlocks;
112         else
113                 uip->ui_posixlocks -= p->p_numposixlocks;
114
115         KASSERT(uip->ui_posixlocks >= 0,
116                 ("Negative number of POSIX locks held by %s user: %d.",
117                  increase ? "new" : "old", uip->ui_posixlocks));
118 }
119
120 static int
121 lf_count_change(struct proc *owner, int diff)
122 {
123         struct uidinfo *uip;
124         int max;
125
126         /* we might actually not have a process context */
127         if (owner == NULL)
128                 return(0);
129
130         uip = owner->p_ucred->cr_uidinfo;
131
132         max = MIN(owner->p_rlimit[RLIMIT_POSIXLOCKS].rlim_cur,
133                   maxposixlocksperuid);
134         if (diff > 0 && owner->p_ucred->cr_uid != 0 && max != -1 &&
135             uip->ui_posixlocks >= max )
136                 return(1);
137
138         uip->ui_posixlocks += diff;
139
140         KASSERT(uip->ui_posixlocks >= 0,
141                 ("Negative number of POSIX locks held by user: %d.",
142                  uip->ui_posixlocks));
143
144         return(0);
145 }
146
147 /*
148  * Advisory record locking support
149  */
150 int
151 lf_advlock(struct vop_advlock_args *ap, struct lockf *lock, u_quad_t size)
152 {
153         struct flock *fl = ap->a_fl;
154         struct proc *owner;
155         off_t start, end;
156         int type, flags, error;
157         lwkt_tokref ilock;
158
159         /*
160          * Convert the flock structure into a start and end.
161          */
162         switch (fl->l_whence) {
163         case SEEK_SET:
164         case SEEK_CUR:
165                 /*
166                  * Caller is responsible for adding any necessary offset
167                  * when SEEK_CUR is used.
168                  */
169                 start = fl->l_start;
170                 break;
171
172         case SEEK_END:
173                 start = size + fl->l_start;
174                 break;
175
176         default:
177                 return(EINVAL);
178         }
179         if (start < 0)
180                 return(EINVAL);
181         if (fl->l_len == 0) {
182                 flags |= F_NOEND;
183                 end = LLONG_MAX;
184         } else {
185                 end = start + fl->l_len - 1;
186                 if (end < start)
187                         return(EINVAL);
188         }
189         
190         flags = ap->a_flags;
191         type = fl->l_type;
192         /*
193          * This isn't really correct for flock-style locks,
194          * but the current handling is somewhat broken anyway.
195          */
196         owner = (struct proc *)ap->a_id;
197
198         /*
199          * Do the requested operation.
200          */
201         lwkt_gettoken(&ilock, lwkt_token_pool_get(lock));
202
203         if (lock->init_done == 0) {
204                 TAILQ_INIT(&lock->lf_range);
205                 TAILQ_INIT(&lock->lf_blocked);
206                 lock->init_done = 1;
207         }
208
209         switch(ap->a_op) {
210         case F_SETLK:
211                 error = lf_setlock(lock, owner, type, flags, start, end);
212                 break;
213
214         case F_UNLCK:
215                 error = lf_clearlock(lock, owner, type, flags, start, end);
216                 break;
217
218         case F_GETLK:
219                 error = lf_getlock(fl, lock, owner, type, flags, start, end);
220                 break;
221
222         default:
223                 error = EINVAL;
224                 break;
225         }
226         lwkt_reltoken(&ilock);
227         return(error);
228 }
229
230 static int
231 lf_setlock(struct lockf *lock, struct proc *owner, int type, int flags,
232            off_t start, off_t end)
233 {
234         struct lockf_range *range, *first_match, *insert_point;
235         int wakeup_needed, lock_needed;
236         /* pre-allocation to avoid blocking in the middle of the algorithm */
237         struct lockf_range *new_range1 = NULL, *new_range2 = NULL;
238         int error = 0;
239         
240         /* for restauration in case of hitting the POSIX lock limit below */
241         struct lockf_range *orig_first_match = NULL;
242         off_t orig_end = -1;
243         int orig_flags = 0;
244
245 restart:
246         if (new_range1 == NULL)
247                 new_range1 = lf_alloc_range();
248         if (new_range2 == NULL)
249                 new_range2 = lf_alloc_range();
250         first_match = NULL;
251         insert_point = NULL;
252         wakeup_needed = 0;
253
254         lf_print_lock(lock);
255
256         TAILQ_FOREACH(range, &lock->lf_range, lf_link) {
257                 if (insert_point == NULL && range->lf_start >= start)
258                         insert_point = range;
259                 if (lf_overlap(range, start, end) == 0)
260                         continue;
261                 if (range->lf_owner == owner) {
262                         if (first_match == NULL)
263                                 first_match = range;
264                         continue;
265                 }
266                 if (type == F_WRLCK || range->lf_type == F_WRLCK)
267                         break;
268         }
269
270         if (range != NULL) {
271                 struct lockf_range *brange;
272
273                 if ((flags & F_WAIT) == 0) {
274                         error = EAGAIN;
275                         goto do_cleanup;
276                 }
277
278                 /*
279                  * We are blocked. For POSIX locks we have to check
280                  * for deadlocks and return with EDEADLK. This is done
281                  * by checking wether range->lf_owner is already
282                  * blocked.
283                  *
284                  * Since flock-style locks cover the whole file, a
285                  * deadlock between those is nearly impossible.
286                  * This can only occur if a process tries to lock the
287                  * same inode exclusively while holding a shared lock
288                  * with another descriptor.
289                  * XXX How can we cleanly detect this?
290                  * XXX The current mixing of flock & fcntl/lockf is evil.
291                  *
292                  * Handle existing locks of flock-style like POSIX locks.
293                  */
294                 if (flags & F_POSIX) {
295                         TAILQ_FOREACH(brange, &lock->lf_blocked, lf_link)
296                                 if (brange->lf_owner == range->lf_owner) {
297                                         error = EDEADLK;
298                                         goto do_cleanup;
299                                 }
300                 }
301                 
302                 /*
303                  * For flock-style locks, we must first remove
304                  * any shared locks that we hold before we sleep
305                  * waiting for an exclusive lock.
306                  */
307                 if ((flags & F_FLOCK) && type == F_WRLCK)
308                         lf_clearlock(lock, owner, type, flags, start, end);
309
310                 brange = new_range1;
311                 new_range1 = NULL;
312                 lf_create_range(brange, owner, type, 0, start, end, 0);
313                 TAILQ_INSERT_TAIL(&lock->lf_blocked, brange, lf_link);
314                 error = tsleep(brange, PCATCH, "lockf", 0);
315
316                 /*
317                  * We may have been awaked by a signal and/or by a
318                  * debugger continuing us (in which case we must remove
319                  * ourselves from the blocked list) and/or by another
320                  * process releasing/downgrading a lock (in which case
321                  * we have already been removed from the blocked list
322                  * and our lf_flags field is 1).
323                  */
324                 if (brange->lf_flags == 0)
325                         TAILQ_REMOVE(&lock->lf_blocked, brange, lf_link);
326                 tsleep(brange, 0, "lockfz", 2); /* XXX debug livelock */
327                 lf_destroy_range(brange, 0);
328
329                 if (error)
330                         goto do_cleanup;
331                 goto restart;
332         }
333
334         if (first_match == NULL) {
335                 if (flags & F_POSIX) {
336                         if (lf_count_change(owner, 1)) {
337                                 error = ENOLCK;
338                                 goto do_cleanup;
339                         }
340                 }
341                 range = new_range1;
342                 new_range1 = NULL;
343                 lf_create_range(range, owner, type, flags, start, end, 1);
344                 if (insert_point != NULL)
345                         TAILQ_INSERT_BEFORE(insert_point, range, lf_link);
346                 else
347                         TAILQ_INSERT_TAIL(&lock->lf_range, range, lf_link);
348                 goto do_wakeup;
349         }
350
351         lock_needed = 1;
352
353         if (lf_overlap_left(first_match, start, end)) {
354                 KKASSERT((flags & F_POSIX) != 0);
355                 if (first_match->lf_end > end) {
356                         if (first_match->lf_type == type)
357                                 goto do_wakeup;
358                         if (lf_count_change(owner, 2)) {
359                                 error = ENOLCK;
360                                 goto do_cleanup;
361                         }
362                         range = new_range1;
363                         new_range1 = NULL;
364                         lf_create_range(range, owner, type, flags,
365                                         start, end, 1);
366                         if (insert_point != NULL)
367                                 TAILQ_INSERT_BEFORE(insert_point, range,
368                                                     lf_link);
369                         else
370                                 TAILQ_INSERT_TAIL(&lock->lf_range, range,
371                                                   lf_link);
372                         insert_point = range;
373                         range = new_range2;
374                         new_range2 = NULL;
375                         lf_create_range(range, owner, first_match->lf_type,
376                                         first_match->lf_flags, end + 1,
377                                         first_match->lf_end, 1);
378                         TAILQ_INSERT_AFTER(&lock->lf_range, insert_point,
379                                            range, lf_link);
380                         first_match->lf_flags &= ~F_NOEND;
381                         first_match->lf_end = start - 1;
382                         if (type == F_RDLCK)
383                                 wakeup_needed = 1;
384                         goto do_wakeup;
385                 }
386                 /*
387                  * left match, but not right match
388                  *
389                  * handle the lf_type != type case directly,
390                  * merge the other case with the !lock_needed path.
391                  */
392                 if (first_match->lf_type != type) {
393                         /*
394                          * This is needed if the lockf acquisition below fails.
395                          */
396                         orig_first_match = first_match;
397                         orig_end = first_match->lf_end;
398                         orig_flags = first_match->lf_flags;
399                         first_match->lf_end = start - 1;
400                         first_match->lf_flags &= ~F_NOEND;
401                         if (type == F_RDLCK)
402                                 wakeup_needed = 1;
403                         /* Try to find the next matching range */
404                         range = TAILQ_NEXT(first_match, lf_link);
405                         while (range != NULL) {
406                                 if (range->lf_owner == owner &&
407                                     lf_overlap(range, start, end))
408                                         break;
409                                 range = TAILQ_NEXT(range, lf_link);
410                         }
411                         if (range == NULL)
412                                 goto do_wakeup;
413                         first_match = range;
414                         /* fall through to !left_match behaviour */
415                 } else {
416                         first_match->lf_end = end;
417                         first_match->lf_flags |= flags & F_NOEND;
418                         lock_needed = 0;
419                 }
420         }
421
422         if (lf_overlap_embedded(first_match, start, end)) {
423                 if (first_match != insert_point) {
424                         TAILQ_REMOVE(&lock->lf_range, first_match, lf_link);
425                         TAILQ_INSERT_BEFORE(insert_point, first_match, lf_link);
426                 }
427                 first_match->lf_start = start;
428                 first_match->lf_end = end;
429                 first_match->lf_flags |= flags & F_NOEND;
430                 first_match->lf_type = type;
431                 lock_needed = 0;                
432         }
433
434         if (lock_needed == 0) {
435                 struct lockf_range *nrange;
436
437                 range = TAILQ_NEXT(first_match, lf_link);
438                 while (range != NULL) {
439                         if (range->lf_owner != owner) {
440                                 range = TAILQ_NEXT(range, lf_link);
441                                 continue;
442                         }
443                         if (lf_overlap_embedded(range, start, end)) {
444                                 nrange = TAILQ_NEXT(range, lf_link);
445                                 TAILQ_REMOVE(&lock->lf_range, range,
446                                              lf_link);
447                                 lf_count_change(owner, -1);
448                                 lf_destroy_range(range, 1);
449                                 range = nrange;
450                                 continue;
451                         }
452                         if (lf_overlap_right(range, start, end) == 0) {
453                                 range = TAILQ_NEXT(range, lf_link);
454                                 continue;
455                         }
456                         if (range->lf_type != type) {
457                                 range->lf_start = end + 1;
458                                 nrange = TAILQ_NEXT(range, lf_link);
459                                 TAILQ_REMOVE(&lock->lf_range, range, lf_link);
460                                 while (nrange != NULL) {
461                                         if (nrange->lf_start >= end + 1)
462                                                 break;
463                                         nrange = TAILQ_NEXT(nrange, lf_link);
464                                 }
465                                 if (nrange != NULL)
466                                         TAILQ_INSERT_BEFORE(nrange, range,
467                                                             lf_link);
468                                 else
469                                         TAILQ_INSERT_TAIL(&lock->lf_range,
470                                                           range, lf_link);
471                                 break;
472                         }
473                         first_match->lf_end = range->lf_end;
474                         first_match->lf_flags |=
475                             range->lf_flags & F_NOEND;
476                         TAILQ_REMOVE(&lock->lf_range, range, lf_link);
477                         lf_count_change(owner, -1);
478                         lf_destroy_range(range, 1);
479                         break;
480                 }
481                 goto do_wakeup;
482         }
483
484         if (lf_overlap_right(first_match, start, end)) {
485                 KKASSERT((flags & F_POSIX) != 0);
486                 if (first_match->lf_type == type) {
487                         first_match->lf_start = start;
488                         if (first_match != insert_point) {
489                                 TAILQ_REMOVE(&lock->lf_range, first_match,
490                                              lf_link);
491                                 TAILQ_INSERT_BEFORE(insert_point, first_match,
492                                                     lf_link);
493                         }
494                         goto do_wakeup;
495                 }
496                 if (lf_count_change(owner, 1)) {
497                         if (orig_first_match != NULL) {
498                                 orig_first_match->lf_end = orig_end;
499                                 orig_first_match->lf_flags = orig_end;
500                         }
501                         error = ENOLCK;
502                         goto do_cleanup;
503                 }
504                 first_match->lf_start = end + 1;
505                 KKASSERT(new_range1 != NULL);
506                 range = new_range1;
507                 new_range1 = NULL;
508                 lf_create_range(range, owner, type, flags, start, end, 1);
509                 TAILQ_INSERT_BEFORE(insert_point, range, lf_link);
510                 range = TAILQ_NEXT(first_match, lf_link);
511                 TAILQ_REMOVE(&lock->lf_range, first_match, lf_link);
512                 while (range != NULL) {
513                         if (range->lf_start >= first_match->lf_start)
514                                 break;
515                         range = TAILQ_NEXT(range, lf_link);
516                 }
517                 if (range != NULL)
518                         TAILQ_INSERT_BEFORE(range, first_match, lf_link);
519                 else
520                         TAILQ_INSERT_TAIL(&lock->lf_range, first_match, lf_link);
521                 goto do_wakeup;
522         }
523
524 do_wakeup:
525         lf_print_lock(lock);
526         if (wakeup_needed)
527                 lf_wakeup(lock, start, end);
528         error = 0;
529 do_cleanup:
530         if (new_range1 != NULL)
531                 lf_destroy_range(new_range1, 0);
532         if (new_range2 != NULL)
533                 lf_destroy_range(new_range2, 0);
534         return(error);
535 }
536
/*
 * Release any lock ranges held by `owner' that fall within [start, end],
 * splitting a range that only partially overlaps the unlocked region and
 * waking up blocked lock attempts afterwards.
 *
 * Returns 0 on success, or ENOLCK if splitting a range would exceed the
 * per-uid POSIX lock limit.
 */
static int
lf_clearlock(struct lockf *lock, struct proc *owner, int type, int flags,
             off_t start, off_t end)
{
        struct lockf_range *range, *trange;
        struct lockf_range *new_range;
        int error = 0;

        /* pre-allocate: a middle unlock may have to split one range in two */
        new_range = lf_alloc_range();

        TAILQ_FOREACH_MUTABLE(range, &lock->lf_range, lf_link, trange) {
                /* list is sorted by lf_start: skip left of, stop right of us */
                if (range->lf_end < start)
                        continue;
                if (range->lf_start > end)
                        break;
                if (range->lf_owner != owner)
                        continue;
                if (lf_overlap_embedded(range, start, end)) {
                        /* wholly covered: drop the range entirely */
                        TAILQ_REMOVE(&lock->lf_range, range, lf_link);
                        /* flock-locks are equal */
                        if (range->lf_flags & F_POSIX)
                                lf_count_change(owner, -1);
                        lf_destroy_range(range, 1);
                        continue;
                }
                if (lf_overlap_left2(range, start, end)) {
                        KKASSERT(range->lf_flags & F_POSIX);
                        /*
                         * NOTE(review): lf_overlap_left2() requires
                         * lf_end <= end while lf_overlap_right2() requires
                         * lf_end > end, so this nested branch appears
                         * unreachable as written, and a range strictly
                         * containing [start, end] seems to match none of
                         * the three cases — verify against later revisions.
                         */
                        if (lf_overlap_right2(range, start, end)) {
                                struct lockf_range *nrange;

                                /* splitting creates one extra POSIX lock */
                                if (lf_count_change(owner, 1)) {
                                        error = ENOLCK;
                                        goto do_cleanup;
                                }
                                nrange = new_range;
                                new_range = NULL;
                                /* right-hand remainder keeps old type/flags */
                                lf_create_range(nrange, range->lf_owner,
                                    range->lf_type, range->lf_flags,
                                    end + 1, range->lf_end, 1);
                                /*
                                 * NOTE(review): `start - 1' would match the
                                 * left-only truncation below — confirm.
                                 */
                                range->lf_end = start;
                                range->lf_flags &= ~F_NOEND;
                                /* re-insert remainder at its sorted position */
                                for (; range != NULL;
                                     range = TAILQ_NEXT(range, lf_link))
                                        if (range->lf_start >= nrange->lf_start)
                                                break;
                                if (range != NULL)
                                        TAILQ_INSERT_BEFORE(range, nrange,
                                                            lf_link);
                                else
                                        TAILQ_INSERT_TAIL(&lock->lf_range,
                                                          nrange, lf_link);
                                break;
                        }
                        /* left overlap only: truncate the range on the right */
                        range->lf_end = start - 1;
                        range->lf_flags &= ~F_NOEND;
                        continue;
                }
                if (lf_overlap_right2(range, start, end)) {
                        struct lockf_range *nrange = range;

                        KKASSERT(range->lf_flags & F_POSIX);

                        /* trim on the left and move to its sorted position */
                        range  = TAILQ_NEXT(range, lf_link);
                        TAILQ_REMOVE(&lock->lf_range, nrange, lf_link);
                        for (; range != NULL;
                             range = TAILQ_NEXT(range, lf_link))
                                if (range->lf_start >= nrange->lf_start)
                                        break;
                        if (range != NULL)
                                TAILQ_INSERT_BEFORE(range, nrange, lf_link);
                        else
                                TAILQ_INSERT_TAIL(&lock->lf_range, nrange,
                                                  lf_link);
                        break;
                }
        }

        /* something was (possibly) released; let waiters retry */
        lf_wakeup(lock, start, end);
        error = 0;

do_cleanup:
        if (new_range != NULL)
                lf_destroy_range(new_range, 0);

        return(error);
}
623
624 /*
625  * Check whether there is a blocking lock,
626  * and if so return its process identifier.
627  */
628 static int
629 lf_getlock(struct flock *fl, struct lockf *lock, struct proc *owner,
630            int type, int flags, off_t start, off_t end)
631 {
632         struct lockf_range *range;
633
634         TAILQ_FOREACH(range, &lock->lf_range, lf_link)
635                 if (range->lf_owner != owner &&
636                     lf_overlap(range, start, end) &&
637                     (type == F_WRLCK || range->lf_type == F_WRLCK))
638                         break;
639         if (range == NULL) {
640                 fl->l_type = F_UNLCK;
641                 return(0);
642         }
643         fl->l_type = range->lf_type;
644         fl->l_whence = SEEK_SET;
645         fl->l_start = range->lf_start;
646         if (range->lf_flags & F_NOEND)
647                 fl->l_len = 0;
648         else
649                 fl->l_len = range->lf_end - range->lf_start + 1;
650         if (range->lf_owner != NULL && (range->lf_flags & F_POSIX))
651                 fl->l_pid = range->lf_owner->p_pid;
652         else
653                 fl->l_pid = -1;
654         return(0);
655 }
656
657 /*
658  * Check wether range and [start, end] overlap.
659  */
660 static int
661 lf_overlap(const struct lockf_range *range, off_t start, off_t end)
662 {
663         if (range->lf_start >= start && range->lf_start <= end)
664                 return(1);
665         else if (start >= range->lf_start && start <= range->lf_end)
666                 return(1);
667         else
668                 return(0);
669 }
670
671 /*
672  * Wakeup pending lock attempts.
673  */
674 static void
675 lf_wakeup(struct lockf *lock, off_t start, off_t end)
676 {
677         struct lockf_range *range, *nrange;
678         TAILQ_FOREACH_MUTABLE(range, &lock->lf_blocked, lf_link, nrange) {
679                 if (lf_overlap(range, start, end) == 0)
680                         continue;
681                 TAILQ_REMOVE(&lock->lf_blocked, range, lf_link);
682                 range->lf_flags = 1;
683                 wakeup(range);
684                 if (lf_overlap_embedded(range, start, end))
685                         break;
686         }
687 }
688
689 static int
690 lf_overlap_left(const struct lockf_range *range, off_t start, off_t end)
691 {
692         if (range->lf_start < start && range->lf_end >= start - 1 &&
693             range->lf_end <= end)
694                 return(1);
695         else
696                 return(0);
697                 
698 }
699
700 static int
701 lf_overlap_right(const struct lockf_range *range, off_t start, off_t end)
702 {
703         if (range->lf_end > end && range->lf_start >= start &&
704             range->lf_start - 1 <= end)
705                 return(1);
706         else
707                 return(0);
708 }
709
710 static int
711 lf_overlap_left2(const struct lockf_range *range, off_t start, off_t end)
712 {
713         if (range->lf_start < start && range->lf_end >= start &&
714             range->lf_end <= end)
715                 return(1);
716         else
717                 return(0);
718                 
719 }
720
721 static int
722 lf_overlap_right2(const struct lockf_range *range, off_t start, off_t end)
723 {
724         if (range->lf_end > end && range->lf_start >= start &&
725             range->lf_start <= end)
726                 return(1);
727         else
728                 return(0);
729 }
730
731 static int
732 lf_overlap_embedded(const struct lockf_range *range, off_t start, off_t end)
733 {
734         if (range->lf_start >= start && range->lf_end <= end)
735                 return(1);
736         else
737                 return(0);
738 }
739
740 /*
741  * Allocate a range structure and initialize it sufficiently such that
742  * lf_destroy_range() does not barf.
743  */
744 static struct lockf_range *
745 lf_alloc_range(void)
746 {
747         struct lockf_range *range;
748
749 #ifdef INVARIANTS
750         lf_global_counter++;
751 #endif
752         range = malloc(sizeof(struct lockf_range), M_LOCKF, M_WAITOK);
753         range->lf_owner = NULL;
754         return(range);
755 }
756
757 static void
758 lf_create_range(struct lockf_range *range, struct proc *owner, int type,
759                 int flags, off_t start, off_t end, int accounting)
760 {
761         KKASSERT(start <= end);
762         if (owner != NULL && (flags & F_POSIX) && accounting)
763                 ++owner->p_numposixlocks;
764         range->lf_type = type;
765         range->lf_flags = flags;
766         range->lf_start = start;
767         range->lf_end = end;
768         range->lf_owner = owner;
769
770         lf_printf("lf_create_range: %lld..%lld\n",
771                         range->lf_start, range->lf_end);
772 }
773
774 static void
775 lf_destroy_range(struct lockf_range *range, int accounting)
776 {
777         struct proc *owner = range->lf_owner;
778         int flags = range->lf_flags;
779
780         lf_printf("lf_destroy_range: %lld..%lld\n",
781                         range->lf_start, range->lf_end);
782
783         free(range, M_LOCKF);
784         if (owner != NULL && (flags & F_POSIX) && accounting) {
785                 --owner->p_numposixlocks;
786                 KASSERT(owner->p_numposixlocks >= 0,
787                         ("Negative number of POSIX locks held by process: %d",
788                          owner->p_numposixlocks));
789         }
790
791 #ifdef INVARIANTS
792         lf_global_counter--;
793         KKASSERT(lf_global_counter>=0);
794 #endif
795 }
796
797 #ifdef LOCKF_DEBUG
798
799 static void
800 _lf_printf(const char *ctl, ...)
801 {
802         struct proc *p;
803         __va_list va;
804
805         if (lf_print_ranges) {
806             if ((p = curproc) != NULL)
807                 printf("pid %d (%s): ", p->p_pid, p->p_comm);
808         }
809         __va_start(va, ctl);
810         vprintf(ctl, va);
811         __va_end(va);
812 }
813
814 static void
815 _lf_print_lock(const struct lockf *lock)
816 {
817         struct lockf_range *range;
818
819         if (lf_print_ranges == 0)
820                 return;
821
822         if (TAILQ_EMPTY(&lock->lf_range)) {
823                 lf_printf("lockf %p: no ranges locked\n", lock);
824         } else {
825                 lf_printf("lockf %p:\n", lock);
826         }
827         TAILQ_FOREACH(range, &lock->lf_range, lf_link)
828                 printf("\t%lld..%lld type %s owned by %d\n",
829                        range->lf_start, range->lf_end,
830                        range->lf_type == F_RDLCK ? "shared" : "exclusive",
831                        range->lf_flags & F_POSIX ? range->lf_owner->p_pid : -1);
832         if (TAILQ_EMPTY(&lock->lf_blocked))
833                 printf("no process waiting for range\n");
834         else
835                 printf("blocked locks:");
836         TAILQ_FOREACH(range, &lock->lf_blocked, lf_link)
837                 printf("\t%lld..%lld type %s waiting on %p\n",
838                        range->lf_start, range->lf_end,
839                        range->lf_type == F_RDLCK ? "shared" : "exclusive",
840                        range);
841 }
842 #endif /* LOCKF_DEBUG */