kernel: Remove newlines from the panic messages that have one.
[dragonfly.git] sys/vfs/hammer/hammer_subs.c
/*
 * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>

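/*
 * Acquire an exclusive lock, sleeping on the supplied wait identifier
 * until it can be obtained.  A thread already holding the lock
 * exclusively may acquire it again recursively.
 */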
void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
        thread_t td = curthread;
        u_int lv;
        u_int nlv;

        KKASSERT(lock->refs);
        for (;;) {
                lv = lock->lockval;

                if (lv == 0) {
                        nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                lock->lowner = td;
                                break;
                        }
                } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
                           lock->lowner == td) {
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv))
                                break;
                } else {
                        if (hammer_debug_locks) {
                                kprintf("hammer_lock_ex: held by %p\n",
                                        lock->lowner);
                        }
                        nlv = lv | HAMMER_LOCKF_WANTED;
                        ++hammer_contention_count;
                        tsleep_interlock(&lock->lockval, 0);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
                                if (hammer_debug_locks)
                                        kprintf("hammer_lock_ex: try again\n");
                        }
                }
        }
}

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
        thread_t td = curthread;
        int error;
        u_int lv;
        u_int nlv;

        KKASSERT(lock->refs);
        for (;;) {
                lv = lock->lockval;

                if (lv == 0) {
                        nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                lock->lowner = td;
                                error = 0;
                                break;
                        }
                } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
                           lock->lowner == td) {
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                error = 0;
                                break;
                        }
                } else {
                        error = EAGAIN;
                        break;
                }
        }
        return (error);
}

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
        thread_t td = curthread;
        u_int lv;
        u_int nlv;
        const char *ident = "hmrlck";

        KKASSERT(lock->refs);
        for (;;) {
                lv = lock->lockval;

                if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv))
                                break;
                } else if (lock->lowner == td) {
                        /*
                         * Disallowed case (shared request while holding
                         * the lock exclusively), drop into the kernel
                         * debugger for now.  A 'cont' continues with a
                         * recursive exclusive lock.
                         */
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                if (hammer_debug_critical)
                                        Debugger("hammer_lock_sh: holding ex");
                                break;
                        }
                } else {
                        nlv = lv | HAMMER_LOCKF_WANTED;
                        ++hammer_contention_count;
                        tsleep_interlock(&lock->lockval, 0);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv))
                                tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
                }
        }
}

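/*
 * Try to obtain a shared lock without blocking.  Returns 0 on success,
 * EAGAIN if the lock is held exclusively by another thread.
 */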
int
hammer_lock_sh_try(struct hammer_lock *lock)
{
        thread_t td = curthread;
        u_int lv;
        u_int nlv;
        int error;

        KKASSERT(lock->refs);
        for (;;) {
                lv = lock->lockval;

                if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                error = 0;
                                break;
                        }
                } else if (lock->lowner == td) {
                        /*
                         * Disallowed case (shared request while holding
                         * the lock exclusively), drop into the kernel
                         * debugger for now.  A 'cont' continues with a
                         * recursive exclusive lock.
                         */
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                if (hammer_debug_critical)
                                        Debugger("hammer_lock_sh: holding ex");
                                error = 0;
                                break;
                        }
                } else {
                        error = EAGAIN;
                        break;
                }
        }
        return (error);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
{
        thread_t td = curthread;
        u_int lv;
        u_int nlv;
        int error;

        for (;;) {
                lv = lock->lockval;

                if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
                        nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                lock->lowner = td;
                                error = 0;
                                break;
                        }
                } else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
                        if (lock->lowner != curthread)
                                panic("hammer_lock_upgrade: illegal state");
                        error = 0;
                        break;
                } else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
                        panic("hammer_lock_upgrade: lock is not held");
                        /* NOT REACHED */
                        error = EDEADLK;
                        break;
                } else {
                        error = EDEADLK;
                        break;
                }
        }
        return (error);
}

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
{
        thread_t td __debugvar = curthread;
        u_int lv;
        u_int nlv;

        KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
                 (HAMMER_LOCKF_EXCLUSIVE | shcount));
        KKASSERT(lock->lowner == td);

        /*
         * NOTE: Must clear owner before releasing exclusivity
         */
        lock->lowner = NULL;

        for (;;) {
                lv = lock->lockval;
                nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
                if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                        if (lv & HAMMER_LOCKF_WANTED)
                                wakeup(&lock->lockval);
                        break;
                }
        }
}

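/*
 * Release a shared or exclusive lock.  The final release clears the
 * lock and wakes up any waiters.
 */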
void
hammer_unlock(struct hammer_lock *lock)
{
        thread_t td __debugvar = curthread;
        u_int lv;
        u_int nlv;

        lv = lock->lockval;
        KKASSERT(lv != 0);
        if (lv & HAMMER_LOCKF_EXCLUSIVE)
                KKASSERT(lock->lowner == td);

        for (;;) {
                lv = lock->lockval;
                nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
                if (nlv > 1) {
                        nlv = lv - 1;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv))
                                break;
                } else if (nlv == 1) {
                        nlv = 0;
                        if (lv & HAMMER_LOCKF_EXCLUSIVE)
                                lock->lowner = NULL;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                if (lv & HAMMER_LOCKF_WANTED)
                                        wakeup(&lock->lockval);
                                break;
                        }
                } else {
                        panic("hammer_unlock: lock %p is not held", lock);
                }
        }
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if it is held exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
        u_int lv = lock->lockval;

        if (lv & HAMMER_LOCKF_EXCLUSIVE)
                return(1);
        else if (lv)
                return(-1);
        panic("hammer_lock_status: lock must be held: %p", lock);
}

/*
 * Bump the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be set on a 0->1
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 * The caller can interlock it later on to deal with serialization.
 *
 * MPSAFE
 */
void
hammer_ref(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
                        nlv = (lv + 1) | HAMMER_REFS_CHECK;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return;
                } else {
                        nlv = (lv + 1);
                        KKASSERT((int)nlv > 0);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return;
                }
        }
        /* not reached */
}

/*
 * Drop the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be cleared on a 1->0
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 *
 * MPSAFE
 */
void
hammer_rel(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
                        nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return;
                } else {
                        KKASSERT((int)lv > 0);
                        nlv = (lv - 1);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return;
                }
        }
        /* not reached */
}

/*
 * The hammer_*_interlock() and hammer_*_interlock_done() functions are
 * more sophisticated versions which handle MP transition races and block
 * when necessary.
 *
 * hammer_ref_interlock() bumps the ref-count and conditionally acquires
 * the interlock for 0->1 transitions or if the CHECK is found to be set.
 *
 * This case will return TRUE, the interlock will be held, and the CHECK
 * bit also set.  Other threads attempting to ref will see the CHECK bit
 * and block until we clean up.
 *
 * FALSE is returned for transitions other than 0->1 when the CHECK bit
 * is not found to be set, or if the function loses the race with another
 * thread.
 *
 * TRUE is only returned to one thread and the others will block.
 * Effectively a TRUE indicator means 'someone transitioned 0->1
 * and you are the first guy to successfully lock it after that, so you
 * need to check'.  Due to races the ref-count may be greater than 1 upon
 * return.
 *
 * MPSAFE
 */
int
hammer_ref_interlock(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        /*
         * Integrated reference count bump, lock, and check, with hot-path.
         *
         * (a) Return 1 (+LOCKED, +CHECK)       0->1 transition
         * (b) Return 0 (-LOCKED, -CHECK)       N->N+1 transition
         * (c) Break out (+CHECK)               Check condition and Cannot lock
         * (d) Return 1 (+LOCKED, +CHECK)       Successfully locked
         */
        for (;;) {
                lv = lock->refs;
                if (lv == 0) {
                        nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return(1);
                        }
                } else {
                        nlv = (lv + 1);
                        if ((lv & ~HAMMER_REFS_FLAGS) == 0)
                                nlv |= HAMMER_REFS_CHECK;
                        if ((nlv & HAMMER_REFS_CHECK) == 0) {
                                if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                        return(0);
                        } else if (lv & HAMMER_REFS_LOCKED) {
                                /* CHECK also set here */
                                if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                        break;
                        } else {
                                /* CHECK also set here */
                                nlv |= HAMMER_REFS_LOCKED;
                                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                        lock->rowner = curthread;
                                        return(1);
                                }
                        }
                }
        }

        /*
         * Deferred check condition because we were unable to acquire the
         * lock.  We must block until the check condition is cleared due
         * to a race with another thread, or we are able to acquire the
         * lock.
         *
         * (a) Return 0 (-CHECK)                Another thread handled it
         * (b) Return 1 (+LOCKED, +CHECK)       We handled it.
         */
        for (;;) {
                lv = lock->refs;
                if ((lv & HAMMER_REFS_CHECK) == 0)
                        return(0);
                if (lv & HAMMER_REFS_LOCKED) {
                        tsleep_interlock(&lock->refs, 0);
                        nlv = (lv | HAMMER_REFS_WANTED);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
                } else {
                        /* CHECK also set here */
                        nlv = lv | HAMMER_REFS_LOCKED;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return(1);
                        }
                }
        }
        /* not reached */
}

/*
 * This is the same as hammer_ref_interlock() but asserts that the
 * 0->1 transition is always true, thus the lock must have no references
 * on entry or have CHECK set, and will have one reference with the
 * interlock held on return.  It must also not be interlocked on entry
 * by anyone.
 *
 * NOTE that CHECK will never be found set when the ref-count is 0.
 *
 * TRUE is always returned to match the API for hammer_ref_interlock().
 * This function returns with one ref, the lock held, and the CHECK bit set.
 */
int
hammer_ref_interlock_true(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;

                if (lv) {
                        panic("hammer_ref_interlock_true: bad lock %p %08x",
                              lock, lock->refs);
                }
                nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                        lock->rowner = curthread;
                        return (1);
                }
        }
}

/*
 * Unlock the interlock acquired by hammer_ref_interlock() and clear the
 * CHECK flag.  The ref-count remains unchanged.
 *
 * This routine is called in the load path when the load succeeds.
 */
void
hammer_ref_interlock_done(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                nlv = lv & ~HAMMER_REFS_FLAGS;
                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                        if (lv & HAMMER_REFS_WANTED)
                                wakeup(&lock->refs);
                        break;
                }
        }
}

/*
 * hammer_rel_interlock() works a bit differently in that it must
 * acquire the lock in tandem with a 1->0 transition.  CHECK is
 * not used.
 *
 * TRUE is returned on 1->0 transitions with the lock held on return
 * and FALSE is returned otherwise with the lock not held.
 *
 * It is important to note that the refs are not stable and may
 * increase while we hold the lock, the TRUE indication only means
 * that we transitioned 1->0, not necessarily that we stayed at 0.
 *
 * Another thread bumping refs while we hold the lock will set CHECK,
 * causing one of the competing hammer_ref_interlock() calls to
 * return TRUE after we release our lock.
 *
 * MPSAFE
 */
int
hammer_rel_interlock(struct hammer_lock *lock, int locked)
{
        u_int lv;
        u_int nlv;

        /*
         * In locked mode (failure/unload path) we release the
         * ref-count but leave it locked.
         */
        if (locked) {
                hammer_rel(lock);
                return(1);
        }

        /*
         * Integrated reference count drop with LOCKED, plus the hot-path
         * returns.
         */
        for (;;) {
                lv = lock->refs;

                if (lv == 1) {
                        nlv = 0 | HAMMER_REFS_LOCKED;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return(1);
                        }
                } else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
                        if ((lv & HAMMER_REFS_LOCKED) == 0) {
                                nlv = (lv - 1) | HAMMER_REFS_LOCKED;
                                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                        lock->rowner = curthread;
                                        return(1);
                                }
                        } else {
                                nlv = lv | HAMMER_REFS_WANTED;
                                tsleep_interlock(&lock->refs, 0);
                                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                        tsleep(&lock->refs, PINTERLOCKED,
                                               "h0lk", 0);
                                }
                        }
                } else {
                        nlv = (lv - 1);
                        KKASSERT((int)nlv >= 0);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return(0);
                }
        }
        /* not reached */
}

/*
 * Unlock the interlock acquired by hammer_rel_interlock().
 *
 * If orig_locked is non-zero the interlock was originally held prior to
 * the hammer_rel_interlock() call and passed through to us.  In this
 * case we want to retain the CHECK error state if not transitioning
 * to 0.
 *
 * The code is the same either way so we do not have to conditionalize
 * on orig_locked.
 */
void
hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
                if ((lv & ~HAMMER_REFS_FLAGS) == 0)
                        nlv &= ~HAMMER_REFS_CHECK;
                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                        if (lv & HAMMER_REFS_WANTED)
                                wakeup(&lock->refs);
                        break;
                }
        }
}

/*
 * Acquire the interlock on lock->refs.
 *
 * Return TRUE if CHECK is currently set.  Note that CHECK will not
 * be set if the reference count is 0, but can get set if this function
 * is preceded by, say, hammer_ref(), or through races with other
 * threads.  The return value allows the caller to use the same logic
 * as hammer_ref_interlock().
 *
 * MPSAFE
 */
int
hammer_get_interlock(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                if (lv & HAMMER_REFS_LOCKED) {
                        nlv = lv | HAMMER_REFS_WANTED;
                        tsleep_interlock(&lock->refs, 0);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
                } else {
                        nlv = (lv | HAMMER_REFS_LOCKED);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
                        }
                }
        }
}

/*
 * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
 * cache callback code to disassociate or lock the bufs related to HAMMER
 * structures.
 *
 * During teardown the related bp will be acquired by hammer_io_release()
 * which interlocks our test.
 *
 * Returns non-zero on success, zero on failure.
 */
int
hammer_try_interlock_norefs(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                if (lv == 0) {
                        nlv = lv | HAMMER_REFS_LOCKED;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return(1);
                        }
                } else {
                        return(0);
                }
        }
        /* not reached */
}

/*
 * Release the interlock on lock->refs.  This function will set
 * CHECK if the refs is non-zero and error is non-zero, and clear
 * CHECK otherwise.
 *
 * MPSAFE
 */
void
hammer_put_interlock(struct hammer_lock *lock, int error)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                KKASSERT(lv & HAMMER_REFS_LOCKED);
                nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);

                if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
                        nlv &= ~HAMMER_REFS_CHECK;
                else
                        nlv |= HAMMER_REFS_CHECK;

                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                        if (lv & HAMMER_REFS_WANTED)
                                wakeup(&lock->refs);
                        return;
                }
        }
}

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data buffers
 * (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement deadlock recovery code CANNOT release the
 * sync lock, nor can we give pending exclusive sync locks priority over
 * a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
        ++trans->sync_lock_refs;
        hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
        ++trans->sync_lock_refs;
        hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
        int error;

        ++trans->sync_lock_refs;
        if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
                --trans->sync_lock_refs;
        return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
        --trans->sync_lock_refs;
        hammer_unlock(&trans->hmp->sync_lock);
}

/*
 * Misc
 */
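/*
 * HAMMER stores unix uid/gid values embedded in a uuid.  Extract the
 * 32 bit xid from the uuid, or construct such a uuid from an xid.
 */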
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
        return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
        bzero(uuid, sizeof(*uuid));
        *(u_int32_t *)&uuid->node[2] = guid;
}

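/*
 * HAMMER timestamps are stored as microseconds since the epoch.  Convert
 * to and from a struct timespec.
 */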
void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
        ts->tv_sec = (unsigned long)(xtime / 1000000);
        ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
        u_int64_t xtime;

        xtime = (unsigned)(ts->tv_nsec / 1000) +
                (unsigned long)ts->tv_sec * 1000000ULL;
        return(xtime);
}

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
        switch(obj_type) {
        case HAMMER_OBJTYPE_DIRECTORY:
                return(VDIR);
        case HAMMER_OBJTYPE_REGFILE:
                return(VREG);
        case HAMMER_OBJTYPE_DBFILE:
                return(VDATABASE);
        case HAMMER_OBJTYPE_FIFO:
                return(VFIFO);
        case HAMMER_OBJTYPE_SOCKET:
                return(VSOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(VCHR);
        case HAMMER_OBJTYPE_BDEV:
                return(VBLK);
        case HAMMER_OBJTYPE_SOFTLINK:
                return(VLNK);
        default:
                return(VBAD);
        }
        /* not reached */
}

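/*
 * Convert a HAMMER filesystem object type to a directory entry type (DT_*)
 */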
int
hammer_get_dtype(u_int8_t obj_type)
{
        switch(obj_type) {
        case HAMMER_OBJTYPE_DIRECTORY:
                return(DT_DIR);
        case HAMMER_OBJTYPE_REGFILE:
                return(DT_REG);
        case HAMMER_OBJTYPE_DBFILE:
                return(DT_DBF);
        case HAMMER_OBJTYPE_FIFO:
                return(DT_FIFO);
        case HAMMER_OBJTYPE_SOCKET:
                return(DT_SOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(DT_CHR);
        case HAMMER_OBJTYPE_BDEV:
                return(DT_BLK);
        case HAMMER_OBJTYPE_SOFTLINK:
                return(DT_LNK);
        default:
                return(DT_UNKNOWN);
        }
        /* not reached */
}

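/*
 * Convert a vnode type to a HAMMER filesystem object type
 */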
u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
        switch(vtype) {
        case VDIR:
                return(HAMMER_OBJTYPE_DIRECTORY);
        case VREG:
                return(HAMMER_OBJTYPE_REGFILE);
        case VDATABASE:
                return(HAMMER_OBJTYPE_DBFILE);
        case VFIFO:
                return(HAMMER_OBJTYPE_FIFO);
        case VSOCK:
                return(HAMMER_OBJTYPE_SOCKET);
        case VCHR:
                return(HAMMER_OBJTYPE_CDEV);
        case VBLK:
                return(HAMMER_OBJTYPE_BDEV);
        case VLNK:
                return(HAMMER_OBJTYPE_SOFTLINK);
        default:
                return(HAMMER_OBJTYPE_UNKNOWN);
        }
        /* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
        if (ip->hmp->hflags & HMNT_NOHISTORY)
                return(HAMMER_DELETE_DESTROY);
        if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
                return(HAMMER_DELETE_DESTROY);
        return(0);
}

/*
 * ALGORITHM VERSION 0:
 *      Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 *      crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 *      to generate a unique key and will scan all entries with the same upper
 *      32 bits when issuing a lookup.
 *
 *      0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 1:
 *
 *      This algorithm breaks the filename down into separate 32-bit crcs
 *      for each filename segment separated by a special character (dot,
 *      dash, underscore, or tilde).  The CRCs are then added together.
 *      This allows temporary names.  A full-filename 16 bit crc is also
 *      generated to deal with degenerate conditions.
 *
 *      The algorithm is designed to handle create/rename situations such
 *      that a create with an extension followed by a rename without the
 *      extension only shifts the key space rather than randomizing it.
 *
 *      NOTE: The inode allocator cache can only match 10 bits so we do
 *            not really have any room for a partial sorted name, and
 *            numbers don't sort well in that situation anyway.
 *
 *      0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * We usually strip bit 0 (set it to 0) in order to provide a consistent
 * iteration space for collisions.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
                         u_int32_t *max_iterationsp)
{
        const char *aname = name;
        int32_t crcx;
        int64_t key;
        int i;
        int j;

        switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
        case HAMMER_INODE_CAP_DIRHASH_ALG0:
                /*
                 * Original algorithm
                 */
                key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
                if (key == 0)
                        key |= 0x100000000LL;
                *max_iterationsp = 0xFFFFFFFFU;
                break;
        case HAMMER_INODE_CAP_DIRHASH_ALG1:
                /*
                 * Filesystem version 6 or better will create directories
                 * using the ALG1 dirhash.  This hash breaks the filename
                 * up into domains separated by special characters and
                 * hashes each domain independently.
                 *
                 * We also do a simple sub-sort using the first character
                 * of the filename in the top 5-bits.
                 */
                key = 0;

                /*
                 * m32
                 */
                crcx = 0;
                for (i = j = 0; i < len; ++i) {
                        if (aname[i] == '.' ||
                            aname[i] == '-' ||
                            aname[i] == '_' ||
                            aname[i] == '~') {
                                if (i != j)
                                        crcx += crc32(aname + j, i - j);
                                j = i + 1;
                        }
                }
                if (i != j)
                        crcx += crc32(aname + j, i - j);

#if 0
                /*
                 * xor top 5 bits 0mmmm into low bits and steal the top 5
                 * bits as a semi sub sort using the first character of
                 * the filename.  bit 63 is always left as 0 so directory
                 * keys are positive numbers.
                 */
                crcx ^= (uint32_t)crcx >> (32 - 5);
                crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
#endif
                crcx &= 0x7FFFFFFFU;

                key |= (uint64_t)crcx << 32;

                /*
                 * l16 - crc of entire filename
                 *
                 * This crc reduces degenerate hash collision conditions
                 */
                crcx = crc32(aname, len);
                crcx = crcx ^ (crcx << 16);
                key |= crcx & 0xFFFF0000U;

                /*
                 * Cleanup
                 */
                if ((key & 0xFFFFFFFF00000000LL) == 0)
                        key |= 0x100000000LL;
                if (hammer_debug_general & 0x0400) {
                        kprintf("namekey2: 0x%016llx %*.*s\n",
                                (long long)key, len, len, aname);
                }
                *max_iterationsp = 0x00FFFFFF;
                break;
        case HAMMER_INODE_CAP_DIRHASH_ALG2:
        case HAMMER_INODE_CAP_DIRHASH_ALG3:
        default:
                key = 0;                        /* compiler warning */
                *max_iterationsp = 1;           /* sanity */
                panic("hammer_directory_namekey: bad algorithm %p", dip);
                break;
        }
        return(key);
}

/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails *ispfs, *tidp, and *localizationp will not
 * be modified.
 */
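/*
 * Example (hypothetical values): "0x00000001061a8ba0" selects a TID,
 * while "0x00000001061a8ba0:00001" additionally selects PFS #1.
 */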
int
hammer_str_to_tid(const char *str, int *ispfsp,
                  hammer_tid_t *tidp, u_int32_t *localizationp)
{
        hammer_tid_t tid;
        u_int32_t localization;
        char *ptr;
        int ispfs;
        int n;

        /*
         * Forms allowed for TID:  "0x%016llx"
         *                         "-1"
         */
        tid = strtouq(str, &ptr, 0);
        n = ptr - str;
        if (n == 2 && str[0] == '-' && str[1] == '1') {
                /* ok */
        } else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
                /* ok */
        } else {
                return(EINVAL);
        }

        /*
         * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
         */
        str = ptr;
        if (*str == ':') {
                localization = strtoul(str + 1, &ptr, 10) << 16;
                if (ptr - str != 6)
                        return(EINVAL);
                str = ptr;
                ispfs = 1;
        } else {
                localization = *localizationp;
                ispfs = 0;
        }

        /*
         * Any trailing junk invalidates special extension handling.
         */
        if (*str)
                return(EINVAL);
        *tidp = tid;
        *localizationp = localization;
        *ispfsp = ispfs;
        return(0);
}

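/*
 * Set or test the CRCs guarding the blockmap, volume header, and B-Tree
 * node on-disk structures.
 */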
void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
        blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
        ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
                          crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
        hammer_crc_t crc;

        crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
        return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
        hammer_crc_t crc;

        crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
              crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
        return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
        hammer_crc_t crc;

        crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
        return (ondisk->crc == crc);
}

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *       to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
        hammer_crc_t crc;

        if (leaf->data_len == 0) {
                crc = 0;
        } else {
                switch(leaf->base.rec_type) {
                case HAMMER_RECTYPE_INODE:
                        if (leaf->data_len != sizeof(struct hammer_inode_data))
                                return(0);
                        crc = crc32(data, HAMMER_INODE_CRCSIZE);
                        break;
                default:
                        crc = crc32(data, leaf->data_len);
                        break;
                }
        }
        return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
        if (leaf->data_len == 0) {
                leaf->data_crc = 0;
        } else {
                switch(leaf->base.rec_type) {
                case HAMMER_RECTYPE_INODE:
                        KKASSERT(leaf->data_len ==
                                  sizeof(struct hammer_inode_data));
                        leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
                        break;
                default:
                        leaf->data_crc = crc32(data, leaf->data_len);
                        break;
                }
        }
}

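/*
 * Debugging kprintf() which only emits output when hammer_debug_debug
 * is enabled.
 */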
void
hkprintf(const char *ctl, ...)
{
        __va_list va;

        if (hammer_debug_debug) {
                __va_start(va, ctl);
                kvprintf(ctl, va);
                __va_end(va);
        }
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
        if (file_offset < HAMMER_XDEMARC)
                return(HAMMER_BUFSIZE);
        else
                return(HAMMER_XBUFSIZE);
}

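/*
 * Return the offset within the block for the specified file offset,
 * using the same small/large buffer demarcation as hammer_blocksize().
 */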
int
hammer_blockoff(int64_t file_offset)
{
        if (file_offset < HAMMER_XDEMARC)
                return((int)file_offset & HAMMER_BUFMASK);
        else
                return((int)file_offset & HAMMER_XBUFMASK);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
        if (file_offset1 < HAMMER_XDEMARC) {
                if (file_offset2 <= HAMMER_XDEMARC)
                        return(file_offset2);
                return(HAMMER_XDEMARC);
        }
        panic("hammer_blockdemarc: illegal range %lld %lld",
              (long long)file_offset1, (long long)file_offset2);
}

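/*
 * Collapse the 128 bit filesystem uuid into a 32 bit pseudo device
 * number via a crc.
 */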
udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
        u_int32_t crc;

        crc = crc32(uuid, sizeof(*uuid));
        return((udev_t)crc);
}
