[dragonfly.git] / sys / vfs / hammer / hammer_subs.c
1 /*
2  * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
35  */
36 /*
37  * HAMMER structural locking
38  */
39
40 #include "hammer.h"
41 #include <sys/dirent.h>
42
43 void
44 hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
45 {
46         thread_t td = curthread;
47         u_int lv;
48         u_int nlv;
49
50         KKASSERT(lock->refs);
51         for (;;) {
52                 lv = lock->lockval;
53
54                 if (lv == 0) {
55                         nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
56                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
57                                 lock->lowner = td;
58                                 break;
59                         }
60                 } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
61                            lock->lowner == td) {
62                         nlv = (lv + 1);
63                         if (atomic_cmpset_int(&lock->lockval, lv, nlv))
64                                 break;
65                 } else {
66                         if (hammer_debug_locks) {
67                                 kprintf("hammer_lock_ex: held by %p\n",
68                                         lock->lowner);
69                         }
70                         nlv = lv | HAMMER_LOCKF_WANTED;
71                         ++hammer_contention_count;
72                         tsleep_interlock(&lock->lockval, 0);
73                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
74                                 tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
75                                 if (hammer_debug_locks)
76                                         kprintf("hammer_lock_ex: try again\n");
77                         }
78                 }
79         }
80 }
81
82 /*
83  * Try to obtain an exclusive lock
84  */
85 int
86 hammer_lock_ex_try(struct hammer_lock *lock)
87 {
88         thread_t td = curthread;
89         int error;
90         u_int lv;
91         u_int nlv;
92
93         KKASSERT(lock->refs);
94         for (;;) {
95                 lv = lock->lockval;
96
97                 if (lv == 0) {
98                         nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
99                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
100                                 lock->lowner = td;
101                                 error = 0;
102                                 break;
103                         }
104                 } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
105                            lock->lowner == td) {
106                         nlv = (lv + 1);
107                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
108                                 error = 0;
109                                 break;
110                         }
111                 } else {
112                         error = EAGAIN;
113                         break;
114                 }
115         }
116         return (error);
117 }
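
/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): a typical exclusive-lock sequence using the helpers above.
 * 'obj' is a hypothetical structure embedding a struct hammer_lock;
 * real HAMMER objects such as hammer_node embed one.  A structural
 * reference must be held first (see KKASSERT(lock->refs)).
 *
 *	hammer_ref(&obj->lock);
 *	hammer_lock_ex(&obj->lock);		// may block; recursion by the
 *						// same owner just bumps the count
 *	...modify obj...
 *	hammer_unlock(&obj->lock);
 *	hammer_rel(&obj->lock);
 *
 *	if (hammer_lock_ex_try(&obj->lock) == 0) {	// EAGAIN if contested
 *		...modify obj...
 *		hammer_unlock(&obj->lock);
 *	}
 */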
118
119 /*
120  * Obtain a shared lock
121  *
122  * We do not give pending exclusive locks priority over shared locks as
123  * doing so could lead to a deadlock.
124  */
125 void
126 hammer_lock_sh(struct hammer_lock *lock)
127 {
128         thread_t td = curthread;
129         u_int lv;
130         u_int nlv;
131         const char *ident = "hmrlck";
132
133         KKASSERT(lock->refs);
134         for (;;) {
135                 lv = lock->lockval;
136
137                 if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
138                         nlv = (lv + 1);
139                         if (atomic_cmpset_int(&lock->lockval, lv, nlv))
140                                 break;
141                 } else if (lock->lowner == td) {
142                         /*
143                          * Disallowed case, drop into kernel debugger for
144                          * now.  A 'cont' from the debugger continues with an exclusive lock.
145                          */
146                         nlv = (lv + 1);
147                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
148                                 if (hammer_debug_critical)
149                                         Debugger("hammer_lock_sh: holding ex");
150                                 break;
151                         }
152                 } else {
153                         nlv = lv | HAMMER_LOCKF_WANTED;
154                         ++hammer_contention_count;
155                         tsleep_interlock(&lock->lockval, 0);
156                         if (atomic_cmpset_int(&lock->lockval, lv, nlv))
157                                 tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
158                 }
159         }
160 }
161
162 int
163 hammer_lock_sh_try(struct hammer_lock *lock)
164 {
165         thread_t td = curthread;
166         u_int lv;
167         u_int nlv;
168         int error;
169
170         KKASSERT(lock->refs);
171         for (;;) {
172                 lv = lock->lockval;
173
174                 if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
175                         nlv = (lv + 1);
176                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
177                                 error = 0;
178                                 break;
179                         }
180                 } else if (lock->lowner == td) {
181                         /*
182                          * Disallowed case, drop into kernel debugger for
183                          * now.  A 'cont' from the debugger continues with an exclusive lock.
184                          */
185                         nlv = (lv + 1);
186                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
187                                 if (hammer_debug_critical)
188                                         Debugger("hammer_lock_sh: holding ex");
189                                 error = 0;
190                                 break;
191                         }
192                 } else {
193                         error = EAGAIN;
194                         break;
195                 }
196         }
197         return (error);
198 }
199
200 /*
201  * Upgrade a shared lock to an exclusively held lock.  This function will
202  * return EDEADLK if there is more than one shared holder.
203  *
204  * No error occurs and no action is taken if the lock is already exclusively
205  * held by the caller.  If the lock is not held at all or held exclusively
206  * by someone else, this function will panic.
207  */
208 int
209 hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
210 {
211         thread_t td = curthread;
212         u_int lv;
213         u_int nlv;
214         int error;
215
216         for (;;) {
217                 lv = lock->lockval;
218
219                 if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
220                         nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
221                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
222                                 lock->lowner = td;
223                                 error = 0;
224                                 break;
225                         }
226                 } else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
227                         if (lock->lowner != curthread)
228                                 panic("hammer_lock_upgrade: illegal state");
229                         error = 0;
230                         break;
231                 } else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
232                         panic("hammer_lock_upgrade: lock is not held");
233                         /* NOT REACHED */
234                         error = EDEADLK;
235                         break;
236                 } else {
237                         error = EDEADLK;
238                         break;
239                 }
240         }
241         return (error);
242 }
243
244 /*
245  * Downgrade an exclusively held lock to a shared lock.
246  */
247 void
248 hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
249 {
250         thread_t td __debugvar = curthread;
251         u_int lv;
252         u_int nlv;
253
254         KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
255                  (HAMMER_LOCKF_EXCLUSIVE | shcount));
256         KKASSERT(lock->lowner == td);
257
258         /*
259          * NOTE: Must clear owner before releasing exclusivity
260          */
261         lock->lowner = NULL;
262
263         for (;;) {
264                 lv = lock->lockval;
265                 nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
266                 if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
267                         if (lv & HAMMER_LOCKF_WANTED)
268                                 wakeup(&lock->lockval);
269                         break;
270                 }
271         }
272 }
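
/*
 * Illustrative sketch (editorial addition): promoting a single shared
 * hold to exclusive and back, with the EDEADLK fallback described above.
 * 'obj' is hypothetical; a structural ref is assumed to be held already.
 *
 *	hammer_lock_sh(&obj->lock);
 *	if (hammer_lock_upgrade(&obj->lock, 1) == 0) {
 *		...modify under the exclusive lock...
 *		hammer_lock_downgrade(&obj->lock, 1);
 *	} else {
 *		// EDEADLK: other shared holders exist; the caller must
 *		// fall back to a full unlock/relock or retry path.
 *	}
 *	hammer_unlock(&obj->lock);
 */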
273
274 void
275 hammer_unlock(struct hammer_lock *lock)
276 {
277         thread_t td __debugvar = curthread;
278         u_int lv;
279         u_int nlv;
280
281         lv = lock->lockval;
282         KKASSERT(lv != 0);
283         if (lv & HAMMER_LOCKF_EXCLUSIVE)
284                 KKASSERT(lock->lowner == td);
285
286         for (;;) {
287                 lv = lock->lockval;
288                 nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
289                 if (nlv > 1) {
290                         nlv = lv - 1;
291                         if (atomic_cmpset_int(&lock->lockval, lv, nlv))
292                                 break;
293                 } else if (nlv == 1) {
294                         nlv = 0;
295                         if (lv & HAMMER_LOCKF_EXCLUSIVE)
296                                 lock->lowner = NULL;
297                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
298                                 if (lv & HAMMER_LOCKF_WANTED)
299                                         wakeup(&lock->lockval);
300                                 break;
301                         }
302                 } else {
303                         panic("hammer_unlock: lock %p is not held", lock);
304                 }
305         }
306 }
307
308 /*
309  * The calling thread must be holding a shared or exclusive lock.
310  * Returns < 0 if lock is held shared, and > 0 if held exclusively.
311  */
312 int
313 hammer_lock_status(struct hammer_lock *lock)
314 {
315         u_int lv = lock->lockval;
316
317         if (lv & HAMMER_LOCKF_EXCLUSIVE)
318                 return(1);
319         else if (lv)
320                 return(-1);
321         panic("hammer_lock_status: lock must be held: %p", lock);
322 }
323
324 /*
325  * Bump the ref count for a lock (not the excl/share count, but a separate
326  * structural reference count).  The CHECK flag will be set on a 0->1
327  * transition.
328  *
329  * This function does nothing to serialize races between multiple threads.
330  * The caller can interlock it later on to deal with serialization.
331  *
332  * MPSAFE
333  */
334 void
335 hammer_ref(struct hammer_lock *lock)
336 {
337         u_int lv;
338         u_int nlv;
339
340         for (;;) {
341                 lv = lock->refs;
342                 if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
343                         nlv = (lv + 1) | HAMMER_REFS_CHECK;
344                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
345                                 return;
346                 } else {
347                         nlv = (lv + 1);
348                         KKASSERT((int)nlv > 0);
349                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
350                                 return;
351                 }
352         }
353         /* not reached */
354 }
355
356 /*
357  * Drop the ref count for a lock (not the excl/share count, but a separate
358  * structural reference count).  The CHECK flag will be cleared on a 1->0
359  * transition.
360  *
361  * This function does nothing to serialize races between multiple threads.
362  *
363  * MPSAFE
364  */
365 void
366 hammer_rel(struct hammer_lock *lock)
367 {
368         u_int lv;
369         u_int nlv;
370
371         for (;;) {
372                 lv = lock->refs;
373                 if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
374                         nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
375                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
376                                 return;
377                 } else {
378                         KKASSERT((int)lv > 0);
379                         nlv = (lv - 1);
380                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
381                                 return;
382                 }
383         }
384         /* not reached */
385 }
386
387 /*
388  * The hammer_*_interlock() and hammer_*_interlock_done() functions are
389  * more sophisticated versions which handle MP transition races and block
390  * when necessary.
391  *
392  * hammer_ref_interlock() bumps the ref-count and conditionally acquires
393  * the interlock for 0->1 transitions or if the CHECK is found to be set.
394  *
395  * This case will return TRUE, the interlock will be held, and the CHECK
396  * bit also set.  Other threads attempting to ref will see the CHECK bit
397  * and block until we clean up.
398  *
399  * FALSE is returned for transitions other than 0->1 when the CHECK bit
400  * is not found to be set, or if the function loses the race with another
401  * thread.
402  *
403  * TRUE is only returned to one thread and the others will block.
404  * Effectively a TRUE indicator means 'someone transitioned 0->1
405  * and you are the first guy to successfully lock it after that, so you
406  * need to check'.  Due to races the ref-count may be greater than 1 upon
407  * return.
408  *
409  * MPSAFE
410  */
411 int
412 hammer_ref_interlock(struct hammer_lock *lock)
413 {
414         u_int lv;
415         u_int nlv;
416
417         /*
418          * Integrated reference count bump, lock, and check, with hot-path.
419          *
420          * (a) Return 1 (+LOCKED, +CHECK)       0->1 transition
421          * (b) Return 0 (-LOCKED, -CHECK)       N->N+1 transition
422          * (c) Break out (+CHECK)               Check condition and Cannot lock
423          * (d) Return 1 (+LOCKED, +CHECK)       Successfully locked
424          */
425         for (;;) {
426                 lv = lock->refs;
427                 if (lv == 0) {
428                         nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
429                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
430                                 lock->rowner = curthread;
431                                 return(1);
432                         }
433                 } else {
434                         nlv = (lv + 1);
435                         if ((lv & ~HAMMER_REFS_FLAGS) == 0)
436                                 nlv |= HAMMER_REFS_CHECK;
437                         if ((nlv & HAMMER_REFS_CHECK) == 0) {
438                                 if (atomic_cmpset_int(&lock->refs, lv, nlv))
439                                         return(0);
440                         } else if (lv & HAMMER_REFS_LOCKED) {
441                                 /* CHECK also set here */
442                                 if (atomic_cmpset_int(&lock->refs, lv, nlv))
443                                         break;
444                         } else {
445                                 /* CHECK also set here */
446                                 nlv |= HAMMER_REFS_LOCKED;
447                                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
448                                         lock->rowner = curthread;
449                                         return(1);
450                                 }
451                         }
452                 }
453         }
454
455         /*
456          * Deferred check condition because we were unable to acquire the
457          * lock.  We must block until the check condition is cleared due
458          * to a race with another thread, or we are able to acquire the
459          * lock.
460          *
461          * (a) Return 0 (-CHECK)                Another thread handled it
462          * (b) Return 1 (+LOCKED, +CHECK)       We handled it.
463          */
464         for (;;) {
465                 lv = lock->refs;
466                 if ((lv & HAMMER_REFS_CHECK) == 0)
467                         return(0);
468                 if (lv & HAMMER_REFS_LOCKED) {
469                         tsleep_interlock(&lock->refs, 0);
470                         nlv = (lv | HAMMER_REFS_WANTED);
471                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
472                                 tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
473                 } else {
474                         /* CHECK also set here */
475                         nlv = lv | HAMMER_REFS_LOCKED;
476                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
477                                 lock->rowner = curthread;
478                                 return(1);
479                         }
480                 }
481         }
482         /* not reached */
483 }
484
485 /*
486  * This is the same as hammer_ref_interlock() but asserts that the
487  * 0->1 transition is always true, thus the lock must have no references
488  * on entry or have CHECK set, and will have one reference with the
489  * interlock held on return.  It must also not be interlocked on entry
490  * by anyone.
491  *
492  * NOTE that CHECK will never be found set when the ref-count is 0.
493  *
494  * TRUE is always returned to match the API for hammer_ref_interlock().
495  * This function returns with one ref, the lock held, and the CHECK bit set.
496  */
497 int
498 hammer_ref_interlock_true(struct hammer_lock *lock)
499 {
500         u_int lv;
501         u_int nlv;
502
503         for (;;) {
504                 lv = lock->refs;
505
506                 if (lv) {
507                         panic("hammer_ref_interlock_true: bad lock %p %08x\n",
508                               lock, lock->refs);
509                 }
510                 nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
511                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
512                         lock->rowner = curthread;
513                         return (1);
514                 }
515         }
516 }
517
518 /*
519  * Unlock the interlock acquired by hammer_ref_interlock() and clear the
520  * CHECK flag.  The ref-count remains unchanged.
521  *
522  * This routine is called in the load path when the load succeeds.
523  */
524 void
525 hammer_ref_interlock_done(struct hammer_lock *lock)
526 {
527         u_int lv;
528         u_int nlv;
529
530         for (;;) {
531                 lv = lock->refs;
532                 nlv = lv & ~HAMMER_REFS_FLAGS;
533                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
534                         if (lv & HAMMER_REFS_WANTED)
535                                 wakeup(&lock->refs);
536                         break;
537                 }
538         }
539 }
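
/*
 * Illustrative sketch (editorial addition) of the load path implied by
 * the two functions above.  load_object() is hypothetical; only the
 * hammer_*_interlock calls are real.
 *
 *	if (hammer_ref_interlock(&obj->lock)) {
 *		// We won the 0->1 (or CHECK) race: the interlock is held
 *		// and CHECK is set, so competing referencers block until
 *		// we finish.
 *		error = load_object(obj);
 *		if (error == 0)
 *			hammer_ref_interlock_done(&obj->lock);	// clears CHECK
 *		// on failure the ref and interlock are unwound through
 *		// hammer_rel_interlock(&obj->lock, 1) and
 *		// hammer_rel_interlock_done(&obj->lock, 1) (see below)
 *	} else {
 *		// Ref was bumped; the object was already valid.
 *	}
 */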
540
541 /*
542  * hammer_rel_interlock() works a bit differently in that it must
543  * acquire the lock in tandem with a 1->0 transition.  CHECK is
544  * not used.
545  *
546  * TRUE is returned on 1->0 transitions with the lock held on return
547  * and FALSE is returned otherwise with the lock not held.
548  *
549  * It is important to note that the refs are not stable and may
550  * increase while we hold the lock, the TRUE indication only means
551  * that we transitioned 1->0, not necessarily that we stayed at 0.
552  *
553  * Another thread bumping refs while we hold the lock will set CHECK,
554  * causing one of the competing hammer_ref_interlock() calls to
555  * return TRUE after we release our lock.
556  *
557  * MPSAFE
558  */
559 int
560 hammer_rel_interlock(struct hammer_lock *lock, int locked)
561 {
562         u_int lv;
563         u_int nlv;
564
565         /*
566          * In locked mode (failure/unload path) we release the
567          * ref-count but leave it locked.
568          */
569         if (locked) {
570                 hammer_rel(lock);
571                 return(1);
572         }
573
574         /*
575          * Integrated reference count drop with LOCKED, plus the hot-path
576          * returns.
577          */
578         for (;;) {
579                 lv = lock->refs;
580
581                 if (lv == 1) {
582                         nlv = 0 | HAMMER_REFS_LOCKED;
583                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
584                                 lock->rowner = curthread;
585                                 return(1);
586                         }
587                 } else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
588                         if ((lv & HAMMER_REFS_LOCKED) == 0) {
589                                 nlv = (lv - 1) | HAMMER_REFS_LOCKED;
590                                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
591                                         lock->rowner = curthread;
592                                         return(1);
593                                 }
594                         } else {
595                                 nlv = lv | HAMMER_REFS_WANTED;
596                                 tsleep_interlock(&lock->refs, 0);
597                                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
598                                         tsleep(&lock->refs, PINTERLOCKED,
599                                                "h0lk", 0);
600                                 }
601                         }
602                 } else {
603                         nlv = (lv - 1);
604                         KKASSERT((int)nlv >= 0);
605                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
606                                 return(0);
607                 }
608         }
609         /* not reached */
610 }
611
612 /*
613  * Unlock the interlock acquired by hammer_rel_interlock().
614  *
615  * If orig_locked is non-zero the interlock was originally held prior to
616  * the hammer_rel_interlock() call and passed through to us.  In this
617  * case we want to retain the CHECK error state if not transitioning
618  * to 0.
619  *
620  * The code is the same either way so we do not have to conditionalize
621  * on orig_locked.
622  */
623 void
624 hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
625 {
626         u_int lv;
627         u_int nlv;
628
629         for (;;) {
630                 lv = lock->refs;
631                 nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
632                 if ((lv & ~HAMMER_REFS_FLAGS) == 0)
633                         nlv &= ~HAMMER_REFS_CHECK;
634                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
635                         if (lv & HAMMER_REFS_WANTED)
636                                 wakeup(&lock->refs);
637                         break;
638                 }
639         }
640 }
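
/*
 * Illustrative sketch (editorial addition) of the 1->0 teardown side.
 * teardown_object() is hypothetical; whether the object may really be
 * torn down must be re-checked under the interlock because the ref
 * count is not stable (see the comments above hammer_rel_interlock()).
 *
 *	if (hammer_rel_interlock(&obj->lock, 0)) {
 *		// We transitioned 1->0 and hold the interlock.
 *		if ((obj->lock.refs & ~HAMMER_REFS_FLAGS) == 0)
 *			teardown_object(obj);
 *		hammer_rel_interlock_done(&obj->lock, 0);
 *	}
 */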
641
642 /*
643  * Acquire the interlock on lock->refs.
644  *
645  * Return TRUE if CHECK is currently set.  Note that CHECK will not
646  * be set if the reference count is 0, but can get set if this function
647  * is preceded by, say, hammer_ref(), or through races with other
648  * threads.  The return value allows the caller to use the same logic
649  * as hammer_ref_interlock().
650  *
651  * MPSAFE
652  */
653 int
654 hammer_get_interlock(struct hammer_lock *lock)
655 {
656         u_int lv;
657         u_int nlv;
658
659         for (;;) {
660                 lv = lock->refs;
661                 if (lv & HAMMER_REFS_LOCKED) {
662                         nlv = lv | HAMMER_REFS_WANTED;
663                         tsleep_interlock(&lock->refs, 0);
664                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
665                                 tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
666                 } else {
667                         nlv = (lv | HAMMER_REFS_LOCKED);
668                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
669                                 lock->rowner = curthread;
670                                 return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
671                         }
672                 }
673         }
674 }
675
676 /*
677  * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
678  * cache callback code to disassociate or lock the bufs related to HAMMER
679  * structures.
680  *
681  * During teardown the related bp will be acquired by hammer_io_release()
682  * which interlocks our test.
683  *
684  * Returns non-zero on success, zero on failure.
685  */
686 int
687 hammer_try_interlock_norefs(struct hammer_lock *lock)
688 {
689         u_int lv;
690         u_int nlv;
691
692         for (;;) {
693                 lv = lock->refs;
694                 if (lv == 0) {
695                         nlv = lv | HAMMER_REFS_LOCKED;
696                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
697                                 lock->rowner = curthread;
698                                 return(1);
699                         }
700                 } else {
701                         return(0);
702                 }
703         }
704         /* not reached */
705 }
706
707 /*
708  * Release the interlock on lock->refs.  This function will set
709  * CHECK if the refs is non-zero and error is non-zero, and clear
710  * CHECK otherwise.
711  *
712  * MPSAFE
713  */
714 void
715 hammer_put_interlock(struct hammer_lock *lock, int error)
716 {
717         u_int lv;
718         u_int nlv;
719
720         for (;;) {
721                 lv = lock->refs;
722                 KKASSERT(lv & HAMMER_REFS_LOCKED);
723                 nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
724
725                 if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
726                         nlv &= ~HAMMER_REFS_CHECK;
727                 else
728                         nlv |= HAMMER_REFS_CHECK;
729
730                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
731                         if (lv & HAMMER_REFS_WANTED)
732                                 wakeup(&lock->refs);
733                         return;
734                 }
735         }
736 }
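
/*
 * Illustrative sketch (editorial addition): pairing hammer_get_interlock()
 * with hammer_put_interlock().  revalidate_object() is hypothetical.
 *
 *	if (hammer_get_interlock(&obj->lock)) {
 *		// CHECK was set: the object needs (re)validation.
 *		error = revalidate_object(obj);
 *		hammer_put_interlock(&obj->lock, error);  // error != 0 keeps CHECK
 *	} else {
 *		hammer_put_interlock(&obj->lock, 0);	  // clears CHECK
 *	}
 */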
737
738 /*
739  * The sync_lock must be held when doing any modifying operations on
740  * meta-data.  It does not have to be held when modifying non-meta-data buffers
741  * (backend or frontend).
742  *
743  * The flusher holds the lock exclusively while all other consumers hold it
744  * shared.  All modifying operations made while holding the lock are atomic
745  * in that they will be made part of the same flush group.
746  *
747  * Due to the atomicity requirement, deadlock recovery code CANNOT release the
748  * sync lock, nor can we give pending exclusive sync locks priority over
749  * a shared sync lock as this could lead to a 3-way deadlock.
750  */
751 void
752 hammer_sync_lock_ex(hammer_transaction_t trans)
753 {
754         ++trans->sync_lock_refs;
755         hammer_lock_ex(&trans->hmp->sync_lock);
756 }
757
758 void
759 hammer_sync_lock_sh(hammer_transaction_t trans)
760 {
761         ++trans->sync_lock_refs;
762         hammer_lock_sh(&trans->hmp->sync_lock);
763 }
764
765 int
766 hammer_sync_lock_sh_try(hammer_transaction_t trans)
767 {
768         int error;
769
770         ++trans->sync_lock_refs;
771         if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
772                 --trans->sync_lock_refs;
773         return (error);
774 }
775
776 void
777 hammer_sync_unlock(hammer_transaction_t trans)
778 {
779         --trans->sync_lock_refs;
780         hammer_unlock(&trans->hmp->sync_lock);
781 }
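
/*
 * Illustrative sketch (editorial addition): meta-data modifications are
 * bracketed by a shared sync lock so they become part of a single flush
 * group, while the flusher takes the lock exclusively around a flush.
 *
 *	hammer_sync_lock_sh(trans);
 *	...modify meta-data buffers...
 *	hammer_sync_unlock(trans);
 *
 *	if (hammer_sync_lock_sh_try(trans) == 0) {	// non-blocking variant
 *		...modify meta-data buffers...
 *		hammer_sync_unlock(trans);
 *	}
 */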
782
783 /*
784  * Misc
785  */
786 u_int32_t
787 hammer_to_unix_xid(uuid_t *uuid)
788 {
789         return(*(u_int32_t *)&uuid->node[2]);
790 }
791
792 void
793 hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
794 {
795         bzero(uuid, sizeof(*uuid));
796         *(u_int32_t *)&uuid->node[2] = guid;
797 }
798
799 void
800 hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
801 {
802         ts->tv_sec = (unsigned long)(xtime / 1000000);
803         ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
804 }
805
806 u_int64_t
807 hammer_timespec_to_time(struct timespec *ts)
808 {
809         u_int64_t xtime;
810
811         xtime = (unsigned)(ts->tv_nsec / 1000) +
812                 (unsigned long)ts->tv_sec * 1000000ULL;
813         return(xtime);
814 }
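
/*
 * Worked example (editorial addition): HAMMER stores time as a 64-bit
 * count of microseconds.  For xtime = 1234567890123456:
 *
 *	ts.tv_sec  = 1234567890123456 / 1000000 = 1234567890
 *	ts.tv_nsec = (1234567890123456 % 1000000) * 1000 = 123456000
 *
 * hammer_timespec_to_time() reverses the conversion exactly; any
 * sub-microsecond precision in a timespec is discarded.
 */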
815
816
817 /*
818  * Convert a HAMMER filesystem object type to a vnode type
819  */
820 enum vtype
821 hammer_get_vnode_type(u_int8_t obj_type)
822 {
823         switch(obj_type) {
824         case HAMMER_OBJTYPE_DIRECTORY:
825                 return(VDIR);
826         case HAMMER_OBJTYPE_REGFILE:
827                 return(VREG);
828         case HAMMER_OBJTYPE_DBFILE:
829                 return(VDATABASE);
830         case HAMMER_OBJTYPE_FIFO:
831                 return(VFIFO);
832         case HAMMER_OBJTYPE_SOCKET:
833                 return(VSOCK);
834         case HAMMER_OBJTYPE_CDEV:
835                 return(VCHR);
836         case HAMMER_OBJTYPE_BDEV:
837                 return(VBLK);
838         case HAMMER_OBJTYPE_SOFTLINK:
839                 return(VLNK);
840         default:
841                 return(VBAD);
842         }
843         /* not reached */
844 }
845
846 int
847 hammer_get_dtype(u_int8_t obj_type)
848 {
849         switch(obj_type) {
850         case HAMMER_OBJTYPE_DIRECTORY:
851                 return(DT_DIR);
852         case HAMMER_OBJTYPE_REGFILE:
853                 return(DT_REG);
854         case HAMMER_OBJTYPE_DBFILE:
855                 return(DT_DBF);
856         case HAMMER_OBJTYPE_FIFO:
857                 return(DT_FIFO);
858         case HAMMER_OBJTYPE_SOCKET:
859                 return(DT_SOCK);
860         case HAMMER_OBJTYPE_CDEV:
861                 return(DT_CHR);
862         case HAMMER_OBJTYPE_BDEV:
863                 return(DT_BLK);
864         case HAMMER_OBJTYPE_SOFTLINK:
865                 return(DT_LNK);
866         default:
867                 return(DT_UNKNOWN);
868         }
869         /* not reached */
870 }
871
872 u_int8_t
873 hammer_get_obj_type(enum vtype vtype)
874 {
875         switch(vtype) {
876         case VDIR:
877                 return(HAMMER_OBJTYPE_DIRECTORY);
878         case VREG:
879                 return(HAMMER_OBJTYPE_REGFILE);
880         case VDATABASE:
881                 return(HAMMER_OBJTYPE_DBFILE);
882         case VFIFO:
883                 return(HAMMER_OBJTYPE_FIFO);
884         case VSOCK:
885                 return(HAMMER_OBJTYPE_SOCKET);
886         case VCHR:
887                 return(HAMMER_OBJTYPE_CDEV);
888         case VBLK:
889                 return(HAMMER_OBJTYPE_BDEV);
890         case VLNK:
891                 return(HAMMER_OBJTYPE_SOFTLINK);
892         default:
893                 return(HAMMER_OBJTYPE_UNKNOWN);
894         }
895         /* not reached */
896 }
897
898 /*
899  * Return flags for hammer_delete_at_cursor()
900  */
901 int
902 hammer_nohistory(hammer_inode_t ip)
903 {
904         if (ip->hmp->hflags & HMNT_NOHISTORY)
905                 return(HAMMER_DELETE_DESTROY);
906         if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
907                 return(HAMMER_DELETE_DESTROY);
908         return(0);
909 }
910
911 /*
912  * ALGORITHM VERSION 0:
913  *      Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
914  *      crc in the upper 32 bits and 0 in the low 32 bits.  The caller will
915  *      use the low 32 bits to generate a unique key and will scan all
916  *      entries with the same upper 32 bits when issuing a lookup.
917  *
918  *      0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
919  *
920  * ALGORITHM VERSION 1:
921  *
922  *      This algorithm breaks the filename down into separate 32-bit crcs
923  *      for each filename segment separated by a special character (dot,
924  *      dash, underscore, or tilde).  The CRCs are then added together.
925  *      This allows temporary names.  A full-filename 16 bit crc is also
926  *      generated to deal with degenerate conditions.
927  *
928  *      The algorithm is designed to handle create/rename situations such
929  *      that renaming a name created with an extension to one without the
930  *      extension only shifts the key space rather than randomizing it.
931  *
932  *      NOTE: The inode allocator cache can only match 10 bits so we do
933  *            not really have any room for a partial sorted name, and
934  *            numbers don't sort well in that situation anyway.
935  *
936  *      0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
937  *
938  *
939  * We strip bit 63 in order to provide a positive key, this way a seek
940  * offset of 0 will represent the base of the directory.
941  *
942  * We usually strip bit 0 (set it to 0) in order to provide a consistent
943  * iteration space for collisions.
944  *
945  * This function can never return 0.  We use the MSB-0 space to synthesize
946  * artificial directory entries such as "." and "..".
947  */
948 int64_t
949 hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
950                          u_int32_t *max_iterationsp)
951 {
952         const char *aname = name;
953         int32_t crcx;
954         int64_t key;
955         int i;
956         int j;
957
958         switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
959         case HAMMER_INODE_CAP_DIRHASH_ALG0:
960                 /*
961                  * Original algorithm
962                  */
963                 key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
964                 if (key == 0)
965                         key |= 0x100000000LL;
966                 *max_iterationsp = 0xFFFFFFFFU;
967                 break;
968         case HAMMER_INODE_CAP_DIRHASH_ALG1:
969                 /*
970                  * Filesystem version 6 or better will create directories
971                  * using the ALG1 dirhash.  This hash breaks the filename
972                  * up into domains separated by special characters and
973                  * hashes each domain independently.
974                  *
975                  * A simple sub-sort using the first character of the
976                  * filename in the top 5 bits is currently disabled (#if 0).
977                  */
978                 key = 0;
979
980                 /*
981                  * m32
982                  */
983                 crcx = 0;
984                 for (i = j = 0; i < len; ++i) {
985                         if (aname[i] == '.' ||
986                             aname[i] == '-' ||
987                             aname[i] == '_' ||
988                             aname[i] == '~') {
989                                 if (i != j)
990                                         crcx += crc32(aname + j, i - j);
991                                 j = i + 1;
992                         }
993                 }
994                 if (i != j)
995                         crcx += crc32(aname + j, i - j);
996
997 #if 0
998                 /*
999                  * xor top 5 bits 0mmmm into low bits and steal the top 5
1000                  * bits as a semi sub sort using the first character of
1001                  * the filename.  bit 63 is always left as 0 so directory
1002                  * keys are positive numbers.
1003                  */
1004                 crcx ^= (uint32_t)crcx >> (32 - 5);
1005                 crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
1006 #endif
1007                 crcx &= 0x7FFFFFFFU;
1008
1009                 key |= (uint64_t)crcx << 32;
1010
1011                 /*
1012                  * l16 - crc of entire filename
1013                  *
1014                  * This crc reduces degenerate hash collision conditions
1015                  */
1016                 crcx = crc32(aname, len);
1017                 crcx = crcx ^ (crcx << 16);
1018                 key |= crcx & 0xFFFF0000U;
1019
1020                 /*
1021                  * Cleanup
1022                  */
1023                 if ((key & 0xFFFFFFFF00000000LL) == 0)
1024                         key |= 0x100000000LL;
1025                 if (hammer_debug_general & 0x0400) {
1026                         kprintf("namekey2: 0x%016llx %*.*s\n",
1027                                 (long long)key, len, len, aname);
1028                 }
1029                 *max_iterationsp = 0x00FFFFFF;
1030                 break;
1031         case HAMMER_INODE_CAP_DIRHASH_ALG2:
1032         case HAMMER_INODE_CAP_DIRHASH_ALG3:
1033         default:
1034                 key = 0;                        /* compiler warning */
1035                 *max_iterationsp = 1;           /* sanity */
1036                 panic("hammer_directory_namekey: bad algorithm %p\n", dip);
1037                 break;
1038         }
1039         return(key);
1040 }
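
/*
 * Worked example (editorial addition) for the ALG1 m32 field above: the
 * name "foo.tmp" is split at the '.' into the segments "foo" and "tmp",
 * giving
 *
 *	crcx = crc32("foo", 3) + crc32("tmp", 3)
 *
 * while the name "foo" hashes to crc32("foo", 3) alone.  Renaming
 * "foo.tmp" to "foo" therefore shifts the m32 portion of the key by
 * crc32("tmp", 3) (before the final masking) instead of producing an
 * unrelated value, which is the create/rename behavior described in the
 * comment above hammer_directory_namekey().
 */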
1041
1042 /*
1043  * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
1044  * EINVAL on failure.
1045  *
1046  * If this function fails *ispfsp, *tidp, and *localizationp will not
1047  * be modified.
1048  */
1049 int
1050 hammer_str_to_tid(const char *str, int *ispfsp,
1051                   hammer_tid_t *tidp, u_int32_t *localizationp)
1052 {
1053         hammer_tid_t tid;
1054         u_int32_t localization;
1055         char *ptr;
1056         int ispfs;
1057         int n;
1058
1059         /*
1060          * Forms allowed for TID:  "0x%016llx"
1061          *                         "-1"
1062          */
1063         tid = strtouq(str, &ptr, 0);
1064         n = ptr - str;
1065         if (n == 2 && str[0] == '-' && str[1] == '1') {
1066                 /* ok */
1067         } else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
1068                 /* ok */
1069         } else {
1070                 return(EINVAL);
1071         }
1072
1073         /*
1074          * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
1075          */
1076         str = ptr;
1077         if (*str == ':') {
1078                 localization = strtoul(str + 1, &ptr, 10) << 16;
1079                 if (ptr - str != 6)
1080                         return(EINVAL);
1081                 str = ptr;
1082                 ispfs = 1;
1083         } else {
1084                 localization = *localizationp;
1085                 ispfs = 0;
1086         }
1087
1088         /*
1089          * Any trailing junk invalidates special extension handling.
1090          */
1091         if (*str)
1092                 return(EINVAL);
1093         *tidp = tid;
1094         *localizationp = localization;
1095         *ispfsp = ispfs;
1096         return(0);
1097 }
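
/*
 * Examples (editorial addition) of strings accepted and rejected by
 * hammer_str_to_tid(), i.e. the text following "@@" in a path:
 *
 *	"0x00000001061a8ba0"		16-digit TID, *ispfsp = 0
 *	"0x00000001061a8ba0:00001"	same TID, PFS #1 (localization 1 << 16)
 *	"-1"				shorthand parsed as the maximum TID
 *	"0x1234"			rejected: TID form must be 18 characters
 *	"0x00000001061a8ba0:1"		rejected: PFS field must be 5 digits
 */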
1098
1099 void
1100 hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
1101 {
1102         blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1103 }
1104
1105 void
1106 hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
1107 {
1108         ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1109                           crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1110 }
1111
1112 int
1113 hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
1114 {
1115         hammer_crc_t crc;
1116
1117         crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1118         return (blockmap->entry_crc == crc);
1119 }
1120
1121 int
1122 hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
1123 {
1124         hammer_crc_t crc;
1125
1126         crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1127               crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1128         return (ondisk->vol_crc == crc);
1129 }
1130
1131 int
1132 hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
1133 {
1134         hammer_crc_t crc;
1135
1136         crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
1137         return (ondisk->crc == crc);
1138 }
1139
1140 /*
1141  * Test or set the leaf->data_crc field.  Deal with any special cases given
1142  * a generic B-Tree leaf element and its data.
1143  *
1144  * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
1145  *       to be updated in-place.
1146  */
1147 int
1148 hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1149 {
1150         hammer_crc_t crc;
1151
1152         if (leaf->data_len == 0) {
1153                 crc = 0;
1154         } else {
1155                 switch(leaf->base.rec_type) {
1156                 case HAMMER_RECTYPE_INODE:
1157                         if (leaf->data_len != sizeof(struct hammer_inode_data))
1158                                 return(0);
1159                         crc = crc32(data, HAMMER_INODE_CRCSIZE);
1160                         break;
1161                 default:
1162                         crc = crc32(data, leaf->data_len);
1163                         break;
1164                 }
1165         }
1166         return (leaf->data_crc == crc);
1167 }
1168
1169 void
1170 hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1171 {
1172         if (leaf->data_len == 0) {
1173                 leaf->data_crc = 0;
1174         } else {
1175                 switch(leaf->base.rec_type) {
1176                 case HAMMER_RECTYPE_INODE:
1177                         KKASSERT(leaf->data_len ==
1178                                   sizeof(struct hammer_inode_data));
1179                         leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
1180                         break;
1181                 default:
1182                         leaf->data_crc = crc32(data, leaf->data_len);
1183                         break;
1184                 }
1185         }
1186 }
1187
1188 void
1189 hkprintf(const char *ctl, ...)
1190 {
1191         __va_list va;
1192
1193         if (hammer_debug_debug) {
1194                 __va_start(va, ctl);
1195                 kvprintf(ctl, va);
1196                 __va_end(va);
1197         }
1198 }
1199
1200 /*
1201  * Return the block size at the specified file offset.
1202  */
1203 int
1204 hammer_blocksize(int64_t file_offset)
1205 {
1206         if (file_offset < HAMMER_XDEMARC)
1207                 return(HAMMER_BUFSIZE);
1208         else
1209                 return(HAMMER_XBUFSIZE);
1210 }
1211
1212 int
1213 hammer_blockoff(int64_t file_offset)
1214 {
1215         if (file_offset < HAMMER_XDEMARC)
1216                 return((int)file_offset & HAMMER_BUFMASK);
1217         else
1218                 return((int)file_offset & HAMMER_XBUFMASK);
1219 }
1220
1221 /*
1222  * Return the demarcation point between the two offsets where
1223  * the block size changes. 
1224  */
1225 int64_t
1226 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
1227 {
1228         if (file_offset1 < HAMMER_XDEMARC) {
1229                 if (file_offset2 <= HAMMER_XDEMARC)
1230                         return(file_offset2);
1231                 return(HAMMER_XDEMARC);
1232         }
1233         panic("hammer_blockdemarc: illegal range %lld %lld\n",
1234               (long long)file_offset1, (long long)file_offset2);
1235 }
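
/*
 * Illustrative sketch (editorial addition): walking a file range one
 * HAMMER buffer at a time with the helpers above.  Offsets below
 * HAMMER_XDEMARC use small (HAMMER_BUFSIZE) buffers and offsets beyond
 * it use large (HAMMER_XBUFSIZE) buffers; hammer_blockdemarc() clamps a
 * range that starts below the boundary so an I/O does not straddle it.
 * 'off' and 'end' are hypothetical byte offsets.
 *
 *	while (off < end) {
 *		int bsize = hammer_blocksize(off);	// buffer size here
 *		int boff  = hammer_blockoff(off);	// offset within buffer
 *		int n     = bsize - boff;		// bytes to buffer end
 *
 *		if (off + n > end)
 *			n = (int)(end - off);
 *		...process n bytes at off...
 *		off += n;
 *	}
 */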
1236
1237 udev_t
1238 hammer_fsid_to_udev(uuid_t *uuid)
1239 {
1240         u_int32_t crc;
1241
1242         crc = crc32(uuid, sizeof(*uuid));
1243         return((udev_t)crc);
1244 }
1245