HAMMER VFS - Major retooling of the refcount mechanics, and fix a deadlock
1 /*
2  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
35  */
36 /*
37  * HAMMER structural locking
38  */
39
40 #include "hammer.h"
41 #include <sys/dirent.h>
42
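/*
 * Obtain an exclusive lock, sleeping until it is available.  The lock
 * may be acquired recursively by the thread that already holds it
 * exclusively.  The ident string is used as the tsleep() wait message
 * when the caller must block.
 */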
43 void
44 hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
45 {
46         thread_t td = curthread;
47         u_int lv;
48         u_int nlv;
49
50         KKASSERT(lock->refs);
51         for (;;) {
52                 lv = lock->lockval;
53
54                 if (lv == 0) {
55                         nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
56                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
57                                 lock->lowner = td;
58                                 break;
59                         }
60                 } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
61                            lock->lowner == td) {
62                         nlv = (lv + 1);
63                         if (atomic_cmpset_int(&lock->lockval, lv, nlv))
64                                 break;
65                 } else {
66                         if (hammer_debug_locks) {
67                                 kprintf("hammer_lock_ex: held by %p\n",
68                                         lock->lowner);
69                         }
70                         nlv = lv | HAMMER_LOCKF_WANTED;
71                         ++hammer_contention_count;
72                         tsleep_interlock(&lock->lockval, 0);
73                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
74                                 tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
75                                 if (hammer_debug_locks)
76                                         kprintf("hammer_lock_ex: try again\n");
77                         }
78                 }
79         }
80 }
81
82 /*
83  * Try to obtain an exclusive lock.  Returns 0 on success, EAGAIN on failure.
84  */
85 int
86 hammer_lock_ex_try(struct hammer_lock *lock)
87 {
88         thread_t td = curthread;
89         int error;
90         u_int lv;
91         u_int nlv;
92
93         KKASSERT(lock->refs);
94         for (;;) {
95                 lv = lock->lockval;
96
97                 if (lv == 0) {
98                         nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
99                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
100                                 lock->lowner = td;
101                                 error = 0;
102                                 break;
103                         }
104                 } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
105                            lock->lowner == td) {
106                         nlv = (lv + 1);
107                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
108                                 error = 0;
109                                 break;
110                         }
111                 } else {
112                         error = EAGAIN;
113                         break;
114                 }
115         }
116         return (error);
117 }
118
119 /*
120  * Obtain a shared lock
121  *
122  * We do not give pending exclusive locks priority over shared locks as
123  * doing so could lead to a deadlock.
124  */
125 void
126 hammer_lock_sh(struct hammer_lock *lock)
127 {
128         thread_t td = curthread;
129         u_int lv;
130         u_int nlv;
131         const char *ident = "hmrlck";
132
133         KKASSERT(lock->refs);
134         for (;;) {
135                 lv = lock->lockval;
136
137                 if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
138                         nlv = (lv + 1);
139                         if (atomic_cmpset_int(&lock->lockval, lv, nlv))
140                                 break;
141                 } else if (lock->lowner == td) {
142                         /*
143                          * Disallowed case; drop into the kernel debugger
144                          * for now.  A 'cont' resumes with the exclusive lock held.
145                          */
146                         nlv = (lv + 1);
147                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
148                                 if (hammer_debug_critical)
149                                         Debugger("hammer_lock_sh: holding ex");
150                                 break;
151                         }
152                 } else {
153                         nlv = lv | HAMMER_LOCKF_WANTED;
154                         ++hammer_contention_count;
155                         tsleep_interlock(&lock->lockval, 0);
156                         if (atomic_cmpset_int(&lock->lockval, lv, nlv))
157                                 tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
158                 }
159         }
160 }
161
162 int
163 hammer_lock_sh_try(struct hammer_lock *lock)
164 {
165         thread_t td = curthread;
166         u_int lv;
167         u_int nlv;
168         int error;
169
170         KKASSERT(lock->refs);
171         for (;;) {
172                 lv = lock->lockval;
173
174                 if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
175                         nlv = (lv + 1);
176                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
177                                 error = 0;
178                                 break;
179                         }
180                 } else if (lock->lowner == td) {
181                         /*
182                          * Disallowed case; drop into the kernel debugger
183                          * for now.  A 'cont' resumes with the exclusive lock held.
184                          */
185                         nlv = (lv + 1);
186                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
187                                 if (hammer_debug_critical)
188                                         Debugger("hammer_lock_sh: holding ex");
189                                 error = 0;
190                                 break;
191                         }
192                 } else {
193                         error = EAGAIN;
194                         break;
195                 }
196         }
197         return (error);
198 }
199
200 /*
201  * Upgrade a shared lock to an exclusively held lock.  This function will
202  * return EDEADLK if there is more than one shared holder.
203  *
204  * No error occurs and no action is taken if the lock is already exclusively
205  * held by the caller.  If the lock is not held at all or held exclusively
206  * by someone else, this function will panic.
207  */
208 int
209 hammer_lock_upgrade(struct hammer_lock *lock)
210 {
211         thread_t td = curthread;
212         u_int lv;
213         u_int nlv;
214         int error;
215
216         for (;;) {
217                 lv = lock->lockval;
218
219                 if ((lv & ~HAMMER_LOCKF_WANTED) == 1) {
220                         nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
221                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
222                                 lock->lowner = td;
223                                 error = 0;
224                                 break;
225                         }
226                 } else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
227                         if (lock->lowner != curthread)
228                                 panic("hammer_lock_upgrade: illegal state");
229                         error = 0;
230                         break;
231                 } else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
232                         panic("hammer_lock_upgrade: lock is not held");
233                         /* NOT REACHED */
234                         error = EDEADLK;
235                         break;
236                 } else {
237                         error = EDEADLK;
238                         break;
239                 }
240         }
241         return (error);
242 }
243
244 /*
245  * Downgrade an exclusively held lock to a shared lock.
246  */
247 void
248 hammer_lock_downgrade(struct hammer_lock *lock)
249 {
250         thread_t td __debugvar = curthread;
251         u_int lv;
252         u_int nlv;
253
254         KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
255                  (HAMMER_LOCKF_EXCLUSIVE | 1));
256         KKASSERT(lock->lowner == td);
257
258         /*
259          * NOTE: Must clear owner before releasing exclusivity
260          */
261         lock->lowner = NULL;
262
263         for (;;) {
264                 lv = lock->lockval;
265                 nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
266                 if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
267                         if (lv & HAMMER_LOCKF_WANTED)
268                                 wakeup(&lock->lockval);
269                         break;
270                 }
271         }
272 }
273
274 void
275 hammer_unlock(struct hammer_lock *lock)
276 {
277         thread_t td __debugvar = curthread;
278         u_int lv;
279         u_int nlv;
280
281         lv = lock->lockval;
282         KKASSERT(lv != 0);
283         if (lv & HAMMER_LOCKF_EXCLUSIVE)
284                 KKASSERT(lock->lowner == td);
285
286         for (;;) {
287                 lv = lock->lockval;
288                 nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
289                 if (nlv > 1) {
290                         nlv = lv - 1;
291                         if (atomic_cmpset_int(&lock->lockval, lv, nlv))
292                                 break;
293                 } else if (nlv == 1) {
294                         nlv = 0;
295                         if (lv & HAMMER_LOCKF_EXCLUSIVE)
296                                 lock->lowner = NULL;
297                         if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
298                                 if (lv & HAMMER_LOCKF_WANTED)
299                                         wakeup(&lock->lockval);
300                                 break;
301                         }
302                 } else {
303                         panic("hammer_unlock: lock %p is not held", lock);
304                 }
305         }
306 }
307
308 /*
309  * The calling thread must be holding a shared or exclusive lock.
310  * Returns < 0 if the lock is held shared, and > 0 if held exclusively.
311  */
312 int
313 hammer_lock_status(struct hammer_lock *lock)
314 {
315         u_int lv = lock->lockval;
316
317         if (lv & HAMMER_LOCKF_EXCLUSIVE)
318                 return(1);
319         else if (lv)
320                 return(-1);
321         panic("hammer_lock_status: lock must be held: %p", lock);
322 }
323
324 /*
325  * Bump the ref count for a lock (not the excl/share count, but a separate
326  * structural reference count).  The CHECK flag will be set on a 0->1
327  * transition.
328  *
329  * This function does nothing to serialize races between multiple threads.
330  * The caller can interlock it later on to deal with serialization.
331  *
332  * MPSAFE
333  */
334 void
335 hammer_ref(struct hammer_lock *lock)
336 {
337         u_int lv;
338         u_int nlv;
339
340         for (;;) {
341                 lv = lock->refs;
342                 if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
343                         nlv = (lv + 1) | HAMMER_REFS_CHECK;
344                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
345                                 return;
346                 } else {
347                         nlv = (lv + 1);
348                         KKASSERT((int)nlv > 0);
349                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
350                                 return;
351                 }
352         }
353         /* not reached */
354 }
355
356 /*
357  * Drop the ref count for a lock (not the excl/share count, but a separate
358  * structural reference count).  The CHECK flag will be cleared on a 1->0
359  * transition.
360  *
361  * This function does nothing to serialize races between multiple threads.
362  *
363  * MPSAFE
364  */
365 void
366 hammer_rel(struct hammer_lock *lock)
367 {
368         u_int lv;
369         u_int nlv;
370
371         for (;;) {
372                 lv = lock->refs;
373                 if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
374                         nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
375                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
376                                 return;
377                 } else {
378                         KKASSERT((int)lv > 0);
379                         nlv = (lv - 1);
380                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
381                                 return;
382                 }
383         }
384         /* not reached */
385 }
386
387 /*
388  * The hammer_*_interlock() and hammer_*_interlock_done() functions are
389  * more sophisticated versions which handle MP transition races and block
390  * when necessary.
391  *
392  * hammer_ref_interlock() bumps the ref-count and conditionally acquires
393  * the interlock for 0->1 transitions or if the CHECK is found to be set.
394  *
395  * This case will return TRUE, the interlock will be held, and the CHECK
396  * bit also set.  Other threads attempting to ref will see the CHECK bit
397  * and block until we clean up.
398  *
399  * FALSE is returned for transitions other than 0->1 when the CHECK bit
400  * is not found to be set, or if the function loses the race with another
401  * thread.
402  *
403  * TRUE is only returned to one thread and the others will block.
404  * Effectively a TRUE indicator means 'someone transitioned 0->1
405  * and you are the first thread to successfully lock it after that, so you
406  * need to check'.  Due to races the ref-count may be greater than 1 upon
407  * return.
408  *
409  * MPSAFE
410  */
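/*
 * Typical load-path usage (illustrative sketch only; 'elm' and
 * elm_load() are hypothetical and not part of this file):
 *
 *	if (hammer_ref_interlock(&elm->lock)) {
 *		error = elm_load(elm);		(one-time validation/setup)
 *		if (error == 0)
 *			hammer_ref_interlock_done(&elm->lock);
 *		(a failure path instead drops the ref and interlock via
 *		 the rel functions further below)
 *	} else {
 *		(the ref was simply bumped on an already-valid structure
 *		 and no interlock is held)
 *	}
 */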
411 int
412 hammer_ref_interlock(struct hammer_lock *lock)
413 {
414         u_int lv;
415         u_int nlv;
416
417         /*
418          * Integrated reference count bump, lock, and check, with hot-path.
419          *
420          * (a) Return 1 (+LOCKED, +CHECK)       0->1 transition
421          * (b) Return 0 (-LOCKED, -CHECK)       N->N+1 transition
422          * (c) Break out (+CHECK)               CHECK set but cannot acquire lock
423          * (d) Return 1 (+LOCKED, +CHECK)       Successfully locked
424          */
425         for (;;) {
426                 lv = lock->refs;
427                 if (lv == 0) {
428                         nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
429                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
430                                 lock->rowner = curthread;
431                                 return(1);
432                         }
433                 } else {
434                         nlv = (lv + 1);
435                         if ((lv & ~HAMMER_REFS_FLAGS) == 0)
436                                 nlv |= HAMMER_REFS_CHECK;
437                         if ((nlv & HAMMER_REFS_CHECK) == 0) {
438                                 if (atomic_cmpset_int(&lock->refs, lv, nlv))
439                                         return(0);
440                         } else if (lv & HAMMER_REFS_LOCKED) {
441                                 /* CHECK also set here */
442                                 if (atomic_cmpset_int(&lock->refs, lv, nlv))
443                                         break;
444                         } else {
445                                 /* CHECK also set here */
446                                 nlv |= HAMMER_REFS_LOCKED;
447                                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
448                                         lock->rowner = curthread;
449                                         return(1);
450                                 }
451                         }
452                 }
453         }
454
455         /*
456          * Deferred check condition because we were unable to acquire the
457          * lock.  We must block until the check condition is cleared due
458          * to a race with another thread, or we are able to acquire the
459          * lock.
460          *
461          * (a) Return 0 (-CHECK)                Another thread handled it
462          * (b) Return 1 (+LOCKED, +CHECK)       We handled it.
463          */
464         for (;;) {
465                 lv = lock->refs;
466                 if ((lv & HAMMER_REFS_CHECK) == 0)
467                         return(0);
468                 if (lv & HAMMER_REFS_LOCKED) {
469                         tsleep_interlock(&lock->refs, 0);
470                         nlv = (lv | HAMMER_REFS_WANTED);
471                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
472                                 tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
473                 } else {
474                         /* CHECK also set here */
475                         nlv = lv | HAMMER_REFS_LOCKED;
476                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
477                                 lock->rowner = curthread;
478                                 return(1);
479                         }
480                 }
481         }
482         /* not reached */
483 }
484
485 /*
486  * This is the same as hammer_ref_interlock() but asserts that the
487  * 0->1 transition is always true, thus the lock must have no references
488  * on entry or have CHECK set, and will have one reference with the
489  * interlock held on return.  It must also not be interlocked on entry
490  * by anyone.
491  *
492  * NOTE that CHECK will never be found set when the ref-count is 0.
493  *
494  * TRUE is always returned to match the API for hammer_ref_interlock().
495  * This function returns with one ref, the lock held, and the CHECK bit set.
496  */
497 int
498 hammer_ref_interlock_true(struct hammer_lock *lock)
499 {
500         u_int lv;
501         u_int nlv;
502
503         for (;;) {
504                 lv = lock->refs;
505
506                 if (lv) {
507                         panic("hammer_ref_interlock_true: bad lock %p %08x\n",
508                               lock, lock->refs);
509                 }
510                 nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
511                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
512                         lock->rowner = curthread;
513                         return (1);
514                 }
515         }
516 }
517
518 /*
519  * Unlock the interlock acquired by hammer_ref_interlock() and clear the
520  * CHECK flag.  The ref-count remains unchanged.
521  *
522  * This routine is called in the load path when the load succeeds.
523  */
524 void
525 hammer_ref_interlock_done(struct hammer_lock *lock)
526 {
527         u_int lv;
528         u_int nlv;
529
530         for (;;) {
531                 lv = lock->refs;
532                 nlv = lv & ~HAMMER_REFS_FLAGS;
533                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
534                         if (lv & HAMMER_REFS_WANTED)
535                                 wakeup(&lock->refs);
536                         break;
537                 }
538         }
539 }
540
541 /*
542  * hammer_rel_interlock() works a bit differently in that it must
543  * acquire the lock in tandem with a 1->0 transition.  CHECK is
544  * not used.
545  *
546  * TRUE is returned on 1->0 transitions with the lock held on return
547  * and FALSE is returned otherwise with the lock not held.
548  *
549  * It is important to note that the refs are not stable and may
550  * increase while we hold the lock, the TRUE indication only means
551  * that we transitioned 1->0, not necessarily that we stayed at 0.
552  *
553  * Another thread bumping refs while we hold the lock will set CHECK,
554  * causing one of the competing hammer_ref_interlock() calls to
555  * return TRUE after we release our lock.
556  *
557  * MPSAFE
558  */
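/*
 * Typical teardown usage (illustrative sketch; 'elm' and elm_unload()
 * are hypothetical):
 *
 *	if (hammer_rel_interlock(&elm->lock, 0)) {
 *		(1->0 transition, interlock held: tear the structure
 *		 down, keeping in mind that refs may have been bumped
 *		 again while the interlock was held)
 *		elm_unload(elm);
 *		hammer_rel_interlock_done(&elm->lock, 0);
 *	}
 */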
559 int
560 hammer_rel_interlock(struct hammer_lock *lock, int locked)
561 {
562         u_int lv;
563         u_int nlv;
564
565         /*
566          * In locked mode (failure/unload path) we release the
567          * ref-count but leave it locked.
568          */
569         if (locked) {
570                 hammer_rel(lock);
571                 return(1);
572         }
573
574         /*
575          * Integrated reference count drop with LOCKED, plus the hot-path
576          * returns.
577          */
578         for (;;) {
579                 lv = lock->refs;
580
581                 if (lv == 1) {
582                         nlv = 0 | HAMMER_REFS_LOCKED;
583                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
584                                 lock->rowner = curthread;
585                                 return(1);
586                         }
587                 } else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
588                         if ((lv & HAMMER_REFS_LOCKED) == 0) {
589                                 nlv = (lv - 1) | HAMMER_REFS_LOCKED;
590                                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
591                                         lock->rowner = curthread;
592                                         return(1);
593                                 }
594                         } else {
595                                 nlv = lv | HAMMER_REFS_WANTED;
596                                 tsleep_interlock(&lock->refs, 0);
597                                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
598                                         tsleep(&lock->refs, PINTERLOCKED,
599                                                "h0lk", 0);
600                                 }
601                         }
602                 } else {
603                         nlv = (lv - 1);
604                         KKASSERT((int)nlv >= 0);
605                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
606                                 return(0);
607                 }
608         }
609         /* not reached */
610 }
611
612 /*
613  * Unlock the interlock acquired by hammer_rel_interlock().
614  *
615  * If orig_locked is non-zero the interlock was originally held prior to
616  * the hammer_rel_interlock() call and passed through to us.  In this
617  * case we want to retain the CHECK error state if not transitioning
618  * to 0.
619  *
620  * The code is the same either way so we do not have to conditionalize
621  * on orig_locked.
622  */
623 void
624 hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
625 {
626         u_int lv;
627         u_int nlv;
628
629         for (;;) {
630                 lv = lock->refs;
631                 nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
632                 if ((lv & ~HAMMER_REFS_FLAGS) == 0)
633                         nlv &= ~HAMMER_REFS_CHECK;
634                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
635                         if (lv & HAMMER_REFS_WANTED)
636                                 wakeup(&lock->refs);
637                         break;
638                 }
639         }
640 }
641
642 /*
643  * Acquire the interlock on lock->refs.
644  *
645  * Return TRUE if CHECK is currently set.  Note that CHECK will not
646  * be set if the reference count is 0, but can get set if this function
647  * is preceded by, say, hammer_ref(), or through races with other
648  * threads.  The return value allows the caller to use the same logic
649  * as hammer_ref_interlock().
650  *
651  * MPSAFE
652  */
653 int
654 hammer_get_interlock(struct hammer_lock *lock)
655 {
656         u_int lv;
657         u_int nlv;
658
659         for (;;) {
660                 lv = lock->refs;
661                 if (lv & HAMMER_REFS_LOCKED) {
662                         nlv = lv | HAMMER_REFS_WANTED;
663                         tsleep_interlock(&lock->refs, 0);
664                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
665                                 tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
666                 } else {
667                         nlv = (lv | HAMMER_REFS_LOCKED);
668                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
669                                 lock->rowner = curthread;
670                                 return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
671                         }
672                 }
673         }
674 }
675
676 /*
677  * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
678  * cache callback code to disassociate or lock the bufs related to HAMMER
679  * structures.
680  *
681  * During teardown the related bp will be acquired by hammer_io_release()
682  * which interlocks our test.
683  *
684  * Returns non-zero on success, zero on failure.
685  */
686 int
687 hammer_try_interlock_norefs(struct hammer_lock *lock)
688 {
689         u_int lv;
690         u_int nlv;
691
692         for (;;) {
693                 lv = lock->refs;
694                 if (lv == 0) {
695                         nlv = lv | HAMMER_REFS_LOCKED;
696                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
697                                 lock->rowner = curthread;
698                                 return(1);
699                         }
700                 } else {
701                         return(0);
702                 }
703         }
704         /* not reached */
705 }
706
707 /*
708  * Release the interlock on lock->refs.  This function will set
709  * CHECK if the ref count is non-zero and error is non-zero, and clear
710  * CHECK otherwise.
711  *
712  * MPSAFE
713  */
714 void
715 hammer_put_interlock(struct hammer_lock *lock, int error)
716 {
717         u_int lv;
718         u_int nlv;
719
720         for (;;) {
721                 lv = lock->refs;
722                 KKASSERT(lv & HAMMER_REFS_LOCKED);
723                 nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
724
725                 if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
726                         nlv &= ~HAMMER_REFS_CHECK;
727                 else
728                         nlv |= HAMMER_REFS_CHECK;
729
730                 if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
731                         if (lv & HAMMER_REFS_WANTED)
732                                 wakeup(&lock->refs);
733                         return;
734                 }
735         }
736 }
737
738 /*
739  * The sync_lock must be held when doing any modifying operations on
740  * meta-data.  It does not have to be held when modifying non-meta-data buffers
741  * (backend or frontend).
742  *
743  * The flusher holds the lock exclusively while all other consumers hold it
744  * shared.  All modifying operations made while holding the lock are atomic
745  * in that they will be made part of the same flush group.
746  *
747  * Due to the atomicity requirement, deadlock recovery code CANNOT release the
748  * sync lock, nor can we give pending exclusive sync locks priority over
749  * a shared sync lock as this could lead to a 3-way deadlock.
750  */
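/*
 * Frontend code typically brackets its meta-data modifications with a
 * shared sync lock (illustrative sketch):
 *
 *	hammer_sync_lock_sh(trans);
 *	... modify meta-data buffers ...
 *	hammer_sync_unlock(trans);
 */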
751 void
752 hammer_sync_lock_ex(hammer_transaction_t trans)
753 {
754         ++trans->sync_lock_refs;
755         hammer_lock_ex(&trans->hmp->sync_lock);
756 }
757
758 void
759 hammer_sync_lock_sh(hammer_transaction_t trans)
760 {
761         ++trans->sync_lock_refs;
762         hammer_lock_sh(&trans->hmp->sync_lock);
763 }
764
765 int
766 hammer_sync_lock_sh_try(hammer_transaction_t trans)
767 {
768         int error;
769
770         ++trans->sync_lock_refs;
771         if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
772                 --trans->sync_lock_refs;
773         return (error);
774 }
775
776 void
777 hammer_sync_unlock(hammer_transaction_t trans)
778 {
779         --trans->sync_lock_refs;
780         hammer_unlock(&trans->hmp->sync_lock);
781 }
782
783 /*
784  * Misc
785  */
786 u_int32_t
787 hammer_to_unix_xid(uuid_t *uuid)
788 {
789         return(*(u_int32_t *)&uuid->node[2]);
790 }
791
792 void
793 hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
794 {
795         bzero(uuid, sizeof(*uuid));
796         *(u_int32_t *)&uuid->node[2] = guid;
797 }
798
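/*
 * HAMMER timestamps are stored as a 64 bit count of microseconds.
 * Convert to and from a struct timespec; for example an xtime of
 * 1224715200123456 maps to tv_sec 1224715200 and tv_nsec 123456000.
 */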
799 void
800 hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
801 {
802         ts->tv_sec = (unsigned long)(xtime / 1000000);
803         ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
804 }
805
806 u_int64_t
807 hammer_timespec_to_time(struct timespec *ts)
808 {
809         u_int64_t xtime;
810
811         xtime = (unsigned)(ts->tv_nsec / 1000) +
812                 (unsigned long)ts->tv_sec * 1000000ULL;
813         return(xtime);
814 }
815
816
817 /*
818  * Convert a HAMMER filesystem object type to a vnode type
819  */
820 enum vtype
821 hammer_get_vnode_type(u_int8_t obj_type)
822 {
823         switch(obj_type) {
824         case HAMMER_OBJTYPE_DIRECTORY:
825                 return(VDIR);
826         case HAMMER_OBJTYPE_REGFILE:
827                 return(VREG);
828         case HAMMER_OBJTYPE_DBFILE:
829                 return(VDATABASE);
830         case HAMMER_OBJTYPE_FIFO:
831                 return(VFIFO);
832         case HAMMER_OBJTYPE_SOCKET:
833                 return(VSOCK);
834         case HAMMER_OBJTYPE_CDEV:
835                 return(VCHR);
836         case HAMMER_OBJTYPE_BDEV:
837                 return(VBLK);
838         case HAMMER_OBJTYPE_SOFTLINK:
839                 return(VLNK);
840         default:
841                 return(VBAD);
842         }
843         /* not reached */
844 }
845
846 int
847 hammer_get_dtype(u_int8_t obj_type)
848 {
849         switch(obj_type) {
850         case HAMMER_OBJTYPE_DIRECTORY:
851                 return(DT_DIR);
852         case HAMMER_OBJTYPE_REGFILE:
853                 return(DT_REG);
854         case HAMMER_OBJTYPE_DBFILE:
855                 return(DT_DBF);
856         case HAMMER_OBJTYPE_FIFO:
857                 return(DT_FIFO);
858         case HAMMER_OBJTYPE_SOCKET:
859                 return(DT_SOCK);
860         case HAMMER_OBJTYPE_CDEV:
861                 return(DT_CHR);
862         case HAMMER_OBJTYPE_BDEV:
863                 return(DT_BLK);
864         case HAMMER_OBJTYPE_SOFTLINK:
865                 return(DT_LNK);
866         default:
867                 return(DT_UNKNOWN);
868         }
869         /* not reached */
870 }
871
872 u_int8_t
873 hammer_get_obj_type(enum vtype vtype)
874 {
875         switch(vtype) {
876         case VDIR:
877                 return(HAMMER_OBJTYPE_DIRECTORY);
878         case VREG:
879                 return(HAMMER_OBJTYPE_REGFILE);
880         case VDATABASE:
881                 return(HAMMER_OBJTYPE_DBFILE);
882         case VFIFO:
883                 return(HAMMER_OBJTYPE_FIFO);
884         case VSOCK:
885                 return(HAMMER_OBJTYPE_SOCKET);
886         case VCHR:
887                 return(HAMMER_OBJTYPE_CDEV);
888         case VBLK:
889                 return(HAMMER_OBJTYPE_BDEV);
890         case VLNK:
891                 return(HAMMER_OBJTYPE_SOFTLINK);
892         default:
893                 return(HAMMER_OBJTYPE_UNKNOWN);
894         }
895         /* not reached */
896 }
897
898 /*
899  * Return flags for hammer_delete_at_cursor()
900  */
901 int
902 hammer_nohistory(hammer_inode_t ip)
903 {
904         if (ip->hmp->hflags & HMNT_NOHISTORY)
905                 return(HAMMER_DELETE_DESTROY);
906         if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
907                 return(HAMMER_DELETE_DESTROY);
908         return(0);
909 }
910
911 /*
912  * ALGORITHM VERSION 1:
913  *      Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
914  *      crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
915  *      to generate a unique key and will scan all entries with the same upper
916  *      32 bits when issuing a lookup.
917  *
918  *      0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
919  *
920  * ALGORITHM VERSION 2:
921  *
922  *      The 64 bit hash key is generated from the following components.  The
923  *      first three characters are encoded as 5-bit quantities, the middle
924  *      N characters are hashed into a 6 bit quantity, and the last two
925  *      characters are encoded as 5-bit quantities.  A 32 bit hash of the
926  *      entire filename is encoded in the low 32 bits.  Bit 0 is set to
927  *      0 to guarantee us a 2^24 iteration space.
928  *
929  *      0aaaaabbbbbccccc mmmmmmyyyyyzzzzz hhhhhhhhhhhhhhhh hhhhhhhhhhhhhhh0
930  *
931  *      This gives us a domain sort for the first three characters, the last
932  *      two characters, and breaks the middle space into 64 random domains.
933  *      The domain sort folds upper case, lower case, digits, and punctuation
934  *      spaces together, the idea being that filenames tend not to be a mix
935  *      of those domains.
936  *
937  *      The 64 random domains act as a sub-sort for the middle characters
938  *      but may cause a random seek.  If the filesystem is being accessed
939  *      in sorted order we should tend to get very good linearity for most
940  *      filenames and devolve into more random seeks otherwise.
941  *
942  * We strip bit 63 in order to provide a positive key; this way a seek
943  * offset of 0 will represent the base of the directory.
944  *
945  * This function can never return 0.  We use the MSB-0 space to synthesize
946  * artificial directory entries such as "." and "..".
947  */
948 int64_t
949 hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
950                          u_int32_t *max_iterationsp)
951 {
952         int64_t key;
953         int32_t crcx;
954         const char *aname = name;
955
956         switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
957         case HAMMER_INODE_CAP_DIRHASH_ALG0:
958                 key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
959                 if (key == 0)
960                         key |= 0x100000000LL;
961                 *max_iterationsp = 0xFFFFFFFFU;
962                 break;
963         case HAMMER_INODE_CAP_DIRHASH_ALG1:
964                 key = (u_int32_t)crc32(aname, len) & 0xFFFFFFFEU;
965
966                 switch(len) {
967                 default:
968                         crcx = crc32(aname + 3, len - 5);
969                         crcx = crcx ^ (crcx >> 6) ^ (crcx >> 12);
970                         key |=  (int64_t)(crcx & 0x3F) << 42;
971                         /* fall through */
972                 case 5:
973                 case 4:
974                         /* fall through */
975                 case 3:
976                         key |= ((int64_t)(aname[2] & 0x1F) << 48);
977                         /* fall through */
978                 case 2:
979                         key |= ((int64_t)(aname[1] & 0x1F) << 53) |
980                                ((int64_t)(aname[len-2] & 0x1F) << 37);
981                         /* fall through */
982                 case 1:
983                         key |= ((int64_t)(aname[0] & 0x1F) << 58) |
984                                ((int64_t)(aname[len-1] & 0x1F) << 32);
985                         /* fall through */
986                 case 0:
987                         break;
988                 }
989                 if ((key & 0xFFFFFFFF00000000LL) == 0)
990                         key |= 0x100000000LL;
991                 if (hammer_debug_general & 0x0400) {
992                         kprintf("namekey2: 0x%016llx %*.*s\n",
993                                 (long long)key, len, len, aname);
994                 }
995                 *max_iterationsp = 0x00FFFFFF;
996                 break;
997         case HAMMER_INODE_CAP_DIRHASH_ALG2:
998         case HAMMER_INODE_CAP_DIRHASH_ALG3:
999         default:
1000                 key = 0;                        /* compiler warning */
1001                 *max_iterationsp = 1;           /* sanity */
1002                 panic("hammer_directory_namekey: bad algorithm %p\n", dip);
1003                 break;
1004         }
1005         return(key);
1006 }
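
/*
 * Illustrative decomposition of the version 2 (DIRHASH_ALG1) key for a
 * hypothetical name "report.txt" (len 10): aname[0] 'r', aname[1] 'e'
 * and aname[2] 'p' fill the three leading 5-bit fields, aname[len-2]
 * 'x' and aname[len-1] 't' fill the two trailing 5-bit fields, the
 * middle characters "ort.t" (aname + 3, len - 5) feed the 6-bit domain
 * hash, and the crc32 of the entire name (bit 0 cleared) fills the low
 * 32 bits.
 */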
1007
1008 /*
1009  * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
1010  * EINVAL on failure.
1011  *
1012  * If this function fails *ispfs, *tidp, and *localizationp will not
1013  * be modified.
1014  */
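/*
 * Accepted forms (hypothetical values for illustration):
 *
 *	"0x00000001061a8ba0"		TID only, *localizationp unchanged
 *	"0x00000001061a8ba0:00001"	TID plus a 5-digit PFS id
 *	"-1:00012"			wildcard TID plus a 5-digit PFS id
 *
 * A PFS id, when present, must be exactly five decimal digits.
 */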
1015 int
1016 hammer_str_to_tid(const char *str, int *ispfsp,
1017                   hammer_tid_t *tidp, u_int32_t *localizationp)
1018 {
1019         hammer_tid_t tid;
1020         u_int32_t localization;
1021         char *ptr;
1022         int ispfs;
1023         int n;
1024
1025         /*
1026          * Forms allowed for TID:  "0x%016llx"
1027          *                         "-1"
1028          */
1029         tid = strtouq(str, &ptr, 0);
1030         n = ptr - str;
1031         if (n == 2 && str[0] == '-' && str[1] == '1') {
1032                 /* ok */
1033         } else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
1034                 /* ok */
1035         } else {
1036                 return(EINVAL);
1037         }
1038
1039         /*
1040          * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
1041          */
1042         str = ptr;
1043         if (*str == ':') {
1044                 localization = strtoul(str + 1, &ptr, 10) << 16;
1045                 if (ptr - str != 6)
1046                         return(EINVAL);
1047                 str = ptr;
1048                 ispfs = 1;
1049         } else {
1050                 localization = *localizationp;
1051                 ispfs = 0;
1052         }
1053
1054         /*
1055          * Any trailing junk invalidates special extension handling.
1056          */
1057         if (*str)
1058                 return(EINVAL);
1059         *tidp = tid;
1060         *localizationp = localization;
1061         *ispfsp = ispfs;
1062         return(0);
1063 }
1064
1065 void
1066 hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
1067 {
1068         blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1069 }
1070
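/*
 * The volume CRC is computed in two pieces, presumably sized so that
 * the vol_crc field itself is skipped: the second piece starts at
 * &ondisk->vol_crc + 1.
 */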
1071 void
1072 hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
1073 {
1074         ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1075                           crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1076 }
1077
1078 int
1079 hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
1080 {
1081         hammer_crc_t crc;
1082
1083         crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1084         return (blockmap->entry_crc == crc);
1085 }
1086
1087 int
1088 hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
1089 {
1090         hammer_crc_t crc;
1091
1092         crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1093               crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1094         return (ondisk->vol_crc == crc);
1095 }
1096
1097 int
1098 hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
1099 {
1100         hammer_crc_t crc;
1101
1102         crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
1103         return (ondisk->crc == crc);
1104 }
1105
1106 /*
1107  * Test or set the leaf->data_crc field.  Deal with any special cases given
1108  * a generic B-Tree leaf element and its data.
1109  *
1110  * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
1111  *       to be updated in-place.
1112  */
1113 int
1114 hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1115 {
1116         hammer_crc_t crc;
1117
1118         if (leaf->data_len == 0) {
1119                 crc = 0;
1120         } else {
1121                 switch(leaf->base.rec_type) {
1122                 case HAMMER_RECTYPE_INODE:
1123                         if (leaf->data_len != sizeof(struct hammer_inode_data))
1124                                 return(0);
1125                         crc = crc32(data, HAMMER_INODE_CRCSIZE);
1126                         break;
1127                 default:
1128                         crc = crc32(data, leaf->data_len);
1129                         break;
1130                 }
1131         }
1132         return (leaf->data_crc == crc);
1133 }
1134
1135 void
1136 hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1137 {
1138         if (leaf->data_len == 0) {
1139                 leaf->data_crc = 0;
1140         } else {
1141                 switch(leaf->base.rec_type) {
1142                 case HAMMER_RECTYPE_INODE:
1143                         KKASSERT(leaf->data_len ==
1144                                   sizeof(struct hammer_inode_data));
1145                         leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
1146                         break;
1147                 default:
1148                         leaf->data_crc = crc32(data, leaf->data_len);
1149                         break;
1150                 }
1151         }
1152 }
1153
1154 void
1155 hkprintf(const char *ctl, ...)
1156 {
1157         __va_list va;
1158
1159         if (hammer_debug_debug) {
1160                 __va_start(va, ctl);
1161                 kvprintf(ctl, va);
1162                 __va_end(va);
1163         }
1164 }
1165
1166 /*
1167  * Return the block size at the specified file offset.
1168  */
1169 int
1170 hammer_blocksize(int64_t file_offset)
1171 {
1172         if (file_offset < HAMMER_XDEMARC)
1173                 return(HAMMER_BUFSIZE);
1174         else
1175                 return(HAMMER_XBUFSIZE);
1176 }
1177
1178 int
1179 hammer_blockoff(int64_t file_offset)
1180 {
1181         if (file_offset < HAMMER_XDEMARC)
1182                 return((int)file_offset & HAMMER_BUFMASK);
1183         else
1184                 return((int)file_offset & HAMMER_XBUFMASK);
1185 }
1186
1187 /*
1188  * Return the demarkation point between the two offsets where
1189  * the block size changes. 
1190  */
1191 int64_t
1192 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
1193 {
1194         if (file_offset1 < HAMMER_XDEMARC) {
1195                 if (file_offset2 <= HAMMER_XDEMARC)
1196                         return(file_offset2);
1197                 return(HAMMER_XDEMARC);
1198         }
1199         panic("hammer_blockdemarc: illegal range %lld %lld\n",
1200               (long long)file_offset1, (long long)file_offset2);
1201 }
1202
1203 udev_t
1204 hammer_fsid_to_udev(uuid_t *uuid)
1205 {
1206         u_int32_t crc;
1207
1208         crc = crc32(uuid, sizeof(*uuid));
1209         return((udev_t)crc);
1210 }
1211