/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>

/*
 * Acquire an exclusive lock, using 'ident' as the tsleep() wait message.
 * The owning thread may acquire the lock recursively.
 */
void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs > 0);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->owner = td;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->owner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else {
			if (hammer_debug_locks) {
				kprintf("hammer_lock_ex: held by %p\n",
					lock->owner);
			}
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(lock);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				tsleep(lock, 0, ident, 0);
				if (hammer_debug_locks)
					kprintf("hammer_lock_ex: try again\n");
			}
		}
	}
}
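
#if 0
/*
 * Example (compiled out, illustration only): the expected calling
 * pattern.  A structure must be referenced before its embedded lock is
 * acquired, which is what the KKASSERT(lock->refs > 0) above enforces.
 * The example_* helper and the "hmrexl" ident are hypothetical, not a
 * HAMMER interface.
 */
static void
example_lock_ex(struct hammer_lock *lock)
{
	hammer_ref(lock);			/* pin the structure */
	hammer_lock_ex_ident(lock, "hmrexl");	/* ident shows as the wmesg */
	/* ... modify the structure exclusively ... */
	hammer_unlock(lock);
	hammer_unref(lock);
}
#endif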

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	int error;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs > 0);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->owner = td;
				error = 0;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->owner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}
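
#if 0
/*
 * Example (compiled out, illustration only): non-blocking acquisition.
 * hammer_lock_ex_try() returns 0 on success and EAGAIN when the lock
 * cannot be taken without sleeping, letting the caller unwind instead
 * of risking a lock-order deadlock.
 */
static int
example_lock_ex_try(struct hammer_lock *lock)
{
	if (hammer_lock_ex_try(lock) != 0)
		return (EAGAIN);	/* caller backs out and retries */
	/* ... exclusive access ... */
	hammer_unlock(lock);
	return (0);
}
#endif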

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs > 0);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (lock->owner == td) {
			/*
			 * Disallowed case, drop into the kernel debugger
			 * for now.  Continuing ('cont') proceeds with the
			 * exclusive lock still held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				Debugger("hammer_lock_sh: already hold ex");
				break;
			}
		} else {
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(lock);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				tsleep(lock, 0, "hmrlck", 0);
			}
		}
	}
}
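
#if 0
/*
 * Example (compiled out, illustration only): why pending exclusive
 * locks are not given priority.  A shared holder may acquire the lock
 * again; if a waiting exclusive lock blocked new shared requests, the
 * second acquisition below would sleep behind it while the first hold
 * prevents the exclusive waiter from ever succeeding.
 */
static void
example_lock_sh_recursion(struct hammer_lock *lock)
{
	hammer_lock_sh(lock);
	hammer_lock_sh(lock);		/* must not block */
	hammer_unlock(lock);
	hammer_unlock(lock);
}
#endif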

int
hammer_lock_sh_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	KKASSERT(lock->refs > 0);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else if (lock->owner == td) {
			/*
			 * Disallowed case, see hammer_lock_sh().
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				Debugger("hammer_lock_sh: already hold ex");
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	for (;;) {
		lv = lock->lockval;

		if ((lv & ~HAMMER_LOCKF_WANTED) == 1) {
			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->owner = td;
				error = 0;
				break;
			}
		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
			if (lock->owner != curthread)
				panic("hammer_lock_upgrade: illegal state");
			error = 0;
			break;
		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
			panic("hammer_lock_upgrade: lock is not held");
			/* NOT REACHED */
			error = EDEADLK;
			break;
		} else {
			error = EDEADLK;
			break;
		}
	}
	return (error);
}
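
#if 0
/*
 * Example (compiled out, illustration only): the usual upgrade pattern.
 * With other shared holders present the upgrade fails with EDEADLK and
 * the caller must drop the lock entirely, reacquire it exclusively, and
 * revalidate any state that may have changed in the window.
 */
static void
example_upgrade(struct hammer_lock *lock)
{
	if (hammer_lock_upgrade(lock) == EDEADLK) {
		hammer_unlock(lock);
		hammer_lock_ex(lock);
		/* ... revalidate cached state before continuing ... */
	}
}
#endif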

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
		 (HAMMER_LOCKF_EXCLUSIVE | 1));
	KKASSERT(lock->owner == td);

	/*
	 * NOTE: Must clear owner before releasing exclusivity
	 */
	lock->owner = NULL;

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
			if (lv & HAMMER_LOCKF_WANTED)
				wakeup(lock);
			break;
		}
	}
}

void
hammer_unlock(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->lockval;

		if (lv & HAMMER_LOCKF_EXCLUSIVE)
			KKASSERT(lock->owner == td);

		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (nlv > 1) {
			nlv = lv - 1;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (nlv == 1) {
			nlv = 0;
			if (lv & HAMMER_LOCKF_EXCLUSIVE)
				lock->owner = NULL;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (lv & HAMMER_LOCKF_WANTED)
					wakeup(lock);
				break;
			}
		} else {
			panic("hammer_unlock: lock %p is not held", lock);
		}
	}
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if held exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
	u_int lv = lock->lockval;

	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		return(1);
	else if (lv)
		return(-1);
	panic("hammer_lock_status: lock must be held: %p", lock);
}

void
hammer_ref(struct hammer_lock *lock)
{
	KKASSERT(lock->refs >= 0);
	atomic_add_int(&lock->refs, 1);
}

void
hammer_unref(struct hammer_lock *lock)
{
	KKASSERT(lock->refs > 0);
	atomic_subtract_int(&lock->refs, 1);
}

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data
 * buffers (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement deadlock recovery code CANNOT release
 * the sync lock, nor can we give pending exclusive sync locks priority over
 * a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
	int error;

	++trans->sync_lock_refs;
	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
		--trans->sync_lock_refs;
	return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
	--trans->sync_lock_refs;
	hammer_unlock(&trans->hmp->sync_lock);
}
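
#if 0
/*
 * Example (compiled out, illustration only): bracketing a meta-data
 * modification.  Frontend code holds the sync lock shared so that the
 * flusher, which holds it exclusively, sees either all or none of the
 * modification within a single flush group.
 */
static void
example_modify_meta_data(hammer_transaction_t trans)
{
	hammer_sync_lock_sh(trans);
	/* ... make atomic meta-data modifications ... */
	hammer_sync_unlock(trans);
}
#endif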

/*
 * Unix uid/gid values are stored in bytes 2-5 of the uuid's node
 * field, so a HAMMER uuid converts to and from a 32 bit xid trivially.
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
	return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
	bzero(uuid, sizeof(*uuid));
	*(u_int32_t *)&uuid->node[2] = guid;
}

void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
	ts->tv_sec = (unsigned long)(xtime / 1000000);
	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
	u_int64_t xtime;

	xtime = (unsigned)(ts->tv_nsec / 1000) +
		(unsigned long)ts->tv_sec * 1000000ULL;
	return(xtime);
}
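
#if 0
/*
 * Example (compiled out, illustration only): HAMMER timestamps are
 * microseconds since the epoch.  xtime 1000000042 is 1000.000042
 * seconds, i.e. tv_sec = 1000 and tv_nsec = 42000, and round-trips
 * exactly because tv_nsec is derived from whole microseconds.
 */
static void
example_time_roundtrip(void)
{
	struct timespec ts;
	u_int64_t xtime = 1000000042ULL;

	hammer_time_to_timespec(xtime, &ts);
	KKASSERT(hammer_timespec_to_time(&ts) == xtime);
}
#endif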

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(VDIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(VREG);
	case HAMMER_OBJTYPE_DBFILE:
		return(VDATABASE);
	case HAMMER_OBJTYPE_FIFO:
		return(VFIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(VSOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(VCHR);
	case HAMMER_OBJTYPE_BDEV:
		return(VBLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(VLNK);
	default:
		return(VBAD);
	}
	/* not reached */
}

int
hammer_get_dtype(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(DT_DIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(DT_REG);
	case HAMMER_OBJTYPE_DBFILE:
		return(DT_DBF);
	case HAMMER_OBJTYPE_FIFO:
		return(DT_FIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(DT_SOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(DT_CHR);
	case HAMMER_OBJTYPE_BDEV:
		return(DT_BLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(DT_LNK);
	default:
		return(DT_UNKNOWN);
	}
	/* not reached */
}

u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
	switch(vtype) {
	case VDIR:
		return(HAMMER_OBJTYPE_DIRECTORY);
	case VREG:
		return(HAMMER_OBJTYPE_REGFILE);
	case VDATABASE:
		return(HAMMER_OBJTYPE_DBFILE);
	case VFIFO:
		return(HAMMER_OBJTYPE_FIFO);
	case VSOCK:
		return(HAMMER_OBJTYPE_SOCKET);
	case VCHR:
		return(HAMMER_OBJTYPE_CDEV);
	case VBLK:
		return(HAMMER_OBJTYPE_BDEV);
	case VLNK:
		return(HAMMER_OBJTYPE_SOFTLINK);
	default:
		return(HAMMER_OBJTYPE_UNKNOWN);
	}
	/* not reached */
}
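
#if 0
/*
 * Example (compiled out, illustration only): the two mappings are
 * inverses for every object type HAMMER can store, so a round trip
 * through the on-disk encoding is the identity.
 */
static void
example_objtype_roundtrip(void)
{
	KKASSERT(hammer_get_vnode_type(hammer_get_obj_type(VREG)) == VREG);
	KKASSERT(hammer_get_vnode_type(hammer_get_obj_type(VDIR)) == VDIR);
}
#endif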

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
	if (ip->hmp->hflags & HMNT_NOHISTORY)
		return(HAMMER_DELETE_DESTROY);
	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
		return(HAMMER_DELETE_DESTROY);
	return(0);
}

/*
 * ALGORITHM VERSION 1:
 *	Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 *	crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 *	to generate a unique key and will scan all entries with the same upper
 *	32 bits when issuing a lookup.
 *
 *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 2:
 *
 *	The 64 bit hash key is generated from the following components.  The
 *	first three characters are encoded as 5-bit quantities, the middle
 *	N characters are hashed into a 6 bit quantity, and the last two
 *	characters are encoded as 5-bit quantities.  A 32 bit hash of the
 *	entire filename is encoded in the low 32 bits.  Bit 0 is set to
 *	0 to guarantee us a 2^24 iteration space.
 *
 *	0aaaaabbbbbccccc mmmmmmyyyyyzzzzz hhhhhhhhhhhhhhhh hhhhhhhhhhhhhhh0
 *
 *	This gives us a domain sort for the first three characters, the last
 *	two characters, and breaks the middle space into 64 random domains.
 *	The domain sort folds upper case, lower case, digits, and punctuation
 *	spaces together, the idea being that filenames tend not to mix
 *	characters from different domains.
 *
 *	The 64 random domains act as a sub-sort for the middle characters
 *	but may cause a random seek.  If the filesystem is being accessed
 *	in sorted order we should tend to get very good linearity for most
 *	filenames and devolve into more random seeks otherwise.
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
			 u_int32_t *max_iterationsp)
{
	int64_t key;
	u_int32_t crcx;
	const char *aname = name;

	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
	case HAMMER_INODE_CAP_DIRHASH_ALG0:
		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
		if (key == 0)
			key |= 0x100000000LL;
		*max_iterationsp = 0xFFFFFFFFU;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG1:
		key = (u_int32_t)crc32(aname, len) & 0xFFFFFFFEU;

		switch(len) {
		default:
			crcx = crc32(aname + 3, len - 5);
			crcx = crcx ^ (crcx >> 6) ^ (crcx >> 12);
			key |= (int64_t)(crcx & 0x3F) << 42;
			/* fall through */
		case 5:
		case 4:
			/* fall through */
		case 3:
			key |= ((int64_t)(aname[2] & 0x1F) << 48);
			/* fall through */
		case 2:
			key |= ((int64_t)(aname[1] & 0x1F) << 53) |
			       ((int64_t)(aname[len-2] & 0x1F) << 37);
			/* fall through */
		case 1:
			key |= ((int64_t)(aname[0] & 0x1F) << 58) |
			       ((int64_t)(aname[len-1] & 0x1F) << 32);
			/* fall through */
		case 0:
			break;
		}
		if ((key & 0xFFFFFFFF00000000LL) == 0)
			key |= 0x100000000LL;
		if (hammer_debug_general & 0x0400) {
			kprintf("namekey2: 0x%016llx %*.*s\n",
				(long long)key, len, len, aname);
		}
		*max_iterationsp = 0x00FFFFFF;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG2:
	case HAMMER_INODE_CAP_DIRHASH_ALG3:
	default:
		key = 0;			/* compiler warning */
		*max_iterationsp = 1;		/* sanity */
		panic("hammer_directory_namekey: bad algorithm %p\n", dip);
		break;
	}
	return(key);
}
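
#if 0
/*
 * Example (compiled out, illustration only): ALG1 key composition for
 * the name "abcdez" (len 6).  'a', 'b', 'c' fill the three leading
 * 5-bit fields, 'e' and 'z' (aname[len-2], aname[len-1]) fill the two
 * trailing 5-bit fields, a folded crc of the middle characters ("d"
 * here) fills the 6-bit domain field, and a crc of the whole name
 * fills the low 32 bits with bit 0 forced to 0:
 *
 *	key = ((int64_t)('a' & 0x1F) << 58) |
 *	      ((int64_t)('b' & 0x1F) << 53) |
 *	      ((int64_t)('c' & 0x1F) << 48) |
 *	      ((int64_t)(crcx & 0x3F) << 42) |
 *	      ((int64_t)('e' & 0x1F) << 37) |
 *	      ((int64_t)('z' & 0x1F) << 32) |
 *	      (crc32("abcdez", 6) & 0xFFFFFFFEU);
 */
#endif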

/*
 * Convert the string after @@ (the @@ itself is not included) to a TID.
 * Returns 0 on success, EINVAL on failure.
 *
 * If this function fails *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
		  hammer_tid_t *tidp, u_int32_t *localizationp)
{
	hammer_tid_t tid;
	u_int32_t localization;
	char *ptr;
	int ispfs;
	int n;

	/*
	 * Forms allowed for TID:  "0x%016llx"
	 *			   "-1"
	 */
	tid = strtouq(str, &ptr, 0);
	n = ptr - str;
	if (n == 2 && str[0] == '-' && str[1] == '1') {
		/* ok */
	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
		/* ok */
	} else {
		return(EINVAL);
	}

	/*
	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
	 */
	str = ptr;
	if (*str == ':') {
		localization = strtoul(str + 1, &ptr, 10) << 16;
		if (ptr - str != 6)
			return(EINVAL);
		str = ptr;
		ispfs = 1;
	} else {
		localization = *localizationp;
		ispfs = 0;
	}

	/*
	 * Any trailing junk invalidates special extension handling.
	 */
	if (*str)
		return(EINVAL);
	*tidp = tid;
	*localizationp = localization;
	*ispfsp = ispfs;
	return(0);
}
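
#if 0
/*
 * Example (compiled out, illustration only): strings accepted by
 * hammer_str_to_tid(), i.e. the portion following "@@" in a path:
 *
 *	"0x00000001061a8ba0"		TID only, *ispfsp set to 0
 *	"0x00000001061a8ba0:00001"	TID plus PFS #1 localization
 *	"-1:00001"			wildcard TID, PFS #1
 *
 * "0x1" is rejected (the TID must be exactly 18 characters) and
 * "...:0" is rejected (the PFS field must be exactly five digits).
 */
#endif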

void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
	hammer_crc_t crc;

	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
	return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
	return (ondisk->crc == crc);
}

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *       to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	hammer_crc_t crc;

	if (leaf->data_len == 0) {
		crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			if (leaf->data_len != sizeof(struct hammer_inode_data))
				return(0);
			crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			crc = crc32(data, leaf->data_len);
			break;
		}
	}
	return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	if (leaf->data_len == 0) {
		leaf->data_crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			KKASSERT(leaf->data_len ==
				 sizeof(struct hammer_inode_data));
			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			leaf->data_crc = crc32(data, leaf->data_len);
			break;
		}
	}
}
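
#if 0
/*
 * Example (compiled out, illustration only): the set/test pair must
 * agree.  A leaf whose CRC was generated by hammer_crc_set_leaf()
 * still passes hammer_crc_test_leaf() after an in-place atime/mtime
 * update, because HAMMER_INODE_CRCSIZE stops short of those fields.
 */
static void
example_leaf_crc(void *data, hammer_btree_leaf_elm_t leaf)
{
	hammer_crc_set_leaf(data, leaf);
	/* ... atime/mtime may be updated in-place here ... */
	KKASSERT(hammer_crc_test_leaf(data, leaf));
}
#endif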

/*
 * kprintf() gated by the hammer_debug_debug sysctl.
 */
void
hkprintf(const char *ctl, ...)
{
	__va_list va;

	if (hammer_debug_debug) {
		__va_start(va, ctl);
		kvprintf(ctl, va);
		__va_end(va);
	}
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return(HAMMER_BUFSIZE);
	else
		return(HAMMER_XBUFSIZE);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
	if (file_offset1 < HAMMER_XDEMARC) {
		if (file_offset2 <= HAMMER_XDEMARC)
			return(file_offset2);
		return(HAMMER_XDEMARC);
	}
	panic("hammer_blockdemarc: illegal range %lld %lld\n",
	      (long long)file_offset1, (long long)file_offset2);
}
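
#if 0
/*
 * Example (compiled out, illustration only): offsets below
 * HAMMER_XDEMARC use small HAMMER_BUFSIZE buffers, offsets at or
 * beyond it use large HAMMER_XBUFSIZE buffers.  A write spanning the
 * demarc must be clipped at the boundary:
 *
 *	hammer_blockdemarc(base, base + bytes)
 *
 * returns base + bytes when the range stays below the demarc, and
 * HAMMER_XDEMARC when the range crosses it, so the caller issues the
 * first segment with the small block size and the remainder with the
 * large one.
 */
#endif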

udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
	u_int32_t crc;

	crc = crc32(uuid, sizeof(*uuid));
	return((udev_t)crc);
}