2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
41 #include <sys/dirent.h>
48 * HAMMER2 offers shared locks and exclusive locks on inodes.
50 * An inode's ip->chain pointer is resolved and stable while an inode is
51 * locked, and can be cleaned out at any time (become NULL) when an inode
54 * The underlying chain is also locked and returned.
56 * NOTE: We don't combine the inode/chain lock because putting away an
57 * inode would otherwise confuse multiple lock holders of the inode.
/*
 * Exclusively lock an inode together with its underlying chain.
 *
 * A reference is taken on ip and ip->topo_cst is acquired in
 * CCMS_STATE_EXCLUSIVE before the chain is examined.  The chain is then
 * locked with HAMMER2_RESOLVE_ALWAYS.
 *
 * NOTE(review): the statement that loads chain, the handling after the
 * post-lock race check and the return statement are not visible in this
 * excerpt -- confirm against the full file before relying on details.
 */
60 hammer2_inode_lock_ex(hammer2_inode_t *ip)
62 hammer2_chain_t *chain;
64 hammer2_inode_ref(ip);
65 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
68 * ip->chain fixup. Certain duplications used to move inodes
69 * into indirect blocks (for example) can cause ip->chain to
/* Walk past deleted chains by following their duplink pointers. */
76 while (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED))
77 chain = chain->duplink;
/*
 * The walk ended on a different chain: reference it and drop the
 * reference held on ip->chain.
 */
78 if (chain != ip->chain) {
79 hammer2_chain_ref(chain);
80 hammer2_chain_drop(ip->chain);
84 KKASSERT(chain != NULL); /* for now */
85 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
88 * Resolve duplication races
/*
 * Same deleted-with-duplink test as above, repeated now that the chain
 * lock is held.  On a hit the chain is unlocked again; presumably the
 * fixup is then retried (TODO confirm, the follow-up is not visible).
 */
90 if (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED)) {
91 hammer2_chain_unlock(chain);
/*
 * Release an exclusively locked inode.
 *
 * If the chain has HAMMER2_CHAIN_MODIFIED or HAMMER2_CHAIN_SUBMODIFIED
 * set, HAMMER2_INODE_MODIFIED is atomically set in ip->flags.  The chain
 * is then unlocked, ip->topo_cst is released and an inode reference is
 * dropped, in that order.
 *
 * NOTE(review): the statement that loads chain (and any NULL guard around
 * the chain handling) is not visible in this excerpt.
 */
97 hammer2_inode_unlock_ex(hammer2_inode_t *ip)
99 hammer2_chain_t *chain;
102 * XXX this will catch parent directories too which we don't
107 if (chain->flags & (HAMMER2_CHAIN_MODIFIED |
108 HAMMER2_CHAIN_SUBMODIFIED)) {
109 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
111 hammer2_chain_unlock(chain);
113 ccms_thread_unlock(&ip->topo_cst);
114 hammer2_inode_drop(ip);
118 * NOTE: We don't combine the inode/chain lock because putting away an
119 * inode would otherwise confuse multiple lock holders of the inode.
121 * Shared locks are especially sensitive to having too many shared
122 * lock counts (from the same thread) on certain paths which might
123 * need to upgrade them. Only one count of a shared lock can be
/*
 * Lock an inode shared together with its underlying chain.
 *
 * A reference is taken on ip, ip->topo_cst is acquired in
 * CCMS_STATE_SHARED and the chain is locked with
 * HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED.
 *
 * NOTE(review): the statement that loads chain, the code following the
 * race handling and the return statement are not visible in this excerpt.
 */
127 hammer2_inode_lock_sh(hammer2_inode_t *ip)
129 hammer2_chain_t *chain;
131 hammer2_inode_ref(ip);
133 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
136 KKASSERT(chain != NULL); /* for now */
137 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
138 HAMMER2_RESOLVE_SHARED);
141 * Resolve duplication races
/*
 * The locked chain is deleted and has a duplink.  Both shared locks are
 * released and the inode is cycled through the exclusive lock/unlock
 * pair, whose lock side contains the ip->chain fixup logic.  Presumably
 * the shared lock is then re-attempted (TODO confirm, not visible here).
 */
143 if (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED)) {
144 hammer2_chain_unlock(chain);
145 ccms_thread_unlock(&ip->topo_cst);
146 hammer2_inode_lock_ex(ip);
147 hammer2_inode_unlock_ex(ip);
/*
 * Release a shared inode lock: unlock ip->chain, release ip->topo_cst
 * and drop an inode reference, in that order.
 *
 * NOTE(review): a line appears to be missing just before the chain
 * unlock in this excerpt; it may guard the call -- confirm.
 */
154 hammer2_inode_unlock_sh(hammer2_inode_t *ip)
157 hammer2_chain_unlock(ip->chain);
158 ccms_thread_unlock(&ip->topo_cst);
159 hammer2_inode_drop(ip);
/*
 * Thin wrapper: forwards to ccms_thread_lock_temp_release() on
 * ip->topo_cst and returns whatever that call returns.
 */
163 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
165 return(ccms_thread_lock_temp_release(&ip->topo_cst));
/*
 * Thin wrapper: forwards ostate to ccms_thread_lock_temp_restore() on
 * ip->topo_cst.  ostate is presumably the value obtained from
 * hammer2_inode_lock_temp_release() -- verify against callers.
 */
169 hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate)
171 ccms_thread_lock_temp_restore(&ip->topo_cst, ostate);
/*
 * Thin wrapper: forwards to ccms_thread_lock_upgrade() on ip->topo_cst
 * and returns whatever that call returns.
 */
175 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
177 return(ccms_thread_lock_upgrade(&ip->topo_cst));
/*
 * Thin wrapper: forwards ostate to ccms_thread_lock_downgrade() on
 * ip->topo_cst.  ostate is presumably the value obtained from
 * hammer2_inode_lock_upgrade() -- verify against callers.
 */
181 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate)
183 ccms_thread_lock_downgrade(&ip->topo_cst, ostate);
/*
 * Lock the mount exclusively.  The lock used is the cst embedded in the
 * core of the mount's vchain, acquired in CCMS_STATE_EXCLUSIVE.
 */
191 hammer2_mount_exlock(hammer2_mount_t *hmp)
193 ccms_thread_lock(&hmp->vchain.core->cst, CCMS_STATE_EXCLUSIVE);
/*
 * Lock the mount shared.  The lock used is the cst embedded in the core
 * of the mount's vchain, acquired in CCMS_STATE_SHARED.
 */
197 hammer2_mount_shlock(hammer2_mount_t *hmp)
199 ccms_thread_lock(&hmp->vchain.core->cst, CCMS_STATE_SHARED);
/*
 * Release the mount lock (the cst in the core of the mount's vchain).
 * The same call is used for both the shared and the exclusive variant.
 */
203 hammer2_mount_unlock(hammer2_mount_t *hmp)
205 ccms_thread_unlock(&hmp->vchain.core->cst);
/*
 * Acquire hmp->voldatalk exclusively via lockmgr().
 */
209 hammer2_voldata_lock(hammer2_mount_t *hmp)
211 lockmgr(&hmp->voldatalk, LK_EXCLUSIVE);
/*
 * Release hmp->voldatalk.
 *
 * Before the release, if HAMMER2_CHAIN_MODIFIED is not yet set on
 * hmp->vchain, the flag is set atomically and a reference is taken on
 * the vchain.
 *
 * NOTE(review): the first half of the condition is not visible in this
 * excerpt; presumably it tests the modify argument -- confirm.
 */
215 hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify)
218 (hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) {
219 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED);
220 hammer2_chain_ref(&hmp->vchain);
222 lockmgr(&hmp->voldatalk, LK_RELEASE);
226 * Return the directory entry type for an inode.
228 * ip must be locked sh/ex.
/*
 * Translate the object type stored in an inode chain into a directory
 * entry type.
 *
 * The chain must carry an inode bref (asserted).  The type is read from
 * chain->data->ipdata.type; for HAMMER2_OBJTYPE_HARDLINK the
 * ipdata.target_type field is used instead, so the switch operates on
 * the type of the link target.
 *
 * NOTE(review): the value returned by each case is not visible in this
 * excerpt.
 */
231 hammer2_get_dtype(hammer2_chain_t *chain)
235 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
237 if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
238 type = chain->data->ipdata.target_type;
241 case HAMMER2_OBJTYPE_UNKNOWN:
243 case HAMMER2_OBJTYPE_DIRECTORY:
245 case HAMMER2_OBJTYPE_REGFILE:
247 case HAMMER2_OBJTYPE_FIFO:
249 case HAMMER2_OBJTYPE_CDEV: /* not supported */
251 case HAMMER2_OBJTYPE_BDEV: /* not supported */
253 case HAMMER2_OBJTYPE_SOFTLINK:
255 case HAMMER2_OBJTYPE_HARDLINK: /* (never directly associated w/vp) */
257 case HAMMER2_OBJTYPE_SOCKET:
259 case HAMMER2_OBJTYPE_WHITEOUT: /* not supported */
268 * Return the vnode type for an inode
/*
 * Translate the object type stored in an inode chain, switching directly
 * on chain->data->ipdata.type.  The chain must carry an inode bref
 * (asserted).
 *
 * Unlike hammer2_get_dtype(), HAMMER2_OBJTYPE_HARDLINK is not redirected
 * through ipdata.target_type here.
 *
 * NOTE(review): the value returned by each case is not visible in this
 * excerpt; going by the function name it is presumably an enum vtype.
 */
271 hammer2_get_vtype(hammer2_chain_t *chain)
273 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
275 switch(chain->data->ipdata.type) {
276 case HAMMER2_OBJTYPE_UNKNOWN:
278 case HAMMER2_OBJTYPE_DIRECTORY:
280 case HAMMER2_OBJTYPE_REGFILE:
282 case HAMMER2_OBJTYPE_FIFO:
284 case HAMMER2_OBJTYPE_CDEV: /* not supported */
286 case HAMMER2_OBJTYPE_BDEV: /* not supported */
288 case HAMMER2_OBJTYPE_SOFTLINK:
290 case HAMMER2_OBJTYPE_HARDLINK: /* XXX */
292 case HAMMER2_OBJTYPE_SOCKET:
294 case HAMMER2_OBJTYPE_WHITEOUT: /* not supported */
/*
 * Translate an enum vtype into a HAMMER2_OBJTYPE_* constant.
 *
 * The values that can be returned are DIRECTORY, REGFILE, FIFO, SOCKET,
 * CDEV, BDEV, SOFTLINK and UNKNOWN.
 *
 * NOTE(review): the case labels selecting each return are not visible in
 * this excerpt; HAMMER2_OBJTYPE_UNKNOWN is returned last and is
 * presumably the fallback -- confirm.
 */
303 hammer2_get_obj_type(enum vtype vtype)
307 return(HAMMER2_OBJTYPE_DIRECTORY);
309 return(HAMMER2_OBJTYPE_REGFILE);
311 return(HAMMER2_OBJTYPE_FIFO);
313 return(HAMMER2_OBJTYPE_SOCKET);
315 return(HAMMER2_OBJTYPE_CDEV);
317 return(HAMMER2_OBJTYPE_BDEV);
319 return(HAMMER2_OBJTYPE_SOFTLINK);
321 return(HAMMER2_OBJTYPE_UNKNOWN);
327 * Convert a hammer2 64-bit time to a timespec.
/*
 * Split a 64-bit time value into a timespec.
 *
 * xtime is treated as a count of microseconds: the quotient by 1000000
 * becomes ts->tv_sec and the remainder, multiplied by 1000, becomes
 * ts->tv_nsec.
 */
330 hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts)
332 ts->tv_sec = (unsigned long)(xtime / 1000000);
333 ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
/*
 * Inverse of hammer2_time_to_timespec(): fold a timespec into a single
 * microsecond count, computed as tv_nsec / 1000 + tv_sec * 1000000.
 * Nanosecond precision below one microsecond is discarded by the integer
 * division.
 */
337 hammer2_timespec_to_time(struct timespec *ts)
341 xtime = (unsigned)(ts->tv_nsec / 1000) +
342 (unsigned long)ts->tv_sec * 1000000ULL;
347 * Convert a uuid to a unix uid or gid
/*
 * Extract a 32-bit id from a uuid by reading a u_int32_t starting at
 * uuid->node[2].  This is the same location hammer2_guid_to_uuid()
 * writes to.
 */
350 hammer2_to_unix_xid(uuid_t *uuid)
352 return(*(u_int32_t *)&uuid->node[2]);
/*
 * Build a uuid from a 32-bit id: the whole uuid is zeroed, then guid is
 * stored as a u_int32_t starting at uuid->node[2].
 */
356 hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
358 bzero(uuid, sizeof(*uuid));
359 *(u_int32_t *)&uuid->node[2] = guid;
363 * Borrow HAMMER1's directory hash algorithm #1 with a few modifications.
364 * The filename is split into fields which are hashed separately and then
367 * Differences include: bit 63 must be set to 1 for HAMMER2 (HAMMER1 sets
368 * it to 0), this is because bit63=0 is used for hidden hardlinked inodes.
369 * (This means we do not need to do a 0-check/or-with-0x100000000 either).
371 * Also, the iscsi crc code is used instead of the old crc32 code.
/*
 * Compute the 64-bit directory hash key for a filename of len bytes.
 *
 * Visible steps:
 *  - The name is scanned byte by byte.  At a separator character (a
 *    period is one of them) hammer2_icrc32() of the field running from
 *    index j up to, but not including, index i is added into crcx.
 *  - A second, identical accumulation follows the loop; presumably it
 *    covers the trailing field (TODO confirm its guard).
 *  - crcx is shifted into the upper 32 bits of key.
 *  - hammer2_icrc32() over the entire name is folded with
 *    crcx ^ (crcx << 16), masked with 0xFFFF0000 and OR-ed into key.
 *
 * NOTE(review): local declarations, the full separator list, the updates
 * of j, the initial value of key and the final bit manipulation and
 * return are not visible in this excerpt.
 */
374 hammer2_dirhash(const unsigned char *name, size_t len)
376 const unsigned char *aname = name;
388 for (i = j = 0; i < len; ++i) {
389 if (aname[i] == '.' ||
394 crcx += hammer2_icrc32(aname + j, i - j);
399 crcx += hammer2_icrc32(aname + j, i - j);
402 * The directory hash utilizes the top 32 bits of the 64-bit key.
403 * Bit 63 must be set to 1.
406 key |= (uint64_t)crcx << 32;
409 * l16 - crc of entire filename
411 * This crc reduces degenerate hash collision conditions
413 crcx = hammer2_icrc32(aname, len);
414 crcx = crcx ^ (crcx << 16);
415 key |= crcx & 0xFFFF0000U;
418 * Set bit 15. This allows readdir to strip bit 63 so a positive
419 * 64-bit cookie/offset can always be returned, and still guarantee
420 * that the values 0x0000-0x7FFF are available for artificial entries.
429 * Return the power-of-2 radix greater or equal to
430 * the specified number of bytes.
432 * Always returns at least the minimum media allocation
433 * size radix, HAMMER2_MIN_RADIX (10), which is 1KB.
/*
 * Compute the allocation radix for a byte count.
 *
 * bytes is first raised to HAMMER2_MIN_ALLOC if it is smaller.  A
 * starting radix is then chosen from the size (HAMMER2_PBUFRADIX for an
 * exact HAMMER2_PBUFSIZE, HAMMER2_MIN_RADIX for the smallest class) and
 * the final loop runs while 1 << radix is still below bytes.
 *
 * NOTE(review): the starting radix for the >= 16384 and >= 1024 classes,
 * the loop body and the return statement are not visible in this
 * excerpt; presumably the loop increments radix -- confirm.
 */
436 hammer2_allocsize(size_t bytes)
440 if (bytes < HAMMER2_MIN_ALLOC)
441 bytes = HAMMER2_MIN_ALLOC;
442 if (bytes == HAMMER2_PBUFSIZE)
443 radix = HAMMER2_PBUFRADIX;
444 else if (bytes >= 16384)
446 else if (bytes >= 1024)
449 radix = HAMMER2_MIN_RADIX;
451 while (((size_t)1 << radix) < bytes)
457 * ip must be locked sh/ex
/*
 * Compute the logical buffer covering file offset uoff.
 *
 * Outputs:
 *   *lbasep - uoff rounded down with ~HAMMER2_PBUFMASK64.
 *   *leofp  - the inode size (ipdata->size) rounded down the same way,
 *             advanced by the returned size when the buffer at uoff is
 *             the one containing EOF.
 *
 * Returns the buffer size in bytes: HAMMER2_PBUFSIZE for buffers below
 * the EOF buffer, otherwise 1 << radix, where radix comes from
 * hammer2_allocsize() applied to the bytes between the rounded-down EOF
 * and the actual size, raised to at least HAMMER2_MINALLOCRADIX.
 *
 * The function asserts that *lbasep does not exceed *leofp.
 */
460 hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
461 hammer2_key_t *lbasep, hammer2_key_t *leofp)
463 hammer2_inode_data_t *ipdata = &ip->chain->data->ipdata;
466 *lbasep = uoff & ~HAMMER2_PBUFMASK64;
467 *leofp = ipdata->size & ~HAMMER2_PBUFMASK64;
468 KKASSERT(*lbasep <= *leofp);
/* The requested buffer is the one holding EOF: size it to fit the tail. */
469 if (*lbasep == *leofp /*&& *leofp < 1024 * 1024*/) {
470 radix = hammer2_allocsize((size_t)(ipdata->size - *leofp));
471 if (radix < HAMMER2_MINALLOCRADIX)
472 radix = HAMMER2_MINALLOCRADIX;
473 *leofp += 1U << radix;
474 return (1U << radix);
476 return (HAMMER2_PBUFSIZE);
/*
 * Store a time value in *timep as a microsecond count, computed as
 * tv.tv_sec * 1000000 + tv.tv_usec.
 *
 * NOTE(review): the declaration of tv and the call that fills it are not
 * visible in this excerpt; presumably tv holds the current time.
 */
481 hammer2_update_time(uint64_t *timep)
486 *timep = (unsigned long)tv.tv_sec * 1000000 + tv.tv_usec;