/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>

void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
        thread_t td = curthread;

        KKASSERT(lock->refs > 0);
        crit_enter();
        if (lock->locktd != td) {
                while (lock->locktd != NULL || lock->lockcount) {
                        ++lock->exwanted;
                        lock->wanted = 1;
                        if (hammer_debug_locks) {
                                kprintf("hammer_lock_ex: held by %p\n",
                                        lock->locktd);
                        }
                        ++hammer_contention_count;
                        tsleep(lock, 0, ident, 0);
                        if (hammer_debug_locks)
                                kprintf("hammer_lock_ex: try again\n");
                        --lock->exwanted;
                }
                lock->locktd = td;
        }
        KKASSERT(lock->lockcount >= 0);
        ++lock->lockcount;
        crit_exit();
}

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
        thread_t td = curthread;

        KKASSERT(lock->refs > 0);
        crit_enter();
        if (lock->locktd != td) {
                if (lock->locktd != NULL || lock->lockcount) {
                        crit_exit();
                        return(EAGAIN);
                }
                lock->locktd = td;
        }
        KKASSERT(lock->lockcount >= 0);
        ++lock->lockcount;
        crit_exit();
        return(0);
}

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
        KKASSERT(lock->refs > 0);
        crit_enter();
        while (lock->locktd != NULL) {
                if (lock->locktd == curthread) {
                        Debugger("hammer_lock_sh: lock_sh on exclusive");
                        ++lock->lockcount;
                        crit_exit();
                        return;
                }
                lock->wanted = 1;
                tsleep(lock, 0, "hmrlck", 0);
        }
        KKASSERT(lock->lockcount <= 0);
        --lock->lockcount;
        crit_exit();
}
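
/*
 * Illustrative usage sketch (assumed caller pattern, not prescribed by
 * this file): a structure protected by a hammer_lock must be referenced
 * before it is locked, since the lock paths assert lock->refs > 0.
 * Assuming 'ip' is a hammer_inode_t whose embedded lock is ip->lock, a
 * typical read-only access would look like:
 *
 *        hammer_ref(&ip->lock);
 *        hammer_lock_sh(&ip->lock);
 *        ... read-only access to the inode ...
 *        hammer_unlock(&ip->lock);
 *        hammer_unref(&ip->lock);
 */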

int
hammer_lock_sh_try(struct hammer_lock *lock)
{
        KKASSERT(lock->refs > 0);
        crit_enter();
        if (lock->locktd) {
                crit_exit();
                return(EAGAIN);
        }
        KKASSERT(lock->lockcount <= 0);
        --lock->lockcount;
        crit_exit();
        return(0);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock)
{
        int error;

        crit_enter();
        if (lock->lockcount > 0) {
                if (lock->locktd != curthread)
                        panic("hammer_lock_upgrade: illegal lock state");
                error = 0;
        } else if (lock->lockcount == -1) {
                lock->lockcount = 1;
                lock->locktd = curthread;
                error = 0;
        } else if (lock->lockcount != 0) {
                error = EDEADLK;
        } else {
                panic("hammer_lock_upgrade: lock is not held");
                /* NOT REACHED */
                error = 0;
        }
        crit_exit();
        return(error);
}
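
/*
 * Illustrative sketch (assumed caller pattern, not taken from this file):
 * a caller holding a shared lock that needs exclusive access can attempt
 * an upgrade and fall back to a full relock when other shared holders are
 * present.  After the unlock/relock the protected state may have changed
 * and must be revalidated by the caller.
 *
 *        if (hammer_lock_upgrade(lock) == EDEADLK) {
 *                hammer_unlock(lock);
 *                hammer_lock_ex(lock);
 *                ... revalidate the protected state ...
 *        }
 */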

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock)
{
        KKASSERT(lock->lockcount == 1 && lock->locktd == curthread);
        crit_enter();
        lock->lockcount = -1;
        lock->locktd = NULL;
        if (lock->wanted) {
                lock->wanted = 0;
                wakeup(lock);
        }
        crit_exit();
        /* XXX memory barrier */
}

void
hammer_unlock(struct hammer_lock *lock)
{
        crit_enter();
        KKASSERT(lock->lockcount != 0);
        if (lock->lockcount < 0) {
                if (++lock->lockcount == 0 && lock->wanted) {
                        lock->wanted = 0;
                        wakeup(lock);
                }
        } else {
                KKASSERT(lock->locktd == curthread);
                if (--lock->lockcount == 0) {
                        lock->locktd = NULL;
                        if (lock->wanted) {
                                lock->wanted = 0;
                                wakeup(lock);
                        }
                }
        }
        crit_exit();
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if it is held exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
        if (lock->lockcount < 0)
                return(-1);
        if (lock->lockcount > 0)
                return(1);
        panic("hammer_lock_status: lock must be held: %p", lock);
}

void
hammer_ref(struct hammer_lock *lock)
{
        KKASSERT(lock->refs >= 0);
        crit_enter();
        ++lock->refs;
        crit_exit();
}

void
hammer_unref(struct hammer_lock *lock)
{
        KKASSERT(lock->refs > 0);
        crit_enter();
        --lock->refs;
        crit_exit();
}

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data
 * buffers (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement deadlock recovery code CANNOT release the
 * sync lock, nor can we give pending exclusive sync locks priority over
 * a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
        ++trans->sync_lock_refs;
        hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
        ++trans->sync_lock_refs;
        hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
        int error;

        ++trans->sync_lock_refs;
        if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
                --trans->sync_lock_refs;
        return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
        --trans->sync_lock_refs;
        hammer_unlock(&trans->hmp->sync_lock);
}
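
/*
 * Usage sketch (assumed, based on the description above): a path that is
 * about to dirty meta-data brackets the modification with the shared sync
 * lock so the change becomes part of the flush group currently being
 * assembled, while the flusher takes the lock exclusively to close out a
 * flush group.
 *
 *        hammer_sync_lock_sh(trans);
 *        ... modify meta-data buffers / B-Tree elements ...
 *        hammer_sync_unlock(trans);
 */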

/*
 * Misc
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
        return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
        bzero(uuid, sizeof(*uuid));
        *(u_int32_t *)&uuid->node[2] = guid;
}
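
/*
 * Example (illustrative): the unix uid/gid is stored in bytes 2-5 of the
 * uuid's node[] field, so the two helpers above round-trip:
 *
 *        uuid_t tmp;
 *
 *        hammer_guid_to_uuid(&tmp, 1000);
 *        KKASSERT(hammer_to_unix_xid(&tmp) == 1000);
 */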

void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
        ts->tv_sec = (unsigned long)(xtime / 1000000);
        ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
        u_int64_t xtime;

        xtime = (unsigned)(ts->tv_nsec / 1000) +
                (unsigned long)ts->tv_sec * 1000000ULL;
        return(xtime);
}
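
/*
 * Worked example (illustrative): HAMMER stores time as microseconds, so a
 * timespec of { tv_sec = 1, tv_nsec = 500000 } converts to xtime = 1000500
 * and converts back to { tv_sec = 1, tv_nsec = 500000 }; sub-microsecond
 * precision is truncated.
 */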

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
        switch(obj_type) {
        case HAMMER_OBJTYPE_DIRECTORY:
                return(VDIR);
        case HAMMER_OBJTYPE_REGFILE:
                return(VREG);
        case HAMMER_OBJTYPE_DBFILE:
                return(VDATABASE);
        case HAMMER_OBJTYPE_FIFO:
                return(VFIFO);
        case HAMMER_OBJTYPE_SOCKET:
                return(VSOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(VCHR);
        case HAMMER_OBJTYPE_BDEV:
                return(VBLK);
        case HAMMER_OBJTYPE_SOFTLINK:
                return(VLNK);
        default:
                return(VBAD);
        }
        /* not reached */
}

int
hammer_get_dtype(u_int8_t obj_type)
{
        switch(obj_type) {
        case HAMMER_OBJTYPE_DIRECTORY:
                return(DT_DIR);
        case HAMMER_OBJTYPE_REGFILE:
                return(DT_REG);
        case HAMMER_OBJTYPE_DBFILE:
                return(DT_DBF);
        case HAMMER_OBJTYPE_FIFO:
                return(DT_FIFO);
        case HAMMER_OBJTYPE_SOCKET:
                return(DT_SOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(DT_CHR);
        case HAMMER_OBJTYPE_BDEV:
                return(DT_BLK);
        case HAMMER_OBJTYPE_SOFTLINK:
                return(DT_LNK);
        default:
                return(DT_UNKNOWN);
        }
        /* not reached */
}

u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
        switch(vtype) {
        case VDIR:
                return(HAMMER_OBJTYPE_DIRECTORY);
        case VREG:
                return(HAMMER_OBJTYPE_REGFILE);
        case VDATABASE:
                return(HAMMER_OBJTYPE_DBFILE);
        case VFIFO:
                return(HAMMER_OBJTYPE_FIFO);
        case VSOCK:
                return(HAMMER_OBJTYPE_SOCKET);
        case VCHR:
                return(HAMMER_OBJTYPE_CDEV);
        case VBLK:
                return(HAMMER_OBJTYPE_BDEV);
        case VLNK:
                return(HAMMER_OBJTYPE_SOFTLINK);
        default:
                return(HAMMER_OBJTYPE_UNKNOWN);
        }
        /* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
        if (ip->hmp->hflags & HMNT_NOHISTORY)
                return(HAMMER_DELETE_DESTROY);
        if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
                return(HAMMER_DELETE_DESTROY);
        return(0);
}

/*
 * ALGORITHM VERSION 1:
 *      Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 *      crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 *      to generate a unique key and will scan all entries with the same upper
 *      32 bits when issuing a lookup.
 *
 *      0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 2:
 *
 *      The 64 bit hash key is generated from the following components.  The
 *      first three characters are encoded as 5-bit quantities, the middle
 *      N characters are hashed into a 6 bit quantity, and the last two
 *      characters are encoded as 5-bit quantities.  A 32 bit hash of the
 *      entire filename is encoded in the low 32 bits.  Bit 0 is set to
 *      0 to guarantee us a 2^24 bit iteration space.
 *
 *      0aaaaabbbbbccccc mmmmmmyyyyyzzzzz hhhhhhhhhhhhhhhh hhhhhhhhhhhhhhh0
 *
 *      This gives us a domain sort for the first three characters, the last
 *      two characters, and breaks the middle space into 64 random domains.
 *      The domain sort folds upper case, lower case, digits, and punctuation
 *      spaces together, the idea being the filenames tend to not be a mix
 *      of those domains.
 *
 *      The 64 random domains act as a sub-sort for the middle characters
 *      but may cause a random seek.  If the filesystem is being accessed
 *      in sorted order we should tend to get very good linearity for most
 *      filenames and devolve into more random seeks otherwise.
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
                         u_int32_t *max_iterationsp)
{
        int64_t key;
        int32_t crcx;
        const char *aname = name;

        switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
        case HAMMER_INODE_CAP_DIRHASH_ALG0:
                key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
                if (key == 0)
                        key |= 0x100000000LL;
                *max_iterationsp = 0xFFFFFFFFU;
                break;
        case HAMMER_INODE_CAP_DIRHASH_ALG1:
                key = (u_int32_t)crc32(aname, len) & 0xFFFFFFFEU;

                switch(len) {
                default:
                        crcx = crc32(aname + 3, len - 5);
                        crcx = crcx ^ (crcx >> 6) ^ (crcx >> 12);
                        key |= (int64_t)(crcx & 0x3F) << 42;
                        /* fall through */
                case 5:
                case 4:
                        /* fall through */
                case 3:
                        key |= ((int64_t)(aname[2] & 0x1F) << 48);
                        /* fall through */
                case 2:
                        key |= ((int64_t)(aname[1] & 0x1F) << 53) |
                               ((int64_t)(aname[len-2] & 0x1F) << 37);
                        /* fall through */
                case 1:
                        key |= ((int64_t)(aname[0] & 0x1F) << 58) |
                               ((int64_t)(aname[len-1] & 0x1F) << 32);
                        /* fall through */
                case 0:
                        break;
                }
                if ((key & 0xFFFFFFFF00000000LL) == 0)
                        key |= 0x100000000LL;
                if (hammer_debug_general & 0x0400) {
                        kprintf("namekey2: 0x%016llx %*.*s\n",
                                key, len, len, aname);
                }
                *max_iterationsp = 0x00FFFFFF;
                break;
        case HAMMER_INODE_CAP_DIRHASH_ALG2:
        case HAMMER_INODE_CAP_DIRHASH_ALG3:
        default:
                key = 0;                        /* compiler warning */
                *max_iterationsp = 1;           /* sanity */
                panic("hammer_directory_namekey: bad algorithm %p\n", dip);
                break;
        }
        return(key);
}
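
/*
 * Worked example for ALGORITHM VERSION 2 (illustrative, derived from the
 * code above): for the name "Makefile" (len 8) the key is assembled as
 *
 *        'M' & 0x1F                        -> bits 58-62 (first character)
 *        'a' & 0x1F                        -> bits 53-57 (second character)
 *        'k' & 0x1F                        -> bits 48-52 (third character)
 *        crc32("efi", 3) folded to 6 bits  -> bits 42-47 (middle characters)
 *        'l' & 0x1F                        -> bits 37-41 (second-to-last)
 *        'e' & 0x1F                        -> bits 32-36 (last character)
 *        crc32("Makefile", 8) & 0xFFFFFFFE -> bits 1-31
 *
 * with bit 63 and bit 0 left clear, and *max_iterationsp set to 0x00FFFFFF.
 */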

/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails, *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
                  hammer_tid_t *tidp, u_int32_t *localizationp)
{
        hammer_tid_t tid;
        u_int32_t localization;
        char *ptr;
        int ispfs;
        int n;

        /*
         * Forms allowed for TID:  "0x%016llx"
         *                         "-1"
         */
        tid = strtouq(str, &ptr, 0);
        n = ptr - str;
        if (n == 2 && str[0] == '-' && str[1] == '1') {
                /* ok */
        } else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
                /* ok */
        } else {
                return(EINVAL);
        }

        /*
         * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
         */
        str = ptr;
        if (*str == ':') {
                localization = strtoul(str + 1, &ptr, 10) << 16;
                if (ptr - str != 6)
                        return(EINVAL);
                str = ptr;
                ispfs = 1;
        } else {
                localization = *localizationp;
                ispfs = 0;
        }

        /*
         * Any trailing junk invalidates special extension handling.
         */
        if (*str)
                return(EINVAL);
        *tidp = tid;
        *localizationp = localization;
        *ispfsp = ispfs;
        return(0);
}
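
/*
 * Usage sketch (illustrative; the TID value is made up): for a snapshot
 * path component such as "@@0x00000001061a8ba0:00001" the caller passes
 * the string following the "@@":
 *
 *        hammer_tid_t tid;
 *        u_int32_t localization = 0;
 *        int ispfs;
 *
 *        if (hammer_str_to_tid("0x00000001061a8ba0:00001", &ispfs,
 *                              &tid, &localization) == 0) {
 *                ... tid == 0x1061a8ba0, localization == 1 << 16,
 *                    ispfs == 1 ...
 *        }
 *
 * The form "-1" yields a TID with all bits set (the maximum TID).
 */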

void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
        blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
        ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
                          crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
        hammer_crc_t crc;

        crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
        return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
        hammer_crc_t crc;

        crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
              crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
        return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
        hammer_crc_t crc;

        crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
        return (ondisk->crc == crc);
}

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *       to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
        hammer_crc_t crc;

        if (leaf->data_len == 0) {
                crc = 0;
        } else {
                switch(leaf->base.rec_type) {
                case HAMMER_RECTYPE_INODE:
                        if (leaf->data_len != sizeof(struct hammer_inode_data))
                                return(0);
                        crc = crc32(data, HAMMER_INODE_CRCSIZE);
                        break;
                default:
                        crc = crc32(data, leaf->data_len);
                        break;
                }
        }
        return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
        if (leaf->data_len == 0) {
                leaf->data_crc = 0;
        } else {
                switch(leaf->base.rec_type) {
                case HAMMER_RECTYPE_INODE:
                        KKASSERT(leaf->data_len ==
                                  sizeof(struct hammer_inode_data));
                        leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
                        break;
                default:
                        leaf->data_crc = crc32(data, leaf->data_len);
                        break;
                }
        }
}

void
hkprintf(const char *ctl, ...)
{
        __va_list va;

        if (hammer_debug_debug) {
                __va_start(va, ctl);
                kvprintf(ctl, va);
                __va_end(va);
        }
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
        if (file_offset < HAMMER_XDEMARC)
                return(HAMMER_BUFSIZE);
        else
                return(HAMMER_XBUFSIZE);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
        if (file_offset1 < HAMMER_XDEMARC) {
                if (file_offset2 <= HAMMER_XDEMARC)
                        return(file_offset2);
                return(HAMMER_XDEMARC);
        }
        panic("hammer_blockdemarc: illegal range %lld %lld\n",
              file_offset1, file_offset2);
}
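
/*
 * Illustrative example (block size values assumed from hammer_disk.h,
 * i.e. 16KB small buffers, 64KB large buffers and a 1MB demarcation):
 * hammer_blocksize(512 * 1024) returns the small buffer size,
 * hammer_blocksize(4 * 1024 * 1024) returns the large buffer size, and
 * hammer_blockdemarc(512 * 1024, 4 * 1024 * 1024) returns HAMMER_XDEMARC
 * so the caller can clip an I/O range at the point where the block size
 * changes.
 */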

udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
        u_int32_t crc;

        crc = crc32(uuid, sizeof(*uuid));
        return((udev_t)crc);
}