/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>

void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
        thread_t td = curthread;

        KKASSERT(lock->refs > 0);
        crit_enter();
        if (lock->locktd != td) {
                while (lock->locktd != NULL || lock->lockcount) {
                        ++lock->exwanted;
                        lock->wanted = 1;
                        if (hammer_debug_locks) {
                                kprintf("hammer_lock_ex: held by %p\n",
                                        lock->locktd);
                        }
                        ++hammer_contention_count;
                        tsleep(lock, 0, ident, 0);
                        if (hammer_debug_locks)
                                kprintf("hammer_lock_ex: try again\n");
                        --lock->exwanted;
                }
                lock->locktd = td;
        }
        KKASSERT(lock->lockcount >= 0);
        ++lock->lockcount;
        crit_exit();
}
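
/*
 * Example (illustrative, not part of the original file): a typical
 * acquire/release cycle on a structure with an embedded hammer_lock.
 * A lock must be referenced before it can be locked (note the
 * KKASSERT(lock->refs > 0) above); hammer_lock_ex() is the usual
 * wrapper that supplies a default ident.  "node" is a hypothetical
 * caller-side name.
 *
 *      hammer_ref(&node->lock);
 *      hammer_lock_ex(&node->lock);
 *      ... modify the structure ...
 *      hammer_unlock(&node->lock);
 *      hammer_unref(&node->lock);
 */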

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
        thread_t td = curthread;

        KKASSERT(lock->refs > 0);
        crit_enter();
        if (lock->locktd != td) {
                if (lock->locktd != NULL || lock->lockcount) {
                        crit_exit();
                        return(EAGAIN);
                }
                lock->locktd = td;
        }
        KKASSERT(lock->lockcount >= 0);
        ++lock->lockcount;
        crit_exit();
        return(0);
}
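
/*
 * Example (illustrative sketch): the usual try-lock pattern.  On
 * EAGAIN the caller backs off rather than risking a deadlock, then
 * either retries or falls back to the blocking path.  "node" is a
 * hypothetical caller-side name.
 *
 *      if (hammer_lock_ex_try(&node->lock) == 0) {
 *              ... fast path, lock held exclusively ...
 *              hammer_unlock(&node->lock);
 *      } else {
 *              ... EAGAIN: release conflicting resources and retry,
 *                  or block with hammer_lock_ex() ...
 *      }
 */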

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
        KKASSERT(lock->refs > 0);
        crit_enter();
        while (lock->locktd != NULL) {
                if (lock->locktd == curthread) {
                        Debugger("hammer_lock_sh: lock_sh on exclusive");
                        ++lock->lockcount;
                        crit_exit();
                        return;
                }
                lock->wanted = 1;
                tsleep(lock, 0, "hmrlck", 0);
        }
        KKASSERT(lock->lockcount <= 0);
        --lock->lockcount;
        crit_exit();
}

/*
 * Try to obtain a shared lock, returning EAGAIN if an exclusive lock
 * is currently held.  Non-blocking companion to hammer_lock_sh().
 */
int
hammer_lock_sh_try(struct hammer_lock *lock)
{
        KKASSERT(lock->refs > 0);
        crit_enter();
        if (lock->locktd) {
                crit_exit();
                return(EAGAIN);
        }
        KKASSERT(lock->lockcount <= 0);
        --lock->lockcount;
        crit_exit();
        return(0);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock)
{
        int error;

        crit_enter();
        if (lock->lockcount > 0) {
                if (lock->locktd != curthread)
                        panic("hammer_lock_upgrade: illegal lock state");
                error = 0;
        } else if (lock->lockcount == -1) {
                lock->lockcount = 1;
                lock->locktd = curthread;
                error = 0;
        } else if (lock->lockcount != 0) {
                error = EDEADLK;
        } else {
                panic("hammer_lock_upgrade: lock is not held");
                /* NOT REACHED */
                error = 0;
        }
        crit_exit();
        return(error);
}
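
/*
 * Example (illustrative sketch): upgrading a shared lock, with the
 * EDEADLK fallback of cycling the lock.  Any state observed under the
 * shared lock must be re-validated after the lock gap.  "node" is a
 * hypothetical caller-side name.
 *
 *      hammer_lock_sh(&node->lock);
 *      if (hammer_lock_upgrade(&node->lock) == EDEADLK) {
 *              hammer_unlock(&node->lock);
 *              hammer_lock_ex(&node->lock);
 *              ... re-validate ...
 *      }
 */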

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock)
{
        KKASSERT(lock->lockcount == 1 && lock->locktd == curthread);
        crit_enter();
        lock->lockcount = -1;
        lock->locktd = NULL;
        if (lock->wanted) {
                lock->wanted = 0;
                wakeup(lock);
        }
        crit_exit();
        /* XXX memory barrier */
}

void
hammer_unlock(struct hammer_lock *lock)
{
        crit_enter();
        KKASSERT(lock->lockcount != 0);
        if (lock->lockcount < 0) {
                if (++lock->lockcount == 0 && lock->wanted) {
                        lock->wanted = 0;
                        wakeup(lock);
                }
        } else {
                KKASSERT(lock->locktd == curthread);
                if (--lock->lockcount == 0) {
                        lock->locktd = NULL;
                        if (lock->wanted) {
                                lock->wanted = 0;
                                wakeup(lock);
                        }
                }
        }
        crit_exit();
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if held exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
        if (lock->lockcount < 0)
                return(-1);
        if (lock->lockcount > 0)
                return(1);
        panic("hammer_lock_status: lock must be held: %p", lock);
}

/*
 * Bump or drop the reference count on a lock.  A lock must be
 * referenced before it can be locked (see the KKASSERTs in the
 * locking routines above).
 */
void
hammer_ref(struct hammer_lock *lock)
{
        KKASSERT(lock->refs >= 0);
        crit_enter();
        ++lock->refs;
        crit_exit();
}

void
hammer_unref(struct hammer_lock *lock)
{
        KKASSERT(lock->refs > 0);
        crit_enter();
        --lock->refs;
        crit_exit();
}

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data
 * buffers (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement deadlock recovery code CANNOT release
 * the sync lock, nor can we give pending exclusive sync locks priority over
 * a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
        ++trans->sync_lock_refs;
        hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
        ++trans->sync_lock_refs;
        hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
        int error;

        ++trans->sync_lock_refs;
        if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
                --trans->sync_lock_refs;
        return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
        --trans->sync_lock_refs;
        hammer_unlock(&trans->hmp->sync_lock);
}
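
/*
 * Example (illustrative sketch): a frontend operation brackets its
 * meta-data modifications with the shared sync lock so they become
 * part of a single flush group; the flusher takes the exclusive side.
 *
 *      hammer_sync_lock_sh(trans);
 *      ... modify meta-data ...
 *      hammer_sync_unlock(trans);
 */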

/*
 * Misc
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
        return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
        bzero(uuid, sizeof(*uuid));
        *(u_int32_t *)&uuid->node[2] = guid;
}

void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
        ts->tv_sec = (unsigned long)(xtime / 1000000);
        ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
        u_int64_t xtime;

        xtime = (unsigned)(ts->tv_nsec / 1000) +
                (unsigned long)ts->tv_sec * 1000000ULL;
        return(xtime);
}
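
/*
 * Example: HAMMER timestamps are microseconds since the epoch, so
 * xtime 1224715200123456 maps to tv_sec = 1224715200 and
 * tv_nsec = 123456000, and converts back losslessly; sub-microsecond
 * timespec precision is discarded on the way in.
 */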

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
        switch(obj_type) {
        case HAMMER_OBJTYPE_DIRECTORY:
                return(VDIR);
        case HAMMER_OBJTYPE_REGFILE:
                return(VREG);
        case HAMMER_OBJTYPE_DBFILE:
                return(VDATABASE);
        case HAMMER_OBJTYPE_FIFO:
                return(VFIFO);
        case HAMMER_OBJTYPE_SOCKET:
                return(VSOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(VCHR);
        case HAMMER_OBJTYPE_BDEV:
                return(VBLK);
        case HAMMER_OBJTYPE_SOFTLINK:
                return(VLNK);
        default:
                return(VBAD);
        }
        /* not reached */
}

int
hammer_get_dtype(u_int8_t obj_type)
{
        switch(obj_type) {
        case HAMMER_OBJTYPE_DIRECTORY:
                return(DT_DIR);
        case HAMMER_OBJTYPE_REGFILE:
                return(DT_REG);
        case HAMMER_OBJTYPE_DBFILE:
                return(DT_DBF);
        case HAMMER_OBJTYPE_FIFO:
                return(DT_FIFO);
        case HAMMER_OBJTYPE_SOCKET:
                return(DT_SOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(DT_CHR);
        case HAMMER_OBJTYPE_BDEV:
                return(DT_BLK);
        case HAMMER_OBJTYPE_SOFTLINK:
                return(DT_LNK);
        default:
                return(DT_UNKNOWN);
        }
        /* not reached */
}

u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
        switch(vtype) {
        case VDIR:
                return(HAMMER_OBJTYPE_DIRECTORY);
        case VREG:
                return(HAMMER_OBJTYPE_REGFILE);
        case VDATABASE:
                return(HAMMER_OBJTYPE_DBFILE);
        case VFIFO:
                return(HAMMER_OBJTYPE_FIFO);
        case VSOCK:
                return(HAMMER_OBJTYPE_SOCKET);
        case VCHR:
                return(HAMMER_OBJTYPE_CDEV);
        case VBLK:
                return(HAMMER_OBJTYPE_BDEV);
        case VLNK:
                return(HAMMER_OBJTYPE_SOFTLINK);
        default:
                return(HAMMER_OBJTYPE_UNKNOWN);
        }
        /* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
        if (ip->hmp->hflags & HMNT_NOHISTORY)
                return(HAMMER_DELETE_DESTROY);
        if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
                return(HAMMER_DELETE_DESTROY);
        return(0);
}

/*
 * Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 * CRC in the upper 32 bits and 0 in the lower 32 bits.  The caller will
 * use the low bits to generate a unique key and will scan all entries
 * with the same upper 32 bits when issuing a lookup.
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(const void *name, int len)
{
        int64_t key;

        key = (int64_t)(crc32(name, len) & 0x7FFFFFFF) << 32;
        if (key == 0)
                key |= 0x100000000LL;
        return(key);
}
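
/*
 * Example: for a name whose crc32() is 0x89ABCDEF (a made-up value),
 * the namekey is (0x89ABCDEF & 0x7FFFFFFF) << 32 = 0x09ABCDEF00000000:
 * bit 63 stripped, low 32 bits zero and available for collision
 * resolution.
 */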

/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
                  hammer_tid_t *tidp, u_int32_t *localizationp)
{
        hammer_tid_t tid;
        u_int32_t localization;
        char *ptr;
        int ispfs;
        int n;

        /*
         * Forms allowed for TID:  "0x%016llx"
         *                         "-1"
         */
        tid = strtouq(str, &ptr, 0);
        n = ptr - str;
        if (n == 2 && str[0] == '-' && str[1] == '1') {
                /* ok */
        } else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
                /* ok */
        } else {
                return(EINVAL);
        }

        /*
         * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
         */
        str = ptr;
        if (*str == ':') {
                localization = strtoul(str + 1, &ptr, 10) << 16;
                if (ptr - str != 6)
                        return(EINVAL);
                str = ptr;
                ispfs = 1;
        } else {
                localization = *localizationp;
                ispfs = 0;
        }

        /*
         * Any trailing junk invalidates special extension handling.
         */
        if (*str)
                return(EINVAL);
        *tidp = tid;
        *localizationp = localization;
        *ispfsp = ispfs;
        return(0);
}
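
/*
 * Examples of accepted input (derived from the checks above):
 *
 *      "0x00000001061a8ba0"            TID only (exactly 18 characters)
 *      "0x00000001061a8ba0:00001"      TID plus PFS #1 localization
 *      "-1"                            wildcard TID
 *
 * "0x1" is rejected (wrong length) and "0x00000001061a8ba0:1" is
 * rejected because the PFS field must be exactly five digits.
 */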

void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
        blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
        ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
                          crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
        hammer_crc_t crc;

        crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
        return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
        hammer_crc_t crc;

        crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
              crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
        return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
        hammer_crc_t crc;

        crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
        return (ondisk->crc == crc);
}

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *       to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
        hammer_crc_t crc;

        if (leaf->data_len == 0) {
                crc = 0;
        } else {
                switch(leaf->base.rec_type) {
                case HAMMER_RECTYPE_INODE:
                        if (leaf->data_len != sizeof(struct hammer_inode_data))
                                return(0);
                        crc = crc32(data, HAMMER_INODE_CRCSIZE);
                        break;
                default:
                        crc = crc32(data, leaf->data_len);
                        break;
                }
        }
        return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
        if (leaf->data_len == 0) {
                leaf->data_crc = 0;
        } else {
                switch(leaf->base.rec_type) {
                case HAMMER_RECTYPE_INODE:
                        KKASSERT(leaf->data_len ==
                                  sizeof(struct hammer_inode_data));
                        leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
                        break;
                default:
                        leaf->data_crc = crc32(data, leaf->data_len);
                        break;
                }
        }
}
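
/*
 * Example (illustrative sketch): the set/test pair as a writer and a
 * verifying reader would use it.  Variable names are hypothetical.
 *
 *      hammer_crc_set_leaf(data, &leaf);               (writer)
 *      ...
 *      if (hammer_crc_test_leaf(data, &leaf) == 0)     (reader)
 *              kprintf("hammer: leaf data CRC mismatch\n");
 */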

/*
 * kprintf() wrapper conditional on the hammer_debug_debug flag.
 */
void
hkprintf(const char *ctl, ...)
{
        __va_list va;

        if (hammer_debug_debug) {
                __va_start(va, ctl);
                kvprintf(ctl, va);
                __va_end(va);
        }
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
        if (file_offset < HAMMER_XDEMARC)
                return(HAMMER_BUFSIZE);
        else
                return(HAMMER_XBUFSIZE);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
        if (file_offset1 < HAMMER_XDEMARC) {
                if (file_offset2 <= HAMMER_XDEMARC)
                        return(file_offset2);
                return(HAMMER_XDEMARC);
        }
        panic("hammer_blockdemarc: illegal range %lld %lld\n",
              file_offset1, file_offset2);
}
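
/*
 * Example (values assumed from the usual HAMMER configuration, where
 * HAMMER_BUFSIZE is 16KB, HAMMER_XBUFSIZE is 64KB and HAMMER_XDEMARC
 * is 1MB): hammer_blocksize() returns 16KB below 1MB and 64KB at or
 * beyond it, and hammer_blockdemarc(0, 4MB) returns 1MB, clipping a
 * ranged operation at the buffer-size boundary.
 */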

/*
 * Generate a udev_t by crc32ing the filesystem uuid.
 */
udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
        u_int32_t crc;

        crc = crc32(uuid, sizeof(*uuid));
        return((udev_t)crc);
}