hammer2 - Rewrite internal chain algorithms - stabilization
[dragonfly.git] / sys / vfs / hammer2 / hammer2.h
1 /*
2  * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35
36 /*
37  * This header file contains structures used internally by the HAMMER2
38  * implementation.  See hammer2_disk.h for on-disk structures.
39  */
40
41 #ifndef _VFS_HAMMER2_HAMMER2_H_
42 #define _VFS_HAMMER2_HAMMER2_H_
43
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/kernel.h>
47 #include <sys/conf.h>
48 #include <sys/systm.h>
49 #include <sys/tree.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/proc.h>
54 #include <sys/mountctl.h>
55 #include <sys/priv.h>
56 #include <sys/stat.h>
57 #include <sys/thread.h>
58 #include <sys/globaldata.h>
59 #include <sys/lockf.h>
60 #include <sys/buf.h>
61 #include <sys/queue.h>
62 #include <sys/limits.h>
63 #include <sys/buf2.h>
64 #include <sys/signal2.h>
65 #include <sys/dmsg.h>
66 #include <sys/mutex.h>
67 #include <sys/mutex2.h>
68
69 #include "hammer2_disk.h"
70 #include "hammer2_mount.h"
71 #include "hammer2_ioctl.h"
72 #include "hammer2_ccms.h"
73
/*
 * Forward declarations.  These let the structures declared in this header
 * hold pointers to one another before the full definitions appear.
 * hammer2_span, hammer2_state and hammer2_msg are only named here, they are
 * not defined in the portion of the header that follows.
 */
74 struct hammer2_chain;
75 struct hammer2_inode;
76 struct hammer2_mount;
77 struct hammer2_pfsmount;
78 struct hammer2_span;
79 struct hammer2_state;
80 struct hammer2_msg;
81
82 /*
83  * The chain structure tracks a portion of the media topology from the
84  * root (volume) down.  Chains represent volumes, inodes, indirect blocks,
85  * data blocks, and freemap nodes and leafs.
86  *
87  * The chain structure can be multi-homed and its topological recursion
88  * (chain->core) can be shared amongst several chains.  Chain structures
89  * are topologically stable once placed in the in-memory topology (they
90  * don't move around).  Modifications which cross flush synchronization
91  * boundaries, renames, resizing, or any move of the chain to elsewhere
92  * in the topology is accomplished via the DELETE-DUPLICATE mechanism.
93  *
94  * DELETE-DUPLICATE allows HAMMER2 to track work across flush synchronization
95  * points without stalling the filesystem or corrupting the flush
96  * synchronization point.  When necessary a chain will be marked DELETED
97  * and a new, duplicate chain will be allocated.
98  *
99  * This mechanism necessarily requires that we be able to overload chains
100  * at any given layer in the topology.  Overloading is accomplished via a
101  * RBTREE recursion through chain->rbtree.
102  *
103  * Advantages:
104  *
105  *      (1) Fully coherent snapshots can be taken without requiring
106  *          a pre-flush, resulting in extremely fast (sub-millisecond)
107  *          snapshots.
108  *
109  *      (2) Multiple synchronization points can be in-flight at the same
110  *          time, representing multiple snapshots or flushes.
111  *
112  *      (3) The algorithms needed to keep track of everything are actually
113  *          not that complex.
114  *
115  * Special Considerations:
116  *
117  *      A chain is ref-counted on a per-chain basis, but the chain's lock
118  *      is associated with the shared chain_core and is not per-chain.
119  *
120  *      The power-of-2 nature of the media radix tree ensures that there
121  *      will be no overlaps which straddle edges.
122  */
/* RB tree of chains; nodes link through hammer2_chain.rbnode (RB_PROTOTYPE) */
123 RB_HEAD(hammer2_chain_tree, hammer2_chain);
/* list of chains for flush deferral (cf. hammer2_chain.flush_node) */
124 TAILQ_HEAD(h2_flush_deferral_list, hammer2_chain);
/* list of chains; this is the type of hammer2_chain_core.ownerq */
125 TAILQ_HEAD(h2_core_list, hammer2_chain);
/* list of layers; this is the type of hammer2_chain_core.layerq */
126 TAILQ_HEAD(h2_layer_list, hammer2_chain_layer);
127
/*
 * One layer of chains.  A layer can be linked onto an h2_layer_list (the
 * type of hammer2_chain_core.layerq) and indexes its chains through its own
 * RB tree.  A chain points back at its layer via hammer2_chain.inlayer.
 */
128 struct hammer2_chain_layer {
129         int             good;           /* presumably a sanity marker -- TODO confirm */
130         TAILQ_ENTRY(hammer2_chain_layer) entry; /* h2_layer_list linkage */
131         struct hammer2_chain_tree rbtree;       /* chains in this layer */
132         int     refs;           /* prevent destruction */
133 };
134
135 typedef struct hammer2_chain_layer hammer2_chain_layer_t;
136
/*
 * The part of a chain's state that can be shared amongst several chains
 * (chain->core).  The chain lock is associated with the core rather than
 * with the individual chain.
 */
137 struct hammer2_chain_core {
138         int             good;           /* presumably a sanity marker -- TODO confirm */
139         struct ccms_cst cst;            /* CCMS state embedded in the core */
140         struct h2_core_list ownerq;     /* chains which own this core */
141         struct h2_layer_list layerq;    /* list of hammer2_chain_layer */
142         u_int           chain_count;    /* total chains in layers */
143         u_int           sharecnt;       /* presumably #of sharers -- TODO confirm */
144         u_int           flags;          /* presumably HAMMER2_CORE_* -- TODO confirm */
145         u_int           live_count;     /* live (not deleted) chains in tree */
146 };
147
148 typedef struct hammer2_chain_core hammer2_chain_core_t;
149
/* NOTE(review): looks like a value for hammer2_chain_core.flags -- confirm */
150 #define HAMMER2_CORE_INDIRECT           0x0001
151
/*
 * In-memory chain element.  Each chain carries its own blockref copy,
 * reference count and flags (HAMMER2_CHAIN_*), and points at a core that
 * may be shared with other chains.
 */
152 struct hammer2_chain {
153         RB_ENTRY(hammer2_chain) rbnode;         /* hammer2_chain_tree node */
154         TAILQ_ENTRY(hammer2_chain) core_entry;  /* contemporary chains */
155         hammer2_chain_layer_t   *inlayer;       /* layer this chain is in */
156         hammer2_blockref_t      bref;           /* blockref (see hammer2_disk.h) */
157         hammer2_chain_core_t    *core;          /* possibly shared core */
158         hammer2_chain_core_t    *above;         /* presumably parent's core -- TODO confirm */
159         struct hammer2_state    *state;         /* if active cache msg */
160         struct hammer2_mount    *hmp;           /* device mount */
161         struct hammer2_pfsmount *pmp;           /* can be NULL */
162
163         hammer2_tid_t   modify_tid;             /* snapshot/flush filter */
164         hammer2_tid_t   delete_tid;
165         hammer2_key_t   data_count;             /* deltas to apply */
166         hammer2_key_t   inode_count;            /* deltas to apply */
167         struct buf      *bp;                    /* physical data buffer */
168         u_int           bytes;                  /* physical data size */
169         u_int           flags;                  /* HAMMER2_CHAIN_* */
170         u_int           refs;                   /* per-chain ref count */
171         u_int           lockcnt;
172         int             live_zero;              /* blockref array opt */
173         hammer2_media_data_t *data;             /* data pointer shortcut */
174         TAILQ_ENTRY(hammer2_chain) flush_node;  /* flush deferral list */
175 };
176
177 typedef struct hammer2_chain hammer2_chain_t;
178
/* Comparator and generated prototypes for the hammer2_chain_tree RB tree. */
179 int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
180 RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
181
182 /*
183  * Special notes on flags:
184  *
185  * INITIAL - This flag allows a chain to be created and for storage to
186  *           be allocated without having to immediately instantiate the
187  *           related buffer.  The data is assumed to be all-zeros.  It
188  *           is primarily used for indirect blocks.
189  *
190  * MOVED   - A modified chain becomes MOVED after it flushes.  A chain
191  *           can also become MOVED if it is moved within the topology
192  *           (even if not modified).
193  */
/* Bit values for hammer2_chain.flags; each flag occupies a distinct bit. */
194 #define HAMMER2_CHAIN_MODIFIED          0x00000001      /* dirty chain data */
195 #define HAMMER2_CHAIN_ALLOCATED         0x00000002      /* kmalloc'd chain */
196 #define HAMMER2_CHAIN_DIRTYBP           0x00000004      /* dirty on unlock */
197 #define HAMMER2_CHAIN_SUBMODIFIED       0x00000008      /* recursive flush */
198 #define HAMMER2_CHAIN_DELETED           0x00000010      /* deleted chain */
199 #define HAMMER2_CHAIN_INITIAL           0x00000020      /* initial create */
200 #define HAMMER2_CHAIN_FLUSHED           0x00000040      /* flush on unlock */
201 #define HAMMER2_CHAIN_MOVED             0x00000080      /* bref changed */
202 #define HAMMER2_CHAIN_IOFLUSH           0x00000100      /* bawrite on put */
203 #define HAMMER2_CHAIN_DEFERRED          0x00000200      /* on a deferral list */
204 #define HAMMER2_CHAIN_DESTROYED         0x00000400      /* destroying inode */
205 #define HAMMER2_CHAIN_VOLUMESYNC        0x00000800      /* needs volume sync */
206 #define HAMMER2_CHAIN_RECYCLE           0x00001000      /* force recycle */
207 #define HAMMER2_CHAIN_MOUNTED           0x00002000      /* PFS is mounted */
208 #define HAMMER2_CHAIN_ONRBTREE          0x00004000      /* on parent RB tree */
209 #define HAMMER2_CHAIN_SNAPSHOT          0x00008000      /* snapshot special */
210 #define HAMMER2_CHAIN_EMBEDDED          0x00010000      /* embedded data */
211 #define HAMMER2_CHAIN_HARDLINK          0x00020000      /* converted to hlink */
212 #define HAMMER2_CHAIN_REPLACE           0x00040000      /* replace bref */
213 #define HAMMER2_CHAIN_COUNTEDBREFS      0x00080000      /* counted brefs */
214
215 /*
216  * Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
217  *
218  * NOTE: MATCHIND allows an indirect block / freemap node to be returned
219  *       when the passed key range matches the radix.  Remember that key_end
220  *       is inclusive (e.g. {0x000,0xFFF}, not {0x000,0x1000}).
221  */
222 #define HAMMER2_LOOKUP_NOLOCK           0x00000001      /* ref only */
223 #define HAMMER2_LOOKUP_NODATA           0x00000002      /* data left NULL */
224 #define HAMMER2_LOOKUP_SHARED           0x00000100      /* presumably shared lock -- TODO confirm */
225 #define HAMMER2_LOOKUP_MATCHIND         0x00000200      /* may return indirect/freemap node */
226 #define HAMMER2_LOOKUP_FREEMAP          0x00000400      /* freemap base */
227 #define HAMMER2_LOOKUP_ALWAYS           0x00000800      /* resolve data */
228
229 /*
230  * Flags passed to hammer2_chain_modify() and hammer2_chain_resize()
231  *
232  * NOTE: OPTDATA allows us to avoid instantiating buffers for INDIRECT
233  *       blocks in the INITIAL-create state.
234  */
235 #define HAMMER2_MODIFY_OPTDATA          0x00000002      /* data can be NULL */
236 #define HAMMER2_MODIFY_NO_MODIFY_TID    0x00000004      /* presumably leaves modify_tid alone -- TODO confirm */
237 #define HAMMER2_MODIFY_ASSERTNOCOPY     0x00000008      /* NOTE(review): semantics not visible here */
238 #define HAMMER2_MODIFY_NOREALLOC        0x00000010      /* NOTE(review): semantics not visible here */
239
240 /*
241  * Flags passed to hammer2_chain_lock()
242  */
/*
 * NEVER/MAYBE/ALWAYS are enumerated values (1..3) that fit within
 * HAMMER2_RESOLVE_MASK, they are not independent bits.  SHARED and NOREF
 * are flag bits located above the mask.
 */
243 #define HAMMER2_RESOLVE_NEVER           1
244 #define HAMMER2_RESOLVE_MAYBE           2
245 #define HAMMER2_RESOLVE_ALWAYS          3
246 #define HAMMER2_RESOLVE_MASK            0x0F
247
248 #define HAMMER2_RESOLVE_SHARED          0x10    /* request shared lock */
249 #define HAMMER2_RESOLVE_NOREF           0x20    /* already ref'd on lock */
250
251 /*
252  * Flags passed to hammer2_chain_delete()
253  */
254 #define HAMMER2_DELETE_WILLDUP          0x0001  /* no blk free, will be dup */
255
256 /*
257  * Flags passed to hammer2_chain_delete_duplicate()
258  */
259 #define HAMMER2_DELDUP_RECORE           0x0001  /* NOTE(review): presumably gives the dup a new core -- confirm */
260
261 /*
262  * Cluster different types of storage together for allocations
263  */
264 #define HAMMER2_FREECACHE_INODE         0
265 #define HAMMER2_FREECACHE_INDIR         1
266 #define HAMMER2_FREECACHE_DATA          2
267 #define HAMMER2_FREECACHE_UNUSED3       3
268 #define HAMMER2_FREECACHE_TYPES         4       /* count of the type values above */
269
270 /*
271  * hammer2_freemap_alloc() block preference
272  */
273 #define HAMMER2_OFF_NOPREF              ((hammer2_off_t)-1)     /* -1 converted to hammer2_off_t */
274
275 /*
276  * BMAP read-ahead maximum parameters
277  */
278 #define HAMMER2_BMAP_COUNT              16      /* max bmap read-ahead */
279 #define HAMMER2_BMAP_BYTES              (HAMMER2_PBUFSIZE * HAMMER2_BMAP_COUNT) /* same limit in bytes */
280
281 /*
282  * Misc
283  */
284 #define HAMMER2_FLUSH_DEPTH_LIMIT       40      /* stack recursion limit */
285
286 /*
287  * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
288  *
289  * There is an in-memory representation of all on-media data structures.
290  *
291  * When accessed read-only the data will be mapped to the related buffer
292  * cache buffer.
293  *
294  * When accessed read-write (marked modified) a kmalloc()'d copy of the
295  * data is created which can then be modified.  The copy is destroyed when a
296  * filesystem block is allocated to replace it.
297  *
298  * Active inodes (those with vnodes attached) will maintain the kmalloc()'d
299  * copy for both the read-only and the read-write case.  The combination of
300  * (bp) and (data) determines whether (data) was allocated or not.
301  *
302  * The in-memory representation may remain cached (for example in order to
303  * placemark clustering locks) even after the related data has been
304  * detached.
305  */
306
/* RB tree of inodes; this is the type of hammer2_pfsmount.inum_tree */
307 RB_HEAD(hammer2_inode_tree, hammer2_inode);
308
309 /*
310  * A hammer2 inode.
311  *
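312  * NOTE: The inode's attribute CST, which is also used to lock the inode,
313  *       is not a member of the inode.  The cst declared in this header
313  *       for chains lives in the chain's core (chain->core->cst), and no
313  *       attr_cst member is declared here (XXX verify the alias).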
314  */
315 struct hammer2_inode {
316         RB_ENTRY(hammer2_inode) rbnode;         /* inumber lookup (HL) */
317         ccms_cst_t              topo_cst;       /* directory topology cst */
318         struct hammer2_pfsmount *pmp;           /* PFS mount */
319         struct hammer2_inode    *pip;           /* parent inode */
320         struct vnode            *vp;            /* vnode, as returned by ITOV() */
321         hammer2_chain_t         *chain;         /* NOTE: rehomed on rename */
322         struct lockf            advlock;
323         hammer2_tid_t           inum;           /* inode number */
324         u_int                   flags;          /* HAMMER2_INODE_* */
325         u_int                   refs;           /* +vpref, +flushref */
326         uint8_t                 comp_heuristic;
327         hammer2_off_t           size;
328         uint64_t                mtime;
329 };
330
331 typedef struct hammer2_inode hammer2_inode_t;
332
/* Bit values for hammer2_inode.flags. */
333 #define HAMMER2_INODE_MODIFIED          0x0001
334 #define HAMMER2_INODE_SROOT             0x0002  /* kmalloc special case */
335 #define HAMMER2_INODE_RENAME_INPROG     0x0004
336 #define HAMMER2_INODE_ONRBTREE          0x0008
337 #define HAMMER2_INODE_RESIZED           0x0010
338 #define HAMMER2_INODE_MTIME             0x0020
339
/*
 * Comparator and generated prototypes for hammer2_inode_tree.  The
 * RB_PROTOTYPE2 form additionally names hammer2_tid_t as the key type.
 */
340 int hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2);
341 RB_PROTOTYPE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
342                 hammer2_tid_t);
343
344 /*
345  * A hammer2 transaction and flush sequencing structure.
346  *
347  * This global structure is tied into hammer2_mount and is used
348  * to sequence modifying operations and flushes.
349  *
350  * (a) Any modifying operations with sync_tid >= flush_tid will stall until
351  *     all modifying operations with sync_tid < flush_tid complete.
352  *
353  *     The flush related to flush_tid stalls until all modifying operations
354  *     with sync_tid < flush_tid complete.
355  *
356  * (b) Once unstalled, modifying operations with sync_tid > flush_tid are
357  *     allowed to run.  All modifications cause modify/duplicate operations
358  *     to occur on the related chains.  Note that most INDIRECT blocks will
359  *     be unaffected because the modifications just overload the RBTREE
360  *     structurally instead of actually modifying the indirect blocks.
361  *
362  * (c) The actual flush unstalls and RUNS CONCURRENTLY with (b), but only
363  *     utilizes the chain structures with sync_tid <= flush_tid.  The
364  *     flush will modify related indirect blocks and inodes in-place
365  *     (rather than duplicate) since the adjustments are compatible with
366  *     (b)'s RBTREE overloading.
367  *
368  *     SPECIAL NOTE:  Inode modifications have to also propagate along any
369  *                    modify/duplicate chains.  File writes detect the flush
370  *                    and force out the conflicting buffer cache buffer(s)
371  *                    before reusing them.
372  *
373  * (d) Snapshots can be made instantly but must be flushed and disconnected
374  *     from their duplicative source before they can be mounted.  This is
375  *     because while H2's on-media structure supports forks, its in-memory
376  *     structure only supports very simple forking for background flushing
377  *     purposes.
378  *
379  * TODO: Flush merging.  When fsync() is called on multiple discrete files
380  *       concurrently there is no reason to stall the second fsync.
381  *       The final flush that reaches to root can cover both fsync()s.
382  *
383  *     The chains typically terminate as they fly onto the disk.  The flush
384  *     ultimately reaches the volume header.
385  */
386 struct hammer2_trans {
387         TAILQ_ENTRY(hammer2_trans) entry;       /* hammer2_trans_queue linkage */
388         struct hammer2_pfsmount *pmp;
389         hammer2_tid_t           sync_tid;       /* ordering id, compared against flush_tid */
390         thread_t                td;             /* pointer */
391         int                     flags;          /* HAMMER2_TRANS_* */
392         int                     blocked;
393         uint8_t                 inodes_created;
394         uint8_t                 dummy[7];       /* presumably padding -- TODO confirm */
395 };
396
397 typedef struct hammer2_trans hammer2_trans_t;
398
/* Bit values for hammer2_trans.flags. */
399 #define HAMMER2_TRANS_ISFLUSH           0x0001  /* formal flush */
400 #define HAMMER2_TRANS_RESTRICTED        0x0002  /* snapshot flush restrict */
401 #define HAMMER2_TRANS_BUFCACHE          0x0004  /* from bioq strategy write */
402
/*
 * Dimensions of the freemap heuristic table.  HAMMER2_FREEMAP_HEUR is the
 * product of the two (4 * 8 = 32) and sizes hammer2_mount.heur_freemap[].
 */
403 #define HAMMER2_FREEMAP_HEUR_NRADIX     4       /* pwr 2 PBUFRADIX-MINIORADIX */
404 #define HAMMER2_FREEMAP_HEUR_TYPES      8
405 #define HAMMER2_FREEMAP_HEUR            (HAMMER2_FREEMAP_HEUR_NRADIX * \
406                                          HAMMER2_FREEMAP_HEUR_TYPES)
407
408 /*
409  * Global (per device) mount structure for device (aka vp->v_mount->hmp)
410  */
/* queue of transactions; this is the type of hammer2_mount.transq */
411 TAILQ_HEAD(hammer2_trans_queue, hammer2_trans);
412
413 struct hammer2_mount {
414         struct vnode    *devvp;         /* device vnode */
415         int             ronly;          /* read-only mount */
416         int             pmp_count;      /* PFS mounts backed by us */
417         TAILQ_ENTRY(hammer2_mount) mntentry; /* hammer2_mntlist */
418
419         struct malloc_type *mchain;     /* presumably malloc type for chains -- TODO confirm */
420         int             nipstacks;
421         int             maxipstacks;
422         hammer2_chain_t vchain;         /* anchor chain (embedded, not a pointer) */
423         hammer2_chain_t fchain;         /* freemap chain special (embedded) */
424         hammer2_chain_t *schain;        /* super-root */
425         hammer2_inode_t *sroot;         /* super-root inode */
426         struct lock     alloclk;        /* lockmgr lock */
427         struct lock     voldatalk;      /* lockmgr lock */
428         struct hammer2_trans_queue transq; /* all in-progress transactions */
429         hammer2_trans_t *curflush;      /* current flush in progress */
430         hammer2_tid_t   topo_flush_tid; /* currently synchronizing flush pt */
431         hammer2_tid_t   free_flush_tid; /* currently synchronizing flush pt */
432         hammer2_off_t   heur_freemap[HAMMER2_FREEMAP_HEUR]; /* freemap heuristic table */
433         int             flushcnt;       /* #of flush trans on the list */
434
435         int             volhdrno;       /* last volhdrno written */
436         hammer2_volume_data_t voldata;
437         hammer2_volume_data_t volsync;  /* synchronized voldata */
438         struct bio_queue_head wthread_bioq; /* bio queue for write thread */
439         struct mtx wthread_mtx;     /* mutex for write thread */
440         int     wthread_destroy;    /* to control the write thread */
441 };
442
443 typedef struct hammer2_mount hammer2_mount_t;
444
445 /*
446  * HAMMER2 cluster - a device/root associated with a PFS.
447  *
448  * A PFS may have several hammer2_cluster's associated with it.
449  */
/* Pairs a device mount with the root chain of a PFS on that device. */
450 struct hammer2_cluster {
451         struct hammer2_mount    *hmp;           /* device global mount */
452         hammer2_chain_t         *rchain;        /* PFS root chain */
453 };
454
455 typedef struct hammer2_cluster hammer2_cluster_t;
456
457 /*
458  * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data).
459  *
460  * This structure represents a cluster mount and not necessarily a
461  * PFS under a specific device mount (HMP).  The distinction is important
462  * because the elements backing a cluster mount can change on the fly.
463  */
464 struct hammer2_pfsmount {
465         struct mount            *mp;            /* kernel mount */
466         hammer2_cluster_t       *mount_cluster;
467         hammer2_cluster_t       *cluster;       /* MPTOHMP() reads cluster->hmp */
468         hammer2_inode_t         *iroot;         /* PFS root inode */
469         hammer2_off_t           inode_count;    /* copy of inode_count */
470         ccms_domain_t           ccms_dom;
471         struct netexport        export;         /* nfs export */
472         int                     ronly;          /* read-only mount */
473         struct malloc_type      *minode;        /* presumably malloc type for inodes -- TODO confirm */
474         struct malloc_type      *mmsg;          /* presumably malloc type for messages -- TODO confirm */
475         kdmsg_iocom_t           iocom;
476         struct spinlock         inum_spin;      /* inumber lookup */
477         struct hammer2_inode_tree inum_tree;    /* RB tree of hammer2_inode */
478         long                    inmem_inodes;
479         long                    inmem_chains;
480         int                     inmem_waiting;
481 };
482
483 typedef struct hammer2_pfsmount hammer2_pfsmount_t;
484
/*
 * Callback bundle: a chain, a callback, and an opaque argument.  The
 * callback has the same signature as the one hammer2_chain_load_async()
 * accepts.
 */
485 struct hammer2_cbinfo {
486         hammer2_chain_t *chain;
487         void (*func)(hammer2_chain_t *, struct buf *, char *, void *);
488         void *arg;      /* opaque; presumably passed to func -- TODO confirm */
489         size_t boff;    /* presumably byte offset into the buffer -- TODO confirm */
490 };
491
492 typedef struct hammer2_cbinfo hammer2_cbinfo_t;
493
494 #if defined(_KERNEL)
495
496 MALLOC_DECLARE(M_HAMMER2);
497
/* vnode -> inode: casts the vnode's v_data to a hammer2_inode_t pointer */
498 #define VTOI(vp)        ((hammer2_inode_t *)(vp)->v_data)
/* inode -> vnode: reads the inode's vp member */
499 #define ITOV(ip)        ((ip)->vp)
500
501 /*
502  * Currently locked chains retain the locked buffer cache buffer for
503  * indirect blocks, and indirect blocks can be one of two sizes.  The
504  * device buffer has to match the case to avoid deadlocking recursive
505  * chains that might otherwise try to access different offsets within
506  * the same device buffer.
507  */
508 static __inline
509 int
510 hammer2_devblkradix(int radix)
511 {
512 #if 1
513         if (radix <= HAMMER2_LBUFRADIX) {
514                 return (HAMMER2_LBUFRADIX);
515         } else {
516                 return (HAMMER2_PBUFRADIX);
517         }
518 #else
519         return (HAMMER2_PBUFRADIX);
520 #endif
521 }
522
523 static __inline
524 size_t
525 hammer2_devblksize(size_t bytes)
526 {
527 #if 1
528         if (bytes <= HAMMER2_LBUFSIZE) {
529                 return(HAMMER2_LBUFSIZE);
530         } else {
531                 KKASSERT(bytes <= HAMMER2_PBUFSIZE &&
532                          (bytes ^ (bytes - 1)) == ((bytes << 1) - 1));
533                 return (HAMMER2_PBUFSIZE);
534         }
535 #else
536         KKASSERT(bytes <= HAMMER2_PBUFSIZE &&
537                  (bytes ^ (bytes - 1)) == ((bytes << 1) - 1));
538         return(HAMMER2_PBUFSIZE);
539 #endif
540 }
541
542
543 static __inline
544 hammer2_pfsmount_t *
545 MPTOPMP(struct mount *mp)
546 {
547         return ((hammer2_pfsmount_t *)mp->mnt_data);
548 }
549
550 static __inline
551 hammer2_mount_t *
552 MPTOHMP(struct mount *mp)
553 {
554         return (((hammer2_pfsmount_t *)mp->mnt_data)->cluster->hmp);
555 }
556
557 static __inline
558 int
559 hammer2_chain_refactor_test(hammer2_chain_t *chain, int traverse_hlink)
560 {
561         hammer2_chain_t *next;
562
563         next = TAILQ_NEXT(chain, core_entry);
564
565         if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
566             next &&
567             (next->flags & HAMMER2_CHAIN_SNAPSHOT) == 0) {
568                 return (1);
569         }
570         if (traverse_hlink &&
571             chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
572             (chain->flags & HAMMER2_CHAIN_HARDLINK) &&
573             next &&
574             (next->flags & HAMMER2_CHAIN_SNAPSHOT) == 0) {
575                 return(1);
576         }
577
578         return (0);
579 }
580
/* vnode operation vectors (regular, spec and fifo variants by name) */
581 extern struct vop_ops hammer2_vnode_vops;
582 extern struct vop_ops hammer2_spec_vops;
583 extern struct vop_ops hammer2_fifo_vops;
584
/* global tunables/knobs */
585 extern int hammer2_debug;
586 extern int hammer2_cluster_enable;
587 extern int hammer2_hardlink_enable;
/*
 * I/O counters.  The names follow hammer2_{iod,ioa}_{file,meta,indr,fmap,
 * volu}_{read,write}.  NOTE(review): the iod/ioa distinction is not
 * documented in this header -- confirm against the definitions.
 */
588 extern long hammer2_iod_file_read;
589 extern long hammer2_iod_meta_read;
590 extern long hammer2_iod_indr_read;
591 extern long hammer2_iod_fmap_read;
592 extern long hammer2_iod_volu_read;
593 extern long hammer2_iod_file_write;
594 extern long hammer2_iod_meta_write;
595 extern long hammer2_iod_indr_write;
596 extern long hammer2_iod_fmap_write;
597 extern long hammer2_iod_volu_write;
598 extern long hammer2_ioa_file_read;
599 extern long hammer2_ioa_meta_read;
600 extern long hammer2_ioa_indr_read;
601 extern long hammer2_ioa_fmap_read;
602 extern long hammer2_ioa_volu_read;
603 extern long hammer2_ioa_file_write;
604 extern long hammer2_ioa_meta_write;
605 extern long hammer2_ioa_indr_write;
606 extern long hammer2_ioa_fmap_write;
607 extern long hammer2_ioa_volu_write;
608
609 extern struct objcache *cache_buffer_read;
610 extern struct objcache *cache_buffer_write;
611
/*
 * NOTE(review): the three globals below, like cache_buffer_read/write
 * above, carry no hammer2_ prefix.  Every file that includes this header
 * sees these generic names, which risks collisions with unrelated symbols.
 * Renaming them would require touching the defining .c files as well.
 */
612 extern int destroy;
613 extern int write_thread_wakeup;
614
615 extern mtx_t thread_protect;
616
617 /*
618  * hammer2_subr.c
619  */
/* CRC wrappers: direct aliases for iscsi_crc32() and iscsi_crc32_ext() */
620 #define hammer2_icrc32(buf, size)       iscsi_crc32((buf), (size))
621 #define hammer2_icrc32c(buf, size, crc) iscsi_crc32_ext((buf), (size), (crc))
622
/*
 * Inode and volume-data locking.  The inode lock calls return a chain
 * pointer and the matching unlock calls take a chain pointer; presumably
 * the same chain is handed back -- TODO confirm.  The temp_release and
 * upgrade calls return a ccms_state_t that the matching restore/downgrade
 * call takes as ostate.
 */
623 hammer2_chain_t *hammer2_inode_lock_ex(hammer2_inode_t *ip);
624 hammer2_chain_t *hammer2_inode_lock_sh(hammer2_inode_t *ip);
625 void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain);
626 void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain);
627 void hammer2_voldata_lock(hammer2_mount_t *hmp);
628 void hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify);
629 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
630 void hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate);
631 ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip);
632 void hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate);
633
/* device mount locking */
634 void hammer2_mount_exlock(hammer2_mount_t *hmp);
635 void hammer2_mount_shlock(hammer2_mount_t *hmp);
636 void hammer2_mount_unlock(hammer2_mount_t *hmp);
637
/* type, time and id conversion helpers */
638 int hammer2_get_dtype(hammer2_chain_t *chain);
639 int hammer2_get_vtype(hammer2_chain_t *chain);
640 u_int8_t hammer2_get_obj_type(enum vtype vtype);
641 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
642 u_int64_t hammer2_timespec_to_time(struct timespec *ts);
643 u_int32_t hammer2_to_unix_xid(uuid_t *uuid);
644 void hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
645
646 hammer2_key_t hammer2_dirhash(const unsigned char *name, size_t len);
647 int hammer2_getradix(size_t bytes);
648
/* logical/physical block size calculations for an inode */
649 int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
650                         hammer2_key_t *lbasep, hammer2_key_t *leofp);
651 int hammer2_calc_physical(hammer2_inode_t *ip, hammer2_key_t lbase);
652 void hammer2_update_time(uint64_t *timep);
653
654 /*
655  * hammer2_inode.c
656  */
/* vnode acquisition for an inode; an error code is stored through errorp */
657 struct vnode *hammer2_igetv(hammer2_inode_t *ip, int *errorp);
658
/* inode lookup, reference counting and lifetime */
659 void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
660 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
661 hammer2_inode_t *hammer2_inode_lookup(hammer2_pfsmount_t *pmp,
662                         hammer2_tid_t inum);
663 hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp,
664                         hammer2_inode_t *dip, hammer2_chain_t *chain);
665 void hammer2_inode_free(hammer2_inode_t *ip);
666 void hammer2_inode_ref(hammer2_inode_t *ip);
667 void hammer2_inode_drop(hammer2_inode_t *ip);
668 void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
669                         hammer2_chain_t *chain);
670
/*
 * Namespace operations.  All of the routines below that modify state take
 * the governing transaction as their first argument; hammer2_hardlink_find()
 * does not take one.
 */
671 hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans,
672                         hammer2_inode_t *dip,
673                         struct vattr *vap, struct ucred *cred,
674                         const uint8_t *name, size_t name_len,
675                         hammer2_chain_t **chainp, int *errorp);
676 int hammer2_inode_connect(hammer2_trans_t *trans, int hlink,
677                         hammer2_inode_t *dip, hammer2_chain_t **chainp,
678                         const uint8_t *name, size_t name_len);
679 hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip,
680                         hammer2_inode_t *tdip);
681 void hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
682                         hammer2_chain_t **parentp);
683 int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
684                         const uint8_t *name, size_t name_len, int isdir,
685                         int *hlinkp);
686 int hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip,
687                         hammer2_chain_t **chainp,
688                         hammer2_inode_t *tdip, int linkcnt);
689 int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip,
690                         hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
691 int hammer2_hardlink_find(hammer2_inode_t *dip,
692                         hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
693
694 /*
695  * hammer2_chain.c
 *
 * Prototypes for the chain entry points named below (alloc, ref/drop,
 * lock/unlock, load_async, modify, resize, get, lookup/next).
 *
 * - hammer2_chain_modify(), _modify_ip(), _resize(), _lookup() and _next()
 *   take hammer2_chain_t ** (chainp/parentp) instead of a plain pointer.
 *   NOTE(review): presumably the callee may replace the caller's pointer;
 *   confirm against the implementation before caching *chainp/*parentp
 *   across a call.
 * - hammer2_chain_load_async() takes a callback receiving
 *   (hammer2_chain_t *, struct buf *, char *, void *) plus an opaque arg.
 *   NOTE(review): arg is presumably handed back as the callback's last
 *   argument -- confirm.
 * - hammer2_chain_lookup()/_next() take a key range (key_beg, key_end) and
 *   the pointers key_nextp and cache_indexp.  NOTE(review): whether key_end
 *   is inclusive and how the two pointers are updated is not visible in
 *   this header -- check the implementation.
696  */
697 void hammer2_modify_volume(hammer2_mount_t *hmp);
698 hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp,
699                                 hammer2_trans_t *trans, hammer2_blockref_t *bref);
700 void hammer2_chain_core_alloc(hammer2_trans_t *trans, hammer2_chain_t *nchain,
701                                 hammer2_chain_t *ochain);
702 void hammer2_chain_ref(hammer2_chain_t *chain);
703 void hammer2_chain_drop(hammer2_chain_t *chain);
704 int hammer2_chain_lock(hammer2_chain_t *chain, int how);
705 void hammer2_chain_load_async(hammer2_chain_t *chain,
706                                 void (*func)(hammer2_chain_t *, struct buf *,
707                                              char *, void *),
708                                 void *arg);
709 void hammer2_chain_moved(hammer2_chain_t *chain);
710 void hammer2_chain_modify(hammer2_trans_t *trans,
711                                 hammer2_chain_t **chainp, int flags);
712 hammer2_inode_data_t *hammer2_chain_modify_ip(hammer2_trans_t *trans,
713                                 hammer2_inode_t *ip, hammer2_chain_t **chainp,
714                                 int flags);
715 void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
716                                 hammer2_chain_t *parent,
717                                 hammer2_chain_t **chainp,
718                                 int nradix, int flags);
719 void hammer2_chain_unlock(hammer2_chain_t *chain);
720 void hammer2_chain_wait(hammer2_chain_t *chain);
721 hammer2_chain_t *hammer2_chain_get(hammer2_chain_t *parent,
722                                 hammer2_blockref_t *bref);
723 hammer2_chain_t *hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags);
724 void hammer2_chain_lookup_done(hammer2_chain_t *parent);
725 hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp,
726                                 hammer2_key_t *key_nextp,
727                                 hammer2_key_t key_beg, hammer2_key_t key_end,
728                                 int *cache_indexp, int flags);
729 hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp,
730                                 hammer2_chain_t *chain,
731                                 hammer2_key_t *key_nextp,
732                                 hammer2_key_t key_beg, hammer2_key_t key_end,
733                                 int *cache_indexp, int flags);
734
/*
 * Chain create/duplicate/snapshot/delete/flush/commit/setsubmod prototypes.
 * Every function in this group takes a hammer2_trans_t * as its first
 * argument.
 *
 * Only hammer2_chain_create() and hammer2_chain_snapshot() return a value
 * (int).  NOTE(review): presumably an errno-style code with 0 meaning
 * success -- confirm against the implementation.
 *
 * hammer2_chain_create(), _duplicate() and _delete_duplicate() take
 * hammer2_chain_t ** arguments.  NOTE(review): presumably so the callee can
 * hand back a new or replacement chain -- confirm.
 */
735 int hammer2_chain_create(hammer2_trans_t *trans,
736                                 hammer2_chain_t **parentp,
737                                 hammer2_chain_t **chainp,
738                                 hammer2_key_t key, int keybits,
739                                 int type, size_t bytes);
740 void hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t *parent,
741                                 hammer2_chain_t **chainp,
742                                 hammer2_blockref_t *bref);
743 int hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_inode_t *ip,
744                                 hammer2_ioc_pfs_t *pfs);
745 void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
746                                 int flags);
747 void hammer2_chain_delete_duplicate(hammer2_trans_t *trans,
748                                 hammer2_chain_t **chainp, int flags);
749 void hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t *chain);
750 void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain);
751 void hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain);
752
/*
 * Miscellaneous chain helpers; none of them return a value.
 *
 * hammer2_chain_memory_wait() and hammer2_chain_memory_wakeup() both take
 * the PFS mount.  NOTE(review): by name these look like a wait/wakeup pair
 * on per-PFS chain memory use -- confirm the blocking behavior before
 * calling with locks held.
 *
 * hammer2_chain_countbrefs() takes a blockref pointer plus a count.
 * NOTE(review): presumably base points at an array of count elements --
 * confirm.
 */
753 void hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp);
754 void hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp);
755 void hammer2_chain_countbrefs(hammer2_chain_t *chain,
756                                 hammer2_blockref_t *base, int count);
757 void hammer2_chain_layer_check_locked(hammer2_mount_t *hmp,
758                                 hammer2_chain_core_t *core);
759
/*
 * hammer2_base_find/delete/insert prototypes.  All three share the leading
 * arguments (chain, base, count, cache_indexp).
 *
 * NOTE(review): base/count presumably describe a blockref array and its
 * element count, and the int returned by hammer2_base_find() is presumably
 * an index into that array -- neither is visible in this header; confirm
 * against the implementation.
 *
 * hammer2_base_find() additionally takes key_nextp and a key range
 * (key_beg, key_end); hammer2_base_delete()/_insert() take the element elm,
 * and only _insert() takes flags.
 */
760 int hammer2_base_find(hammer2_chain_t *chain,
761                                 hammer2_blockref_t *base, int count,
762                                 int *cache_indexp, hammer2_key_t *key_nextp,
763                                 hammer2_key_t key_beg, hammer2_key_t key_end);
764 void hammer2_base_delete(hammer2_chain_t *chain,
765                                 hammer2_blockref_t *base, int count,
766                                 int *cache_indexp, hammer2_blockref_t *elm);
767 void hammer2_base_insert(hammer2_chain_t *chain,
768                                 hammer2_blockref_t *base, int count,
769                                 int *cache_indexp, hammer2_blockref_t *elm,
770                                 int flags);
771
772 /*
773  * hammer2_trans.c
 *
 * Transaction prototypes.  hammer2_trans_init() takes a caller-supplied
 * hammer2_trans_t together with the PFS mount and flags;
 * hammer2_trans_done() takes the same hammer2_trans_t.  Neither returns a
 * value.
 *
 * NOTE(review): presumably every hammer2_trans_init() must be paired with
 * a hammer2_trans_done() on the same structure -- confirm.
774  */
775 void hammer2_trans_init(hammer2_trans_t *trans,
776                         hammer2_pfsmount_t *pmp, int flags);
777 void hammer2_trans_done(hammer2_trans_t *trans);
778
779 /*
780  * hammer2_ioctl.c
 *
 * hammer2_ioctl() takes the inode, a command word (com), an untyped data
 * pointer, fflag and the caller's credentials, and returns int.
 *
 * NOTE(review): the return value is presumably an errno code (0 on
 * success) and data presumably points at the command-specific argument
 * structure -- confirm against the implementation.
781  */
782 int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data,
783                                 int fflag, struct ucred *cred);
784
785 /*
786  * hammer2_msgops.c
 *
 * Message handler prototypes.  Both functions take a single kdmsg_msg_t *
 * and return int.
 *
 * NOTE(review): the meaning of the return value is not visible in this
 * header -- check the kdmsg callers before relying on it.
787  */
788 int hammer2_msg_dbg_rcvmsg(kdmsg_msg_t *msg);
789 int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
790
791 /*
792  * hammer2_vfsops.c
 *
 * Mount-level prototypes; none of them return a value.
 *
 * hammer2_dump_chain() takes a chain, an int tab and an int *countp.
 * NOTE(review): presumably tab is an indentation level and *countp a
 * running counter the callee updates -- confirm against the
 * implementation.
793  */
794 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom);
795 void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index);
796 void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp);
797 void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp);
798
799 /*
800  * hammer2_freemap.c
 *
 * Freemap prototypes.  Both functions take the transaction, the mount and
 * a blockref; hammer2_freemap_alloc() also takes a byte count and returns
 * int, while hammer2_freemap_free() takes an int how and returns nothing.
 *
 * NOTE(review): the int returned by hammer2_freemap_alloc() is presumably
 * an errno-style code, and bref is presumably updated with the allocated
 * location -- neither is visible here; confirm against the implementation.
801  */
802 int hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp,
803                                 hammer2_blockref_t *bref, size_t bytes);
804 void hammer2_freemap_free(hammer2_trans_t *trans, hammer2_mount_t *hmp,
805                                 hammer2_blockref_t *bref, int how);
806
807
808 #endif /* !_KERNEL */
809 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */