2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * This header file contains structures used internally by the HAMMER2
38 * implementation. See hammer2_disk.h for on-disk structures.
41 #ifndef _VFS_HAMMER2_HAMMER2_H_
42 #define _VFS_HAMMER2_HAMMER2_H_
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/kernel.h>
48 #include <sys/systm.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
54 #include <sys/mountctl.h>
57 #include <sys/thread.h>
58 #include <sys/globaldata.h>
59 #include <sys/lockf.h>
61 #include <sys/queue.h>
62 #include <sys/limits.h>
64 #include <sys/signal2.h>
66 #include <sys/mutex.h>
67 #include <sys/mutex2.h>
69 #include "hammer2_disk.h"
70 #include "hammer2_mount.h"
71 #include "hammer2_ioctl.h"
72 #include "hammer2_ccms.h"
77 struct hammer2_pfsmount;
83 * The chain structure tracks a portion of the media topology from the
84 * root (volume) down. Chains represent volumes, inodes, indirect blocks,
85 * data blocks, and freemap nodes and leafs.
87 * The chain structure can be multi-homed and its topological recursion
88 * (chain->core) can be shared amongst several chains. Chain structures
89 * are topologically stable once placed in the in-memory topology (they
90 * don't move around). Modifications which cross flush synchronization
91 * boundaries, renames, resizing, or any move of the chain to elsewhere
92 * in the topology is accomplished via the DELETE-DUPLICATE mechanism.
94 * Deletions and delete-duplicates:
96 * Any movement of chains within the topology utilizes a delete-duplicate
97 * operation instead of a simple rename. That is, the chain must be
98 * deleted from its original location and then duplicated to the new
99 * location. A new chain structure is allocated while the old is
100 * deleted. Deleted chains are removed from the above chain_core's
101 * rbtree but remain linked via the shadow topology for flush
102 * synchronization purposes.
104 * delete_bmap is allocated and a bit set if the chain was originally
105 * loaded via the blockmap.
107 * Flush synchronization:
109 * Flushes must synchronize chains up through the root. To do this
110 * the in-memory topology would normally have to be frozen during the
111 * flush. To avoid freezing the topology and to allow concurrent
112 * foreground / flush activity, any new modifications made while a
113 * flush is in progress retain the original chain in a shadow topology
114 * that is only visible to the flush code. Only one flush can be
115 * running at a time so the shadow hierarchy can be implemented with
116 * just a few link fields in our in-memory data structures.
120 * (1) Fully coherent snapshots can be taken without requiring
121 * a pre-flush, resulting in extremely fast (sub-millisecond)
124 * (2) Multiple synchronization points can be in-flight at the same
125 * time, representing multiple snapshots or flushes.
127 * (3) The algorithms needed to keep track of everything are actually
130 * Special Considerations:
132 * A chain is ref-counted on a per-chain basis, but the chain's lock
133 * is associated with the shared chain_core and is not per-chain.
135 * The power-of-2 nature of the media radix tree ensures that there
136 * will be no overlaps which straddle edges.
/*
 * Container head types whose elements are hammer2_chain structures: one
 * red-black tree head and two tail-queue heads (the flush deferral list
 * and the list type used for the ownerq and dbq fields of the core).
 */
138 RB_HEAD(hammer2_chain_tree, hammer2_chain);
139 TAILQ_HEAD(h2_flush_deferral_list, hammer2_chain);
140 TAILQ_HEAD(h2_core_list, hammer2_chain);
/*
 * Entry count computed as HAMMER2_PBUFSIZE divided by the size of one
 * blockref and then by sizeof(uint32_t).
 * NOTE(review): presumably dimensions the delete_bmap array -- confirm.
 * Also confirm that the final divisor is meant to be the byte size of
 * uint32_t rather than its width in bits.
 */
142 #define CHAIN_CORE_DELETE_BMAP_ENTRIES \
143 (HAMMER2_PBUFSIZE / sizeof(hammer2_blockref_t) / sizeof(uint32_t))
/*
 * Recursion point for the sub-chains of a chain.  A core can be shared by
 * several chains (tracked on ownerq).  Live sub-chains are indexed in
 * rbtree; deleted sub-chains are kept in dbtree when they were bmapped
 * and on dbq otherwise.
 */
145 struct hammer2_chain_core {
148 struct h2_core_list ownerq; /* all chains sharing this core */
149 struct hammer2_chain_tree rbtree; /* live chains */
150 struct hammer2_chain_tree dbtree; /* bmapped deletions */
151 struct h2_core_list dbq; /* other (non-bmapped) deletions */
152 int live_zero; /* blockref array opt */
155 u_int live_count; /* live (not deleted) chains in tree */
156 u_int chain_count; /* live + deleted chains under core */
157 int generation; /* generation number (inserts only) */
160 typedef struct hammer2_chain_core hammer2_chain_core_t;
162 #define HAMMER2_CORE_UNUSED0001 0x0001
163 #define HAMMER2_CORE_COUNTEDBREFS 0x0002
166 * H2 is a copy-on-write filesystem. In order to allow chains to allocate
167 * smaller blocks (down to 64-bytes), but improve performance and make
168 * clustered I/O possible using larger block sizes, the kernel buffer cache
169 * is abstracted via the hammer2_io structure.
171 RB_HEAD(hammer2_io_tree, hammer2_io);
174 RB_ENTRY(hammer2_io) rbnode; /* indexed by device offset */
175 struct spinlock spin;
176 struct hammer2_mount *hmp;
181 void (*callback)(struct hammer2_io *dio,
182 struct hammer2_chain *chain,
183 void *arg1, off_t arg2);
184 struct hammer2_chain *arg_c; /* INPROG I/O only */
185 void *arg_p; /* INPROG I/O only */
186 off_t arg_o; /* INPROG I/O only */
188 int act; /* activity */
191 typedef struct hammer2_io hammer2_io_t;
194 * Primary chain structure keeps track of the topology in-memory.
/*
 * In-memory element of the media topology.  The three list/tree entries
 * at the top link the chain into the containers of a hammer2_chain_core;
 * the dsrc_* and ninserts/nremoves fields are debugging aids only.
 */
196 struct hammer2_chain {
197 TAILQ_ENTRY(hammer2_chain) core_entry; /* contemporary chains */
198 RB_ENTRY(hammer2_chain) rbnode; /* live chain(s) */
199 TAILQ_ENTRY(hammer2_chain) db_entry; /* non bmapped deletions */
200 hammer2_blockref_t bref;
201 hammer2_chain_core_t *core; /* recursion point, may be shared */
202 hammer2_chain_core_t *above; /* NOTE(review): presumably the core this chain is linked under -- confirm */
203 struct hammer2_state *state; /* if active cache msg */
204 struct hammer2_mount *hmp;
205 struct hammer2_pfsmount *pmp; /* can be NULL */
207 hammer2_blockref_t dsrc; /* DEBUG */
208 int ninserts; /* DEBUG */
209 int nremoves; /* DEBUG */
210 hammer2_tid_t dsrc_dupfromat; /* DEBUG */
211 uint32_t dsrc_dupfromflags; /* DEBUG */
212 int dsrc_reason; /* DEBUG */
213 int dsrc_ninserts; /* DEBUG */
214 uint32_t dsrc_flags; /* DEBUG */
215 hammer2_tid_t dsrc_modify; /* DEBUG */
216 hammer2_tid_t dsrc_delete; /* DEBUG */
217 hammer2_tid_t dsrc_update_lo; /* DEBUG */
218 struct hammer2_chain *dsrc_original; /* DEBUG */
220 hammer2_tid_t modify_tid; /* flush filter */
221 hammer2_tid_t delete_tid; /* flush filter */
222 hammer2_tid_t update_lo; /* flush propagation */
223 hammer2_tid_t update_hi; /* setsubmod propagation */
224 hammer2_key_t data_count; /* deltas to apply */
225 hammer2_key_t inode_count; /* deltas to apply */
226 hammer2_io_t *dio; /* physical data buffer */
227 u_int bytes; /* physical data size */
231 hammer2_media_data_t *data; /* data pointer shortcut */
232 TAILQ_ENTRY(hammer2_chain) flush_node; /* flush deferral list */
237 typedef struct hammer2_chain hammer2_chain_t;
239 int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
240 RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
243 * Special notes on flags:
245 * INITIAL - This flag allows a chain to be created and for storage to
246 * be allocated without having to immediately instantiate the
247 * related buffer. The data is assumed to be all-zeros. It
248 * is primarily used for indirect blocks.
250 * MODIFIED- The chain's media data has been modified.
/*
 * Bit values for chain->flags.  Every flag occupies its own bit, so any
 * combination can be OR-ed together.
 */
252 #define HAMMER2_CHAIN_MODIFIED 0x00000001 /* dirty chain data */
253 #define HAMMER2_CHAIN_ALLOCATED 0x00000002 /* kmalloc'd chain */
254 #define HAMMER2_CHAIN_FLUSH_TEMPORARY 0x00000004
255 #define HAMMER2_CHAIN_FORCECOW 0x00000008 /* force copy-on-wr */
256 #define HAMMER2_CHAIN_DELETED 0x00000010 /* deleted chain */
257 #define HAMMER2_CHAIN_INITIAL 0x00000020 /* initial create */
258 #define HAMMER2_CHAIN_FLUSH_CREATE 0x00000040 /* needs flush blkadd */
259 #define HAMMER2_CHAIN_FLUSH_DELETE 0x00000080 /* needs flush blkdel */
260 #define HAMMER2_CHAIN_IOFLUSH 0x00000100 /* bawrite on put */
261 #define HAMMER2_CHAIN_DEFERRED 0x00000200 /* on a deferral list */
262 #define HAMMER2_CHAIN_UNLINKED 0x00000400 /* delete on reclaim */
263 #define HAMMER2_CHAIN_VOLUMESYNC 0x00000800 /* needs volume sync */
264 #define HAMMER2_CHAIN_ONDBQ 0x00001000 /* !bmapped deletes */
265 #define HAMMER2_CHAIN_MOUNTED 0x00002000 /* PFS is mounted */
266 #define HAMMER2_CHAIN_ONRBTREE 0x00004000 /* on parent RB tree */
267 #define HAMMER2_CHAIN_SNAPSHOT 0x00008000 /* snapshot special */
268 #define HAMMER2_CHAIN_EMBEDDED 0x00010000 /* embedded data */
269 #define HAMMER2_CHAIN_RELEASE 0x00020000 /* don't keep around */
270 #define HAMMER2_CHAIN_BMAPPED 0x00040000 /* in parent blkmap */
271 #define HAMMER2_CHAIN_ONDBTREE 0x00080000 /* bmapped deletes */
272 #define HAMMER2_CHAIN_DUPLICATED 0x00100000 /* fwd delete-dup */
273 #define HAMMER2_CHAIN_PFSROOT 0x00200000 /* in pfs->cluster */
276 * Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
278 * NOTE: MATCHIND allows an indirect block / freemap node to be returned
279 * when the passed key range matches the radix. Remember that key_end
280 * is inclusive (e.g. {0x000,0xFFF}, not {0x000,0x1000}).
282 #define HAMMER2_LOOKUP_NOLOCK 0x00000001 /* ref only */
283 #define HAMMER2_LOOKUP_NODATA 0x00000002 /* data left NULL */
284 #define HAMMER2_LOOKUP_SHARED 0x00000100
285 #define HAMMER2_LOOKUP_MATCHIND 0x00000200 /* return all chains */
286 #define HAMMER2_LOOKUP_UNUSED0400 0x00000400
287 #define HAMMER2_LOOKUP_ALWAYS 0x00000800 /* resolve data */
290 * Flags passed to hammer2_chain_modify() and hammer2_chain_resize()
292 * NOTE: OPTDATA allows us to avoid instantiating buffers for INDIRECT
293 * blocks in the INITIAL-create state.
295 #define HAMMER2_MODIFY_OPTDATA 0x00000002 /* data can be NULL */
296 #define HAMMER2_MODIFY_NO_MODIFY_TID 0x00000004
297 #define HAMMER2_MODIFY_ASSERTNOCOPY 0x00000008 /* assert no del-dup */
298 #define HAMMER2_MODIFY_NOREALLOC 0x00000010
299 #define HAMMER2_MODIFY_INPLACE 0x00000020 /* don't del-dup */
302 * Flags passed to hammer2_chain_lock()
/*
 * The low four bits (HAMMER2_RESOLVE_MASK) hold exactly one of the
 * NEVER / MAYBE / ALWAYS mode values.  SHARED and NOREF are separate
 * modifier bits located above the mask and may be OR-ed with the mode.
 */
304 #define HAMMER2_RESOLVE_NEVER 1
305 #define HAMMER2_RESOLVE_MAYBE 2
306 #define HAMMER2_RESOLVE_ALWAYS 3
307 #define HAMMER2_RESOLVE_MASK 0x0F /* extracts the mode value */
309 #define HAMMER2_RESOLVE_SHARED 0x10 /* request shared lock */
310 #define HAMMER2_RESOLVE_NOREF 0x20 /* already ref'd on lock */
313 * Flags passed to hammer2_chain_delete()
315 #define HAMMER2_DELETE_UNUSED0001 0x0001
318 * Flags passed to hammer2_chain_delete_duplicate()
320 #define HAMMER2_DELDUP_RECORE 0x0001
323 * Cluster different types of storage together for allocations
/*
 * Storage class indices, numbered consecutively from zero.
 * HAMMER2_FREECACHE_TYPES is the number of classes (including the
 * unused slot 3), not a class itself.
 */
325 #define HAMMER2_FREECACHE_INODE 0
326 #define HAMMER2_FREECACHE_INDIR 1
327 #define HAMMER2_FREECACHE_DATA 2
328 #define HAMMER2_FREECACHE_UNUSED3 3
329 #define HAMMER2_FREECACHE_TYPES 4
332 * hammer2_freemap_alloc() block preference
334 #define HAMMER2_OFF_NOPREF ((hammer2_off_t)-1)
337 * BMAP read-ahead maximum parameters
339 #define HAMMER2_BMAP_COUNT 16 /* max bmap read-ahead */
340 #define HAMMER2_BMAP_BYTES (HAMMER2_PBUFSIZE * HAMMER2_BMAP_COUNT)
345 #define HAMMER2_FLUSH_DEPTH_LIMIT 10 /* stack recursion limit */
348 * hammer2_freemap_adjust()
/*
 * Operation selectors for hammer2_freemap_adjust().
 * NOTE(review): presumably passed as its `how` argument -- confirm
 * against the implementation.
 */
350 #define HAMMER2_FREEMAP_DORECOVER 1
351 #define HAMMER2_FREEMAP_DOMAYFREE 2
352 #define HAMMER2_FREEMAP_DOREALFREE 3
355 * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
357 * There is an in-memory representation of all on-media data structures.
358 * Basically everything is represented by a hammer2_chain structure
359 * in-memory and other higher-level structures map to chains.
361 * A great deal of data is accessed simply via its buffer cache buffer,
362 * which is mapped for the duration of the chain's lock. However, because
363 * chains may represent blocks smaller than the 16KB minimum we impose
364 * on buffer cache buffers, we cannot hold related buffer cache buffers
365 * locked for smaller blocks. In these situations we kmalloc() a copy
368 * When modifications are made to a chain a new filesystem block must be
369 * allocated. Multiple modifications do not necessarily allocate new
370 * blocks. However, when a flush occurs a flush synchronization point
371 * is created and any new modifications made after this point will allocate
372 * a new block even if the chain is already in a modified state.
374 * The in-memory representation may remain cached (for example in order to
375 * placemark clustering locks) even after the related data has been
380 * In order to support concurrent flushes a flush synchronization point
381 * is created represented by a transaction id. Among other things,
382 * operations may move filesystem objects from one part of the topology
383 * to another (for example, if you rename a file or when indirect blocks
384 * are created or destroyed, and a few other things). When this occurs
385 * across a flush synchronization point the flusher needs to be able to
386 * recurse down BOTH the 'before' version of the topology and the 'after'
389 * To facilitate this, modifications to chains do what is called a
390 * DELETE-DUPLICATE operation. Chains are not actually moved in-memory.
391 * Instead the chain we wish to move is deleted and a new chain is created
392 * at the target location in the topology. ANY SUBCHAINS PLACED UNDER THE
393 * CHAIN BEING MOVED HAVE TO EXIST IN BOTH PLACES. To make this work
394 * all sub-chains are managed by the hammer2_chain_core structure. This
395 * structure can be multi-homed, meaning that it can have more than one
396 * chain as its parent. When a chain is delete-duplicated the chain's core
397 * becomes shared under both the old and new chain.
401 * When a chain is delete-duplicated the old chain typically becomes stale.
402 * This is detected via the HAMMER2_CHAIN_DUPLICATED flag in chain->flags.
403 * To avoid executing live filesystem operations on stale chains, the inode
404 * locking code will follow stale chains via core->ownerq until it finds
405 * the live chain. The lock prevents ripups by other threads. Lookups
406 * must properly order locking operations to prevent other threads from
407 * racing the lookup operation and will also follow stale chains when
411 RB_HEAD(hammer2_inode_tree, hammer2_inode);
416 * NOTE: The inode's attribute CST which is also used to lock the inode
417 * is embedded in the chain (chain.cst) and aliased w/ attr_cst.
419 struct hammer2_inode {
420 RB_ENTRY(hammer2_inode) rbnode; /* inumber lookup (HL) */
421 ccms_cst_t topo_cst; /* directory topology cst */
422 struct hammer2_pfsmount *pmp; /* PFS mount */
423 struct hammer2_inode *pip; /* parent inode */
425 hammer2_chain_t *chain; /* NOTE: rehomed on rename */
426 struct lockf advlock;
429 u_int refs; /* +vpref, +flushref */
430 uint8_t comp_heuristic;
435 typedef struct hammer2_inode hammer2_inode_t;
437 #define HAMMER2_INODE_MODIFIED 0x0001
438 #define HAMMER2_INODE_SROOT 0x0002 /* kmalloc special case */
439 #define HAMMER2_INODE_RENAME_INPROG 0x0004
440 #define HAMMER2_INODE_ONRBTREE 0x0008
441 #define HAMMER2_INODE_RESIZED 0x0010
442 #define HAMMER2_INODE_MTIME 0x0020
444 int hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2);
445 RB_PROTOTYPE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
449 * A hammer2 transaction and flush sequencing structure.
451 * This global structure is tied into hammer2_mount and is used
452 * to sequence modifying operations and flushes.
454 * (a) Any modifying operations with sync_tid >= flush_tid will stall until
455 * all modifying operations with sync_tid < flush_tid complete.
457 * The flush related to flush_tid stalls until all modifying operations
458 * with sync_tid < flush_tid complete.
460 * (b) Once unstalled, modifying operations with sync_tid > flush_tid are
461 * allowed to run. All modifications cause modify/duplicate operations
462 * to occur on the related chains. Note that most INDIRECT blocks will
463 * be unaffected because the modifications just overload the RBTREE
464 * structurally instead of actually modifying the indirect blocks.
466 * (c) The actual flush unstalls and RUNS CONCURRENTLY with (b), but only
467 * utilizes the chain structures with sync_tid <= flush_tid. The
468 * flush will modify related indirect blocks and inodes in-place
469 * (rather than duplicate) since the adjustments are compatible with
470 * (b)'s RBTREE overloading
472 * SPECIAL NOTE: Inode modifications have to also propagate along any
473 * modify/duplicate chains. File writes detect the flush
474 * and force out the conflicting buffer cache buffer(s)
475 * before reusing them.
477 * (d) Snapshots can be made instantly but must be flushed and disconnected
478 * from their duplicative source before they can be mounted. This is
479 * because while H2's on-media structure supports forks, its in-memory
480 * structure only supports very simple forking for background flushing
483 * TODO: Flush merging. When fsync() is called on multiple discrete files
484 * concurrently there is no reason to stall the second fsync.
485 * The final flush that reaches to root can cover both fsync()s.
487 * The chains typically terminate as they fly onto the disk. The flush
488 * ultimately reaches the volume header.
/*
 * One in-progress transaction.  Transactions are elements of a
 * hammer2_trans_queue (see the transq field of struct hammer2_mount).
 */
490 struct hammer2_trans {
491 TAILQ_ENTRY(hammer2_trans) entry; /* link on a hammer2_trans_queue */
492 struct hammer2_pfsmount *pmp; /* might be NULL */
493 struct hammer2_mount *hmp_single; /* if single-targeted */
494 hammer2_tid_t orig_tid;
495 hammer2_tid_t sync_tid; /* effective transaction id */
496 hammer2_tid_t inode_tid;
497 thread_t td; /* pointer */
500 uint8_t inodes_created;
504 typedef struct hammer2_trans hammer2_trans_t;
506 #define HAMMER2_TRANS_ISFLUSH 0x0001 /* formal flush */
507 #define HAMMER2_TRANS_CONCURRENT 0x0002 /* concurrent w/flush */
508 #define HAMMER2_TRANS_BUFCACHE 0x0004 /* from bioq strategy write */
509 #define HAMMER2_TRANS_NEWINODE 0x0008 /* caller allocating inode */
510 #define HAMMER2_TRANS_ISALLOCATING 0x0010 /* in allocator */
/*
 * HAMMER2_FREEMAP_HEUR is the product of the two constants below
 * (4 * 8 = 32) and is the element count of the heur_freemap[] array in
 * struct hammer2_mount.
 */
512 #define HAMMER2_FREEMAP_HEUR_NRADIX 4 /* pwr 2 PBUFRADIX-MINIORADIX */
513 #define HAMMER2_FREEMAP_HEUR_TYPES 8
514 #define HAMMER2_FREEMAP_HEUR (HAMMER2_FREEMAP_HEUR_NRADIX * \
515 HAMMER2_FREEMAP_HEUR_TYPES)
518 * Global (per device) mount structure for device (aka vp->v_mount->hmp)
520 TAILQ_HEAD(hammer2_trans_queue, hammer2_trans);
522 struct hammer2_mount {
523 struct vnode *devvp; /* device vnode */
524 int ronly; /* read-only mount */
525 int pmp_count; /* PFS mounts backed by us */
526 TAILQ_ENTRY(hammer2_mount) mntentry; /* hammer2_mntlist */
528 struct malloc_type *mchain;
531 struct spinlock io_spin; /* iotree access */
532 struct hammer2_io_tree iotree;
534 hammer2_chain_t vchain; /* anchor chain (topology) */
535 hammer2_chain_t fchain; /* anchor chain (freemap) */
536 hammer2_inode_t *sroot; /* super-root localized to media */
537 struct lock alloclk; /* lockmgr lock */
538 struct lock voldatalk; /* lockmgr lock */
539 struct hammer2_trans_queue transq; /* all in-progress transactions */
540 hammer2_off_t heur_freemap[HAMMER2_FREEMAP_HEUR];
541 int flushcnt; /* #of flush trans on the list */
543 int volhdrno; /* last volhdrno written */
544 hammer2_volume_data_t voldata;
545 hammer2_volume_data_t volsync; /* synchronized voldata */
548 typedef struct hammer2_mount hammer2_mount_t;
551 * HAMMER2 cluster - a device/root associated with a PFS.
553 * A PFS may have several hammer2_cluster's associated with it.
/* Upper bound on the number of chain slots held by one cluster. */
555 #define HAMMER2_MAXCLUSTER 8
557 struct hammer2_cluster {
560 hammer2_chain_t *chains[HAMMER2_MAXCLUSTER]; /* fixed-size slot array */
563 typedef struct hammer2_cluster hammer2_cluster_t;
566 * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data).
567 * This has a 1:1 correspondence to struct mount (note that the
568 * hammer2_mount structure has a N:1 correspondence).
570 * This structure represents a cluster mount and not necessarily a
571 * PFS under a specific device mount (HMP). The distinction is important
572 * because the elements backing a cluster mount can change on the fly.
574 * Usually the first element under the cluster represents the original
575 * user-requested mount that bootstraps the whole mess. In significant
576 * setups the original is usually just a read-only media image (or
577 * representative file) that simply contains a bootstrap volume header
578 * listing the configuration.
580 struct hammer2_pfsmount {
582 hammer2_cluster_t cluster;
583 hammer2_inode_t *iroot; /* PFS root inode */
584 hammer2_inode_t *ihidden; /* PFS hidden directory */
585 struct lock lock; /* PFS lock for certain ops */
586 hammer2_off_t inode_count; /* copy of inode_count */
587 ccms_domain_t ccms_dom;
588 struct netexport export; /* nfs export */
589 int ronly; /* read-only mount */
590 struct malloc_type *minode;
591 struct malloc_type *mmsg;
593 struct spinlock inum_spin; /* inumber lookup */
594 struct hammer2_inode_tree inum_tree;
596 long inmem_dirty_chains;
597 int count_lwinprog; /* logical write in prog */
598 thread_t wthread_td; /* write thread td */
599 struct bio_queue_head wthread_bioq; /* logical buffer bioq */
600 struct mtx wthread_mtx; /* interlock */
601 int wthread_destroy;/* termination sequencing */
604 typedef struct hammer2_pfsmount hammer2_pfsmount_t;
/*
 * Each WAITING/MASK pair splits a 32-bit value into its top bit
 * (WAITING) and the remaining low 31 bits (MASK).
 * NOTE(review): presumably applied to the inmem_dirty_chains and
 * count_lwinprog fields of struct hammer2_pfsmount respectively --
 * confirm against the users.
 */
606 #define HAMMER2_DIRTYCHAIN_WAITING 0x80000000
607 #define HAMMER2_DIRTYCHAIN_MASK 0x7FFFFFFF
609 #define HAMMER2_LWINPROG_WAITING 0x80000000
610 #define HAMMER2_LWINPROG_MASK 0x7FFFFFFF
614 MALLOC_DECLARE(M_HAMMER2);
/*
 * VTOI: reinterpret the v_data pointer of a vnode as a hammer2_inode_t *.
 * ITOV: read the vp member of an inode.
 * Both parenthesize their argument; neither performs any NULL check.
 */
616 #define VTOI(vp) ((hammer2_inode_t *)(vp)->v_data)
617 #define ITOV(ip) ((ip)->vp)
620 * Currently locked chains retain the locked buffer cache buffer for
621 * indirect blocks, and indirect blocks can be one of two sizes. The
622 * device buffer has to match the case to avoid deadlocking recursive
623 * chains that might otherwise try to access different offsets within
624 * the same device buffer.
628 hammer2_devblkradix(int radix)
/*
 * Map a radix onto one of the two device buffer radixes: any value up to
 * and including HAMMER2_LBUFRADIX yields HAMMER2_LBUFRADIX, anything
 * larger yields HAMMER2_PBUFRADIX.
 */
630 if (radix <= HAMMER2_LBUFRADIX) {
631 return (HAMMER2_LBUFRADIX);
633 return (HAMMER2_PBUFRADIX);
639 hammer2_devblksize(size_t bytes)
/*
 * Map a byte count onto one of the two device buffer sizes: any value up
 * to and including HAMMER2_LBUFSIZE yields HAMMER2_LBUFSIZE, anything
 * larger yields HAMMER2_PBUFSIZE.
 */
641 if (bytes <= HAMMER2_LBUFSIZE) {
642 return(HAMMER2_LBUFSIZE);
/*
 * Larger requests must not exceed HAMMER2_PBUFSIZE and must be a power
 * of two: x ^ (x - 1) equals (x << 1) - 1 only when x has a single bit
 * set.
 */
644 KKASSERT(bytes <= HAMMER2_PBUFSIZE &&
645 (bytes ^ (bytes - 1)) == ((bytes << 1) - 1));
646 return (HAMMER2_PBUFSIZE);
653 MPTOPMP(struct mount *mp)
/*
 * Return the mnt_data pointer of the mount, cast to a PFS mount pointer.
 * mp is dereferenced unconditionally.
 */
655 return ((hammer2_pfsmount_t *)mp->mnt_data);
658 extern struct vop_ops hammer2_vnode_vops;
659 extern struct vop_ops hammer2_spec_vops;
660 extern struct vop_ops hammer2_fifo_vops;
662 extern int hammer2_debug;
663 extern int hammer2_cluster_enable;
664 extern int hammer2_hardlink_enable;
665 extern int hammer2_flush_pipe;
666 extern int hammer2_synchronous_flush;
667 extern long hammer2_limit_dirty_chains;
668 extern long hammer2_iod_file_read;
669 extern long hammer2_iod_meta_read;
670 extern long hammer2_iod_indr_read;
671 extern long hammer2_iod_fmap_read;
672 extern long hammer2_iod_volu_read;
673 extern long hammer2_iod_file_write;
674 extern long hammer2_iod_meta_write;
675 extern long hammer2_iod_indr_write;
676 extern long hammer2_iod_fmap_write;
677 extern long hammer2_iod_volu_write;
678 extern long hammer2_ioa_file_read;
679 extern long hammer2_ioa_meta_read;
680 extern long hammer2_ioa_indr_read;
681 extern long hammer2_ioa_fmap_read;
682 extern long hammer2_ioa_volu_read;
683 extern long hammer2_ioa_file_write;
684 extern long hammer2_ioa_meta_write;
685 extern long hammer2_ioa_indr_write;
686 extern long hammer2_ioa_fmap_write;
687 extern long hammer2_ioa_volu_write;
689 extern struct objcache *cache_buffer_read;
690 extern struct objcache *cache_buffer_write;
693 extern int write_thread_wakeup;
695 extern mtx_t thread_protect;
/*
 * CRC aliases: both macros forward their arguments unchanged to the
 * iscsi_crc32() / iscsi_crc32_ext() routines.
 * NOTE(review): the crc argument of the _ext form is presumably a seed
 * or running value -- confirm against iscsi_crc32_ext().
 */
700 #define hammer2_icrc32(buf, size) iscsi_crc32((buf), (size))
701 #define hammer2_icrc32c(buf, size, crc) iscsi_crc32_ext((buf), (size), (crc))
/*
 * Locking entry points (implemented elsewhere).
 *
 * The inode lock calls return a hammer2_chain_t * and the matching unlock
 * calls take the inode together with a chain.  temp_release and upgrade
 * return a ccms_state_t, which temp_restore and downgrade accept as
 * ostate.
 * NOTE(review): presumably the chain returned by a lock call is the one
 * to hand back to the matching unlock call -- confirm.
 */
703 hammer2_chain_t *hammer2_inode_lock_ex(hammer2_inode_t *ip);
704 hammer2_chain_t *hammer2_inode_lock_sh(hammer2_inode_t *ip);
705 void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain);
706 void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain);
707 void hammer2_chain_refactor(hammer2_chain_t **chainp);
708 void hammer2_voldata_lock(hammer2_mount_t *hmp);
709 void hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify);
710 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
711 void hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate);
712 ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip);
713 void hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate);
715 void hammer2_mount_exlock(hammer2_mount_t *hmp);
716 void hammer2_mount_shlock(hammer2_mount_t *hmp);
717 void hammer2_mount_unlock(hammer2_mount_t *hmp);
719 int hammer2_get_dtype(hammer2_chain_t *chain);
720 int hammer2_get_vtype(hammer2_chain_t *chain);
721 u_int8_t hammer2_get_obj_type(enum vtype vtype);
722 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
723 u_int64_t hammer2_timespec_to_time(struct timespec *ts);
724 u_int32_t hammer2_to_unix_xid(uuid_t *uuid);
725 void hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
727 hammer2_key_t hammer2_dirhash(const unsigned char *name, size_t len);
728 int hammer2_getradix(size_t bytes);
730 int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
731 hammer2_key_t *lbasep, hammer2_key_t *leofp);
732 int hammer2_calc_physical(hammer2_inode_t *ip, hammer2_key_t lbase);
733 void hammer2_update_time(uint64_t *timep);
738 struct vnode *hammer2_igetv(hammer2_inode_t *ip, int *errorp);
740 void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
741 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
742 hammer2_inode_t *hammer2_inode_lookup(hammer2_pfsmount_t *pmp,
744 hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp,
745 hammer2_inode_t *dip, hammer2_chain_t *chain);
746 void hammer2_inode_free(hammer2_inode_t *ip);
747 void hammer2_inode_ref(hammer2_inode_t *ip);
748 void hammer2_inode_drop(hammer2_inode_t *ip);
749 void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
750 hammer2_chain_t *chain);
752 hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans,
753 hammer2_inode_t *dip,
754 struct vattr *vap, struct ucred *cred,
755 const uint8_t *name, size_t name_len,
756 hammer2_chain_t **chainp, int *errorp);
757 int hammer2_inode_connect(hammer2_trans_t *trans,
758 hammer2_chain_t **chainp, int hlink,
759 hammer2_inode_t *dip, hammer2_chain_t **dchainp,
760 const uint8_t *name, size_t name_len,
762 hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip,
763 hammer2_inode_t *tdip);
764 void hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
765 hammer2_chain_t **parentp);
766 int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
767 const uint8_t *name, size_t name_len, int isdir,
768 int *hlinkp, struct nchandle *nch);
769 int hammer2_hardlink_consolidate(hammer2_trans_t *trans,
770 hammer2_inode_t *ip, hammer2_chain_t **chainp,
771 hammer2_inode_t *cdip, hammer2_chain_t **cdchainp,
773 int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip,
774 hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
775 int hammer2_hardlink_find(hammer2_inode_t *dip,
776 hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
777 void hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp);
782 void hammer2_modify_volume(hammer2_mount_t *hmp);
783 hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp,
784 hammer2_pfsmount_t *pmp,
785 hammer2_trans_t *trans,
786 hammer2_blockref_t *bref);
787 void hammer2_chain_core_alloc(hammer2_trans_t *trans, hammer2_chain_t *nchain,
788 hammer2_chain_t *ochain);
789 void hammer2_chain_ref(hammer2_chain_t *chain);
790 void hammer2_chain_drop(hammer2_chain_t *chain);
791 int hammer2_chain_lock(hammer2_chain_t *chain, int how);
792 void hammer2_chain_load_async(hammer2_chain_t *chain,
793 void (*func)(hammer2_io_t *dio,
794 hammer2_chain_t *chain,
795 void *arg_p, off_t arg_o),
796 void *arg_p, off_t arg_o);
797 void hammer2_chain_moved(hammer2_chain_t *chain);
798 void hammer2_chain_modify(hammer2_trans_t *trans,
799 hammer2_chain_t **chainp, int flags);
800 hammer2_inode_data_t *hammer2_chain_modify_ip(hammer2_trans_t *trans,
801 hammer2_inode_t *ip, hammer2_chain_t **chainp,
803 void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
804 hammer2_chain_t *parent,
805 hammer2_chain_t **chainp,
806 int nradix, int flags);
807 void hammer2_chain_unlock(hammer2_chain_t *chain);
808 void hammer2_chain_wait(hammer2_chain_t *chain);
809 hammer2_chain_t *hammer2_chain_get(hammer2_chain_t *parent, int generation,
810 hammer2_blockref_t *bref);
811 hammer2_chain_t *hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags);
812 void hammer2_chain_lookup_done(hammer2_chain_t *parent);
813 hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp,
814 hammer2_key_t *key_nextp,
815 hammer2_key_t key_beg, hammer2_key_t key_end,
816 int *cache_indexp, int flags);
817 hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp,
818 hammer2_chain_t *chain,
819 hammer2_key_t *key_nextp,
820 hammer2_key_t key_beg, hammer2_key_t key_end,
821 int *cache_indexp, int flags);
822 hammer2_chain_t *hammer2_chain_scan(hammer2_chain_t *parent,
823 hammer2_chain_t *chain,
824 int *cache_indexp, int flags);
826 int hammer2_chain_create(hammer2_trans_t *trans,
827 hammer2_chain_t **parentp,
828 hammer2_chain_t **chainp,
829 hammer2_key_t key, int keybits,
830 int type, size_t bytes);
831 void hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp,
832 hammer2_chain_t **chainp,
833 hammer2_blockref_t *bref, int snapshot,
834 int duplicate_reason);
835 int hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t **chainp,
836 hammer2_ioc_pfs_t *pfs);
837 void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
839 void hammer2_chain_delete_duplicate(hammer2_trans_t *trans,
840 hammer2_chain_t **chainp, int flags);
841 void hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp);
842 void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain);
843 void hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain);
845 void hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp);
846 void hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp);
847 void hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp);
848 void hammer2_chain_countbrefs(hammer2_chain_t *chain,
849 hammer2_blockref_t *base, int count);
851 int hammer2_base_find(hammer2_chain_t *chain,
852 hammer2_blockref_t *base, int count,
853 int *cache_indexp, hammer2_key_t *key_nextp,
854 hammer2_key_t key_beg, hammer2_key_t key_end,
856 void hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
857 hammer2_blockref_t *base, int count,
858 int *cache_indexp, hammer2_chain_t *child);
859 void hammer2_base_insert(hammer2_trans_t *trans, hammer2_chain_t *chain,
860 hammer2_blockref_t *base, int count,
861 int *cache_indexp, hammer2_chain_t *child);
866 void hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp,
867 hammer2_mount_t *hmp, int flags);
868 void hammer2_trans_clear_invfsync(hammer2_trans_t *trans);
869 void hammer2_trans_done(hammer2_trans_t *trans);
874 int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data,
875 int fflag, struct ucred *cred);
/*
 * Interface to the hammer2_io buffer abstraction (implemented elsewhere).
 *
 * The acquisition calls identify a buffer by (hmp, lbase, lsize).  The
 * int-returning new/newnz/newq/bread variants hand the hammer2_io_t back
 * through diop, while getblk returns it directly.  The write and release
 * calls take a hammer2_io_t ** as well.
 * NOTE(review): presumably those calls clear *diop when done -- confirm.
 *
 * hammer2_io_breadcb() takes a callback plus three extra arguments
 * (arg_c, arg_p, arg_o) whose types match the callback parameters.
 * NOTE(review): presumably they are forwarded to the callback -- confirm.
 */
880 hammer2_io_t *hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase,
881 int lsize, int *ownerp);
882 void hammer2_io_putblk(hammer2_io_t **diop);
883 void hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree);
884 char *hammer2_io_data(hammer2_io_t *dio, off_t lbase);
885 int hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
886 hammer2_io_t **diop);
887 int hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
888 hammer2_io_t **diop);
889 int hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
890 hammer2_io_t **diop);
891 int hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
892 hammer2_io_t **diop);
893 void hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
894 void (*callback)(hammer2_io_t *dio,
895 hammer2_chain_t *arg_c,
896 void *arg_p, off_t arg_o),
897 hammer2_chain_t *arg_c,
898 void *arg_p, off_t arg_o);
899 void hammer2_io_bawrite(hammer2_io_t **diop);
900 void hammer2_io_bdwrite(hammer2_io_t **diop);
901 int hammer2_io_bwrite(hammer2_io_t **diop);
902 int hammer2_io_isdirty(hammer2_io_t *dio);
903 void hammer2_io_setdirty(hammer2_io_t *dio);
904 void hammer2_io_setinval(hammer2_io_t *dio, u_int bytes);
905 void hammer2_io_brelse(hammer2_io_t **diop);
906 void hammer2_io_bqrelse(hammer2_io_t **diop);
911 int hammer2_msg_dbg_rcvmsg(kdmsg_msg_t *msg);
912 int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
917 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom);
918 void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index);
919 void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp);
920 void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx);
921 void hammer2_bioq_sync(hammer2_pfsmount_t *pmp);
922 int hammer2_vfs_sync(struct mount *mp, int waitflags);
923 void hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp);
924 void hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp);
925 void hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp);
930 int hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain,
932 void hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
933 hammer2_blockref_t *bref, int how);
936 #endif /* !_KERNEL */
937 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */