2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
39 * This header file contains structures used internally by the HAMMER2
40 * implementation. See hammer2_disk.h for on-disk structures.
42 * There is an in-memory representation of all on-media data structures.
43 * Almost everything is represented by a hammer2_chain structure in-memory.
44 * Other higher-level structures typically map to chains.
46 * A great deal of data is accessed simply via its buffer cache buffer,
47 * which is mapped for the duration of the chain's lock. Hammer2 must
48 * implement its own buffer cache layer on top of the system layer to
49 * allow for different threads to lock different sub-block-sized buffers.
51 * When modifications are made to a chain a new filesystem block must be
52 * allocated. Multiple modifications do not typically allocate new blocks
53 * until the current block has been flushed. Flushes do not block the
54 * front-end unless the front-end operation crosses the current inode being
57 * The in-memory representation may remain cached (for example in order to
58 * placemark clustering locks) even after the related data has been
62 #ifndef _VFS_HAMMER2_HAMMER2_H_
63 #define _VFS_HAMMER2_HAMMER2_H_
65 #include <sys/param.h>
66 #include <sys/types.h>
67 #include <sys/kernel.h>
69 #include <sys/systm.h>
71 #include <sys/malloc.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
75 #include <sys/mountctl.h>
78 #include <sys/thread.h>
79 #include <sys/globaldata.h>
80 #include <sys/lockf.h>
82 #include <sys/queue.h>
83 #include <sys/limits.h>
84 #include <sys/signal2.h>
86 #include <sys/mutex.h>
87 #include <sys/kern_syscall.h>
90 #include <sys/mutex2.h>
92 #include "hammer2_disk.h"
93 #include "hammer2_mount.h"
94 #include "hammer2_ioctl.h"
95 #include "hammer2_ccms.h"
100 struct hammer2_cluster;
101 struct hammer2_inode;
102 struct hammer2_mount;
103 struct hammer2_pfsmount;
105 struct hammer2_state;
109 * The xid tracks internal transactional updates.
111 * XXX fix-me, really needs to be 64-bits
113 typedef uint32_t hammer2_xid_t;
115 #define HAMMER2_XID_MIN 0x00000000U
116 #define HAMMER2_XID_MAX 0x7FFFFFFFU
119 * The chain structure tracks a portion of the media topology from the
120 * root (volume) down. Chains represent volumes, inodes, indirect blocks,
121 * data blocks, and freemap nodes and leafs.
123 * The chain structure utilizes a simple singly-homed topology and the
124 * chain's in-memory topology will move around as the chains do, due mainly
125 * to renames and indirect block creation.
127 * Block Table Updates
129 * Block table updates for insertions and updates are delayed until the
130 * flush. This allows us to avoid having to modify the parent chain
131 * all the way to the root.
133 * Block table deletions are performed immediately (modifying the parent
134 * in the process) because the flush code uses the chain structure to
135 * track delayed updates and the chain will be (likely) gone or moved to
136 * another location in the topology after a deletion.
138 * A prior iteration of the code tried to keep the relationship intact
139 * on deletes by doing a delete-duplicate operation on the chain, but
140 * it added way too much complexity to the codebase.
142 * Flush Synchronization
144 * The flush code must flush modified chains bottom-up. Because chain
145 * structures can shift around and are NOT topologically stable,
146 * modified chains are independently indexed for the flush. As the flush
147 * runs it modifies (or further modifies) and updates the parents,
148 * propagating the flush all the way to the volume root.
150 * Modifying front-end operations can occur during a flush but will block
151 * in two cases: (1) when the front-end tries to operate on the inode
152 * currently in the midst of being flushed and (2) if the front-end
153 * crosses an inode currently being flushed (such as during a rename).
154 * So, for example, if you rename directory "x" to "a/b/c/d/e/f/g/x" and
155 * the flusher is currently working on "a/b/c", the rename will block
156 * temporarily in order to ensure that "x" exists in one place or the
159 * Meta-data statistics are updated by the flusher. The front-end will
160 * make estimates but meta-data must be fully synchronized only during a
161 * flush in order to ensure that it remains correct across a crash.
163 * Multiple flush synchronizations can theoretically be in-flight at the
164 * same time but the implementation is not coded to handle the case and
165 * currently serializes them.
169 * Snapshots currently require the subdirectory tree being snapshotted
170 * to be flushed. The snapshot then creates a new super-root inode which
171 * copies the flushed blockdata of the directory or file that was
176 * - Note that the radix tree runs in powers of 2 only so sub-trees
177 * cannot straddle edges.
179 RB_HEAD(hammer2_chain_tree, hammer2_chain);
180 TAILQ_HEAD(h2_flush_list, hammer2_chain);
181 TAILQ_HEAD(h2_core_list, hammer2_chain);
182 TAILQ_HEAD(h2_iocb_list, hammer2_iocb);
184 #define CHAIN_CORE_DELETE_BMAP_ENTRIES \
185 (HAMMER2_PBUFSIZE / sizeof(hammer2_blockref_t) / sizeof(uint32_t))
187 struct hammer2_chain_core {
189 struct hammer2_chain_tree rbtree; /* sub-chains */
190 int live_zero; /* blockref array opt */
192 u_int live_count; /* live (not deleted) chains in tree */
193 u_int chain_count; /* live + deleted chains under core */
194 int generation; /* generation number (inserts only) */
197 typedef struct hammer2_chain_core hammer2_chain_core_t;
199 #define HAMMER2_CORE_UNUSED0001 0x0001
200 #define HAMMER2_CORE_COUNTEDBREFS 0x0002
202 RB_HEAD(hammer2_io_tree, hammer2_io);
205 * IOCB - IO callback (into chain, cluster, or manual request)
207 struct hammer2_iocb {
208 TAILQ_ENTRY(hammer2_iocb) entry;
209 void (*callback)(struct hammer2_iocb *iocb);
210 struct hammer2_io *dio;
211 struct hammer2_cluster *cluster;
212 struct hammer2_chain *chain;
220 typedef struct hammer2_iocb hammer2_iocb_t;
222 #define HAMMER2_IOCB_INTERLOCK 0x00000001
223 #define HAMMER2_IOCB_ONQ 0x00000002
224 #define HAMMER2_IOCB_DONE 0x00000004
225 #define HAMMER2_IOCB_INPROG 0x00000008
226 #define HAMMER2_IOCB_UNUSED10 0x00000010
227 #define HAMMER2_IOCB_QUICK 0x00010000
228 #define HAMMER2_IOCB_ZERO 0x00020000
229 #define HAMMER2_IOCB_READ 0x00040000
230 #define HAMMER2_IOCB_WAKEUP 0x00080000
233 * DIO - Management structure wrapping system buffer cache.
235 * Used for multiple purposes including concurrent management
236 * of small requests by chains into larger DIOs.
239 RB_ENTRY(hammer2_io) rbnode; /* indexed by device offset */
240 struct h2_iocb_list iocbq;
241 struct spinlock spin;
242 struct hammer2_mount *hmp;
247 int act; /* activity */
250 typedef struct hammer2_io hammer2_io_t;
252 #define HAMMER2_DIO_INPROG 0x80000000 /* bio in progress */
253 #define HAMMER2_DIO_GOOD 0x40000000 /* dio->bp is stable */
254 #define HAMMER2_DIO_WAITING 0x20000000 /* (old) */
255 #define HAMMER2_DIO_DIRTY 0x10000000 /* flush on last drop */
257 #define HAMMER2_DIO_MASK 0x0FFFFFFF
260 * Primary chain structure keeps track of the topology in-memory.
262 struct hammer2_chain {
263 hammer2_chain_core_t core; /* sub-chain RB tree and counts */
264 RB_ENTRY(hammer2_chain) rbnode; /* live chain(s) */
265 hammer2_blockref_t bref;
266 struct hammer2_chain *parent;
267 struct hammer2_state *state; /* if active cache msg */
268 struct hammer2_mount *hmp; /* per-device mount */
269 struct hammer2_pfsmount *pmp; /* (pfs-cluster pmp or spmp) */
271 hammer2_xid_t flush_xid; /* flush sequencing */
272 hammer2_key_t data_count; /* deltas to apply */
273 hammer2_key_t inode_count; /* deltas to apply */
274 hammer2_key_t data_count_up; /* deltas to apply */
275 hammer2_key_t inode_count_up; /* deltas to apply */
276 hammer2_io_t *dio; /* physical data buffer */
277 u_int bytes; /* physical data size */
281 hammer2_media_data_t *data; /* data pointer shortcut */
282 TAILQ_ENTRY(hammer2_chain) flush_node; /* flush list */
285 typedef struct hammer2_chain hammer2_chain_t;
287 int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
288 RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
291 * Special notes on flags:
293 * INITIAL - This flag allows a chain to be created and for storage to
294 * be allocated without having to immediately instantiate the
295 * related buffer. The data is assumed to be all-zeros. It
296 * is primarily used for indirect blocks.
298 * MODIFIED- The chain's media data has been modified.
299 * UPDATE - Chain might not be modified but parent blocktable needs update
301 * BMAPPED - Indicates that the chain is present in the parent blockmap.
302 * BMAPUPD - Indicates that the chain is present but needs to be updated
303 * in the parent blockmap.
305 #define HAMMER2_CHAIN_MODIFIED 0x00000001 /* dirty chain data */
306 #define HAMMER2_CHAIN_ALLOCATED 0x00000002 /* kmalloc'd chain */
307 #define HAMMER2_CHAIN_DESTROY 0x00000004
308 #define HAMMER2_CHAIN_UNUSED00000008 0x00000008
309 #define HAMMER2_CHAIN_DELETED 0x00000010 /* deleted chain */
310 #define HAMMER2_CHAIN_INITIAL 0x00000020 /* initial create */
311 #define HAMMER2_CHAIN_UPDATE 0x00000040 /* need parent update */
312 #define HAMMER2_CHAIN_DEFERRED 0x00000080 /* flush depth defer */
313 #define HAMMER2_CHAIN_IOFLUSH 0x00000100 /* bawrite on put */
314 #define HAMMER2_CHAIN_ONFLUSH 0x00000200 /* on a flush list */
315 #define HAMMER2_CHAIN_UNUSED00000400 0x00000400
316 #define HAMMER2_CHAIN_VOLUMESYNC 0x00000800 /* needs volume sync */
317 #define HAMMER2_CHAIN_UNUSED00001000 0x00001000
318 #define HAMMER2_CHAIN_MOUNTED 0x00002000 /* PFS is mounted */
319 #define HAMMER2_CHAIN_ONRBTREE 0x00004000 /* on parent RB tree */
320 #define HAMMER2_CHAIN_SNAPSHOT 0x00008000 /* snapshot special */
321 #define HAMMER2_CHAIN_EMBEDDED 0x00010000 /* embedded data */
322 #define HAMMER2_CHAIN_RELEASE 0x00020000 /* don't keep around */
323 #define HAMMER2_CHAIN_BMAPPED 0x00040000 /* present in blkmap */
324 #define HAMMER2_CHAIN_BMAPUPD 0x00080000 /* +needs updating */
325 #define HAMMER2_CHAIN_UNUSED00100000 0x00100000
326 #define HAMMER2_CHAIN_UNUSED00200000 0x00200000
327 #define HAMMER2_CHAIN_PFSBOUNDARY 0x00400000 /* super->pfs inode */
329 #define HAMMER2_CHAIN_FLUSH_MASK (HAMMER2_CHAIN_MODIFIED | \
330 HAMMER2_CHAIN_UPDATE | \
331 HAMMER2_CHAIN_ONFLUSH)
334 * Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
336 * NOTE: MATCHIND allows an indirect block / freemap node to be returned
337 * when the passed key range matches the radix. Remember that key_end
338 * is inclusive (e.g. {0x000,0xFFF}, not {0x000,0x1000}).
340 #define HAMMER2_LOOKUP_NOLOCK 0x00000001 /* ref only */
341 #define HAMMER2_LOOKUP_NODATA 0x00000002 /* data left NULL */
342 #define HAMMER2_LOOKUP_SHARED 0x00000100
343 #define HAMMER2_LOOKUP_MATCHIND 0x00000200 /* return all chains */
344 #define HAMMER2_LOOKUP_UNUSED0400 0x00000400
345 #define HAMMER2_LOOKUP_ALWAYS 0x00000800 /* resolve data */
348 * Flags passed to hammer2_chain_modify() and hammer2_chain_resize()
350 * NOTE: OPTDATA allows us to avoid instantiating buffers for INDIRECT
351 * blocks in the INITIAL-create state.
353 #define HAMMER2_MODIFY_OPTDATA 0x00000002 /* data can be NULL */
354 #define HAMMER2_MODIFY_NO_MODIFY_TID 0x00000004
355 #define HAMMER2_MODIFY_UNUSED0008 0x00000008
356 #define HAMMER2_MODIFY_NOREALLOC 0x00000010
359 * Flags passed to hammer2_chain_lock()
361 #define HAMMER2_RESOLVE_NEVER 1
362 #define HAMMER2_RESOLVE_MAYBE 2
363 #define HAMMER2_RESOLVE_ALWAYS 3
364 #define HAMMER2_RESOLVE_MASK 0x0F
366 #define HAMMER2_RESOLVE_SHARED 0x10 /* request shared lock */
367 #define HAMMER2_RESOLVE_NOREF 0x20 /* already ref'd on lock */
370 * Flags passed to hammer2_chain_delete()
372 #define HAMMER2_DELETE_PERMANENT 0x0001
373 #define HAMMER2_DELETE_NOSTATS 0x0002
375 #define HAMMER2_INSERT_NOSTATS 0x0002
378 * Flags passed to hammer2_chain_delete_duplicate()
380 #define HAMMER2_DELDUP_RECORE 0x0001
383 * Cluster different types of storage together for allocations
385 #define HAMMER2_FREECACHE_INODE 0
386 #define HAMMER2_FREECACHE_INDIR 1
387 #define HAMMER2_FREECACHE_DATA 2
388 #define HAMMER2_FREECACHE_UNUSED3 3
389 #define HAMMER2_FREECACHE_TYPES 4
392 * hammer2_freemap_alloc() block preference
394 #define HAMMER2_OFF_NOPREF ((hammer2_off_t)-1)
397 * BMAP read-ahead maximum parameters
399 #define HAMMER2_BMAP_COUNT 16 /* max bmap read-ahead */
400 #define HAMMER2_BMAP_BYTES (HAMMER2_PBUFSIZE * HAMMER2_BMAP_COUNT)
403 * hammer2_freemap_adjust()
405 #define HAMMER2_FREEMAP_DORECOVER 1
406 #define HAMMER2_FREEMAP_DOMAYFREE 2
407 #define HAMMER2_FREEMAP_DOREALFREE 3
410 * HAMMER2 cluster - A set of chains representing the same entity.
412 * The hammer2_pfsmount structure embeds a hammer2_cluster. All other
413 * hammer2_cluster use cases use temporary allocations.
415 * The cluster API mimics the chain API. Except as used in the pfsmount,
416 * the cluster structure is a temporary 'working copy' of a set of chains
417 * representing targets compatible with the operation. However, for
418 * performance reasons the cluster API does not necessarily issue concurrent
419 * requests to the underlying chain API for all compatible chains all the
420 * time. This may sometimes necessitate revisiting parent cluster nodes
421 * to 'flesh out' (validate more chains).
423 * If an insufficient number of chains remain in a working copy, the operation
424 * may have to be downgraded, retried, or stall until the requisite number
425 * of chains are available.
427 #define HAMMER2_MAXCLUSTER 8
429 struct hammer2_cluster {
430 int status; /* operational status */
431 int refs; /* track for deallocation */
432 struct hammer2_pfsmount *pmp;
436 hammer2_chain_t *focus; /* current focus (or mod) */
437 hammer2_chain_t *array[HAMMER2_MAXCLUSTER];
438 char missed[HAMMER2_MAXCLUSTER];
439 int cache_index[HAMMER2_MAXCLUSTER];
442 typedef struct hammer2_cluster hammer2_cluster_t;
444 #define HAMMER2_CLUSTER_INODE 0x00000001 /* embedded in inode */
445 #define HAMMER2_CLUSTER_NOSYNC 0x00000002 /* not in sync (cumulative) */
448 RB_HEAD(hammer2_inode_tree, hammer2_inode);
453 * NOTE: The inode's attribute CST which is also used to lock the inode
454 * is embedded in the chain (chain.cst) and aliased w/ attr_cst.
456 * NOTE: The inode-embedded cluster is never used directly for I/O (since
457 * it may be shared). Instead it will be replicated-in and synchronized
458 * back out if changed.
460 struct hammer2_inode {
461 RB_ENTRY(hammer2_inode) rbnode; /* inumber lookup (HL) */
462 ccms_cst_t topo_cst; /* directory topology cst */
463 struct hammer2_pfsmount *pmp; /* PFS mount */
464 struct hammer2_inode *pip; /* parent inode */
466 hammer2_cluster_t cluster;
467 struct lockf advlock;
470 u_int refs; /* +vpref, +flushref */
471 uint8_t comp_heuristic;
476 typedef struct hammer2_inode hammer2_inode_t;
478 #define HAMMER2_INODE_MODIFIED 0x0001
479 #define HAMMER2_INODE_SROOT 0x0002 /* kmalloc special case */
480 #define HAMMER2_INODE_RENAME_INPROG 0x0004
481 #define HAMMER2_INODE_ONRBTREE 0x0008
482 #define HAMMER2_INODE_RESIZED 0x0010
483 #define HAMMER2_INODE_MTIME 0x0020
484 #define HAMMER2_INODE_UNLINKED 0x0040
486 int hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2);
487 RB_PROTOTYPE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
491 * inode-unlink side-structure
493 struct hammer2_inode_unlink {
494 TAILQ_ENTRY(hammer2_inode_unlink) entry;
497 TAILQ_HEAD(h2_unlk_list, hammer2_inode_unlink);
499 typedef struct hammer2_inode_unlink hammer2_inode_unlink_t;
502 * A hammer2 transaction and flush sequencing structure.
504 * This global structure is tied into hammer2_mount and is used
505 * to sequence modifying operations and flushes.
507 * (a) Any modifying operations with sync_tid >= flush_tid will stall until
508 * all modifying operations with sync_tid < flush_tid complete.
510 * The flush related to flush_tid stalls until all modifying operations
511 * with sync_tid < flush_tid complete.
513 * (b) Once unstalled, modifying operations with sync_tid > flush_tid are
514 * allowed to run. All modifications cause modify/duplicate operations
515 * to occur on the related chains. Note that most INDIRECT blocks will
516 * be unaffected because the modifications just overload the RBTREE
517 * structurally instead of actually modifying the indirect blocks.
519 * (c) The actual flush unstalls and RUNS CONCURRENTLY with (b), but only
520 * utilizes the chain structures with sync_tid <= flush_tid. The
521 * flush will modify related indirect blocks and inodes in-place
522 * (rather than duplicate) since the adjustments are compatible with
523 * (b)'s RBTREE overloading
525 * SPECIAL NOTE: Inode modifications have to also propagate along any
526 * modify/duplicate chains. File writes detect the flush
527 * and force out the conflicting buffer cache buffer(s)
528 * before reusing them.
530 * (d) Snapshots can be made instantly but must be flushed and disconnected
531 * from their duplicative source before they can be mounted. This is
532 * because while H2's on-media structure supports forks, its in-memory
533 * structure only supports very simple forking for background flushing
536 * TODO: Flush merging. When fsync() is called on multiple discrete files
537 * concurrently there is no reason to stall the second fsync.
538 * The final flush that reaches to root can cover both fsync()s.
540 * The chains typically terminate as they fly onto the disk. The flush
541 * ultimately reaches the volume header.
543 struct hammer2_trans {
544 TAILQ_ENTRY(hammer2_trans) entry;
545 struct hammer2_pfsmount *pmp;
546 hammer2_xid_t sync_xid;
547 hammer2_tid_t inode_tid; /* inode number assignment */
548 thread_t td; /* pointer */
551 uint8_t inodes_created;
555 typedef struct hammer2_trans hammer2_trans_t;
557 #define HAMMER2_TRANS_ISFLUSH 0x0001 /* formal flush */
558 #define HAMMER2_TRANS_CONCURRENT 0x0002 /* concurrent w/flush */
559 #define HAMMER2_TRANS_BUFCACHE 0x0004 /* from bioq strategy write */
560 #define HAMMER2_TRANS_NEWINODE 0x0008 /* caller allocating inode */
561 #define HAMMER2_TRANS_UNUSED0010 0x0010
562 #define HAMMER2_TRANS_PREFLUSH 0x0020 /* preflush state */
564 #define HAMMER2_FREEMAP_HEUR_NRADIX 4 /* pwr 2 PBUFRADIX-MINIORADIX */
565 #define HAMMER2_FREEMAP_HEUR_TYPES 8
566 #define HAMMER2_FREEMAP_HEUR (HAMMER2_FREEMAP_HEUR_NRADIX * \
567 HAMMER2_FREEMAP_HEUR_TYPES)
569 #define HAMMER2_CLUSTER_COPY_NOCHAINS 0x0001 /* do not copy or ref chains */
570 #define HAMMER2_CLUSTER_COPY_NOREF 0x0002 /* do not ref chains or cl */
573 * Transaction Rendezvous
575 TAILQ_HEAD(hammer2_trans_queue, hammer2_trans);
577 struct hammer2_trans_manage {
578 hammer2_xid_t flush_xid; /* last flush transaction */
579 hammer2_xid_t alloc_xid;
580 struct lock translk; /* lockmgr lock */
581 struct hammer2_trans_queue transq; /* modifying transactions */
582 int flushcnt; /* track flush trans */
585 typedef struct hammer2_trans_manage hammer2_trans_manage_t;
588 * Global (per device) mount structure for device (aka vp->v_mount->hmp)
590 struct hammer2_mount {
591 struct vnode *devvp; /* device vnode */
592 int ronly; /* read-only mount */
593 int pmp_count; /* PFS mounts backed by us */
594 TAILQ_ENTRY(hammer2_mount) mntentry; /* hammer2_mntlist */
596 struct malloc_type *mchain;
599 kdmsg_iocom_t iocom; /* volume-level dmsg interface */
600 struct spinlock io_spin; /* iotree access */
601 struct hammer2_io_tree iotree;
603 hammer2_chain_t vchain; /* anchor chain (topology) */
604 hammer2_chain_t fchain; /* anchor chain (freemap) */
605 struct spinlock list_spin;
606 struct h2_flush_list flushq; /* flush seeds */
607 struct hammer2_pfsmount *spmp; /* super-root pmp for transactions */
608 struct lock vollk; /* lockmgr lock */
609 hammer2_off_t heur_freemap[HAMMER2_FREEMAP_HEUR];
610 int volhdrno; /* last volhdrno written */
611 hammer2_volume_data_t voldata;
612 hammer2_volume_data_t volsync; /* synchronized voldata */
615 typedef struct hammer2_mount hammer2_mount_t;
618 * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data).
619 * This has a 1:1 correspondence to struct mount (note that the
620 * hammer2_mount structure has a N:1 correspondence).
622 * This structure represents a cluster mount and not necessarily a
623 * PFS under a specific device mount (HMP). The distinction is important
624 * because the elements backing a cluster mount can change on the fly.
626 * Usually the first element under the cluster represents the original
627 * user-requested mount that bootstraps the whole mess. In significant
628 * setups the original is usually just a read-only media image (or
629 * representative file) that simply contains a bootstrap volume header
630 * listing the configuration.
632 struct hammer2_pfsmount {
634 TAILQ_ENTRY(hammer2_pfsmount) mntentry; /* hammer2_pfslist */
637 hammer2_mount_t *spmp_hmp; /* (spmp only) */
638 hammer2_inode_t *iroot; /* PFS root inode */
639 hammer2_inode_t *ihidden; /* PFS hidden directory */
640 struct lock lock; /* PFS lock for certain ops */
641 hammer2_off_t inode_count; /* copy of inode_count */
642 ccms_domain_t ccms_dom;
643 struct netexport export; /* nfs export */
644 int ronly; /* read-only mount */
645 struct malloc_type *minode;
646 struct malloc_type *mmsg;
647 struct spinlock inum_spin; /* inumber lookup */
648 struct hammer2_inode_tree inum_tree; /* (not applicable to spmp) */
649 hammer2_tid_t alloc_tid;
650 hammer2_tid_t flush_tid;
651 hammer2_tid_t inode_tid;
653 uint32_t inmem_dirty_chains;
654 int count_lwinprog; /* logical write in prog */
655 struct spinlock list_spin;
656 struct h2_unlk_list unlinkq; /* last-close unlink */
657 thread_t wthread_td; /* write thread td */
658 struct bio_queue_head wthread_bioq; /* logical buffer bioq */
659 struct mtx wthread_mtx; /* interlock */
660 int wthread_destroy;/* termination sequencing */
663 typedef struct hammer2_pfsmount hammer2_pfsmount_t;
665 #define HAMMER2_DIRTYCHAIN_WAITING 0x80000000
666 #define HAMMER2_DIRTYCHAIN_MASK 0x7FFFFFFF
668 #define HAMMER2_LWINPROG_WAITING 0x80000000
669 #define HAMMER2_LWINPROG_MASK 0x7FFFFFFF
674 #define HAMMER2_BULK_ABORT 0x00000001
681 MALLOC_DECLARE(M_HAMMER2);
683 #define VTOI(vp) ((hammer2_inode_t *)(vp)->v_data) /* vnode -> inode stored in v_data */
684 #define ITOV(ip) ((ip)->vp) /* inode -> its vp member */
687 * Currently locked chains retain the locked buffer cache buffer for
688 * indirect blocks, and indirect blocks can be one of two sizes. The
689 * device buffer has to match the case to avoid deadlocking recursive
690 * chains that might otherwise try to access different offsets within
691 * the same device buffer.
695 hammer2_devblkradix(int radix) /* pick one of the two device buffer radixes for a block radix */
697 if (radix <= HAMMER2_LBUFRADIX) { /* at or below LBUFRADIX: use LBUFRADIX */
698 return (HAMMER2_LBUFRADIX);
700 return (HAMMER2_PBUFRADIX); /* anything above LBUFRADIX: use PBUFRADIX */
706 hammer2_devblksize(size_t bytes) /* pick one of the two device buffer sizes for a request of `bytes` */
708 if (bytes <= HAMMER2_LBUFSIZE) { /* at most LBUFSIZE bytes: use an LBUFSIZE buffer */
709 return(HAMMER2_LBUFSIZE);
711 KKASSERT(bytes <= HAMMER2_PBUFSIZE && /* larger requests must still fit in PBUFSIZE ... */
712 (bytes ^ (bytes - 1)) == ((bytes << 1) - 1)); /* ... and, being nonzero here, must be a power of 2 */
713 return (HAMMER2_PBUFSIZE);
720 MPTOPMP(struct mount *mp) /* mount -> PFS mount structure kept in mnt_data */
722 return ((hammer2_pfsmount_t *)mp->mnt_data);
725 #define LOCKSTART int __nlocks = curthread->td_locks /* declare __nlocks as a snapshot of curthread->td_locks */
726 #define LOCKENTER (++curthread->td_locks) /* bump the current thread's td_locks */
727 #define LOCKEXIT (--curthread->td_locks) /* undo one LOCKENTER */
728 #define LOCKSTOP KKASSERT(curthread->td_locks == __nlocks) /* assert td_locks is back at the LOCKSTART snapshot */
730 extern struct vop_ops hammer2_vnode_vops;
731 extern struct vop_ops hammer2_spec_vops;
732 extern struct vop_ops hammer2_fifo_vops;
734 extern int hammer2_debug;
735 extern int hammer2_cluster_enable;
736 extern int hammer2_hardlink_enable;
737 extern int hammer2_flush_pipe;
738 extern int hammer2_synchronous_flush;
739 extern int hammer2_dio_count;
740 extern long hammer2_limit_dirty_chains;
741 extern long hammer2_iod_file_read;
742 extern long hammer2_iod_meta_read;
743 extern long hammer2_iod_indr_read;
744 extern long hammer2_iod_fmap_read;
745 extern long hammer2_iod_volu_read;
746 extern long hammer2_iod_file_write;
747 extern long hammer2_iod_meta_write;
748 extern long hammer2_iod_indr_write;
749 extern long hammer2_iod_fmap_write;
750 extern long hammer2_iod_volu_write;
751 extern long hammer2_ioa_file_read;
752 extern long hammer2_ioa_meta_read;
753 extern long hammer2_ioa_indr_read;
754 extern long hammer2_ioa_fmap_read;
755 extern long hammer2_ioa_volu_read;
756 extern long hammer2_ioa_file_write;
757 extern long hammer2_ioa_meta_write;
758 extern long hammer2_ioa_indr_write;
759 extern long hammer2_ioa_fmap_write;
760 extern long hammer2_ioa_volu_write;
762 extern struct objcache *cache_buffer_read;
763 extern struct objcache *cache_buffer_write;
766 extern int write_thread_wakeup;
768 extern mtx_t thread_protect;
773 #define hammer2_icrc32(buf, size) iscsi_crc32((buf), (size)) /* alias for iscsi_crc32() */
774 #define hammer2_icrc32c(buf, size, crc) iscsi_crc32_ext((buf), (size), (crc)) /* alias for iscsi_crc32_ext(); crc is passed through as the third argument */
776 int hammer2_signal_check(time_t *timep);
777 hammer2_cluster_t *hammer2_inode_lock_ex(hammer2_inode_t *ip);
778 hammer2_cluster_t *hammer2_inode_lock_sh(hammer2_inode_t *ip);
779 void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *chain);
780 void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *chain);
781 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
782 void hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate);
783 ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip);
784 void hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate);
786 void hammer2_mount_exlock(hammer2_mount_t *hmp);
787 void hammer2_mount_shlock(hammer2_mount_t *hmp);
788 void hammer2_mount_unlock(hammer2_mount_t *hmp);
790 int hammer2_get_dtype(const hammer2_inode_data_t *ipdata);
791 int hammer2_get_vtype(const hammer2_inode_data_t *ipdata);
792 u_int8_t hammer2_get_obj_type(enum vtype vtype);
793 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
794 u_int64_t hammer2_timespec_to_time(const struct timespec *ts);
795 u_int32_t hammer2_to_unix_xid(const uuid_t *uuid);
796 void hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
797 hammer2_xid_t hammer2_trans_newxid(hammer2_pfsmount_t *pmp);
798 void hammer2_trans_manage_init(void);
800 hammer2_key_t hammer2_dirhash(const unsigned char *name, size_t len);
801 int hammer2_getradix(size_t bytes);
803 int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
804 hammer2_key_t *lbasep, hammer2_key_t *leofp);
805 int hammer2_calc_physical(hammer2_inode_t *ip,
806 const hammer2_inode_data_t *ipdata,
807 hammer2_key_t lbase);
808 void hammer2_update_time(uint64_t *timep);
809 void hammer2_adjreadcounter(hammer2_blockref_t *bref, size_t bytes);
814 struct vnode *hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent,
816 void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
817 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
818 hammer2_inode_t *hammer2_inode_lookup(hammer2_pfsmount_t *pmp,
820 hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp,
821 hammer2_inode_t *dip, hammer2_cluster_t *cluster);
822 void hammer2_inode_free(hammer2_inode_t *ip);
823 void hammer2_inode_ref(hammer2_inode_t *ip);
824 void hammer2_inode_drop(hammer2_inode_t *ip);
825 void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
826 hammer2_cluster_t *cluster);
827 void hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp);
829 hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans,
830 hammer2_inode_t *dip,
831 struct vattr *vap, struct ucred *cred,
832 const uint8_t *name, size_t name_len,
833 hammer2_cluster_t **clusterp, int *errorp);
834 int hammer2_inode_connect(hammer2_trans_t *trans,
835 hammer2_cluster_t **clusterp, int hlink,
836 hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
837 const uint8_t *name, size_t name_len,
839 hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip,
840 hammer2_inode_t *tdip);
841 void hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
842 hammer2_cluster_t *cparent);
843 int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
844 const uint8_t *name, size_t name_len, int isdir,
845 int *hlinkp, struct nchandle *nch, int nlinks);
846 int hammer2_hardlink_consolidate(hammer2_trans_t *trans,
847 hammer2_inode_t *ip, hammer2_cluster_t **clusterp,
848 hammer2_inode_t *cdip, hammer2_cluster_t *cdcluster,
850 int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip,
851 hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
852 int hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_cluster_t **cparentp,
853 hammer2_cluster_t *cluster);
854 int hammer2_parent_find(hammer2_cluster_t **cparentp,
855 hammer2_cluster_t *cluster);
856 void hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp);
861 void hammer2_voldata_lock(hammer2_mount_t *hmp);
862 void hammer2_voldata_unlock(hammer2_mount_t *hmp);
863 void hammer2_voldata_modify(hammer2_mount_t *hmp);
864 hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp,
865 hammer2_pfsmount_t *pmp,
866 hammer2_trans_t *trans,
867 hammer2_blockref_t *bref);
868 void hammer2_chain_core_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain);
869 void hammer2_chain_ref(hammer2_chain_t *chain);
870 void hammer2_chain_drop(hammer2_chain_t *chain);
871 int hammer2_chain_lock(hammer2_chain_t *chain, int how);
872 const hammer2_media_data_t *hammer2_chain_rdata(hammer2_chain_t *chain);
873 hammer2_media_data_t *hammer2_chain_wdata(hammer2_chain_t *chain);
875 void hammer2_cluster_load_async(hammer2_cluster_t *cluster,
876 void (*callback)(hammer2_iocb_t *iocb),
878 void hammer2_chain_moved(hammer2_chain_t *chain);
879 void hammer2_chain_modify(hammer2_trans_t *trans,
880 hammer2_chain_t *chain, int flags);
881 void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
882 hammer2_chain_t *parent,
883 hammer2_chain_t *chain,
884 int nradix, int flags);
885 void hammer2_chain_unlock(hammer2_chain_t *chain);
886 void hammer2_chain_wait(hammer2_chain_t *chain);
/*
 * Chain lookup and iteration prototypes.  Every function except
 * hammer2_chain_lookup_done() returns a hammer2_chain_t pointer.
 *
 * hammer2_chain_lookup_init()/hammer2_chain_lookup_done() bracket a
 * lookup on a parent chain.
 * hammer2_chain_lookup() and hammer2_chain_next() take the parent by
 * reference (parentp), a key range [key_beg, key_end], an output
 * key_nextp and a cache_indexp hint; only lookup() has the extra ddflagp
 * output.
 * hammer2_chain_scan() takes the parent by value and has no key range.
 */
887 hammer2_chain_t *hammer2_chain_get(hammer2_chain_t *parent, int generation,
888 hammer2_blockref_t *bref);
889 hammer2_chain_t *hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags);
890 void hammer2_chain_lookup_done(hammer2_chain_t *parent);
891 hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp,
892 hammer2_key_t *key_nextp,
893 hammer2_key_t key_beg, hammer2_key_t key_end,
894 int *cache_indexp, int flags, int *ddflagp);
895 hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp,
896 hammer2_chain_t *chain,
897 hammer2_key_t *key_nextp,
898 hammer2_key_t key_beg, hammer2_key_t key_end,
899 int *cache_indexp, int flags);
900 hammer2_chain_t *hammer2_chain_scan(hammer2_chain_t *parent,
901 hammer2_chain_t *chain,
902 int *cache_indexp, int flags);
/*
 * Chain creation, rename, snapshot and deletion prototypes.
 *
 * hammer2_chain_create() and hammer2_chain_snapshot() return an int; the
 * others return void.  create() takes both parent and chain by reference
 * together with the key/keybits, the type, a byte size and flags.
 * hammer2_chain_delete() takes the parent and chain by value, whereas
 * hammer2_chain_delete_duplicate() takes the chain by reference.
 */
904 int hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp,
905 hammer2_chain_t **chainp,
906 hammer2_pfsmount_t *pmp,
907 hammer2_key_t key, int keybits,
908 int type, size_t bytes, int flags);
909 void hammer2_chain_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
910 hammer2_chain_t **parentp,
911 hammer2_chain_t *chain, int flags);
912 int hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t **chainp,
913 hammer2_ioc_pfs_t *pfs);
914 void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *parent,
915 hammer2_chain_t *chain, int flags);
916 void hammer2_chain_delete_duplicate(hammer2_trans_t *trans,
917 hammer2_chain_t **chainp, int flags);
/*
 * Flush, commit, blockref counting and check-code prototypes.
 *
 * hammer2_flush(), hammer2_chain_commit() and hammer2_chain_setflush()
 * share the (trans, chain) signature.
 * hammer2_chain_countbrefs() is given a blockref array (base) and its
 * element count.
 * hammer2_chain_setcheck() and hammer2_chain_testcheck() both take the
 * chain and a raw data pointer; only testcheck() returns a result (int).
 */
918 void hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t *chain);
919 void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain);
920 void hammer2_chain_setflush(hammer2_trans_t *trans, hammer2_chain_t *chain);
921 void hammer2_chain_countbrefs(hammer2_chain_t *chain,
922 hammer2_blockref_t *base, int count);
924 void hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata);
925 int hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata);
/*
 * Per-PFS memory accounting hooks: wait, increment and wakeup, each
 * operating on a hammer2_pfsmount_t and returning nothing.
 */
928 void hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp);
929 void hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp);
930 void hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp);
/*
 * Blockref-array delete/insert prototypes.  The two functions have
 * identical signatures: transaction, owning chain, the blockref array
 * (base) with its element count, a cache_indexp hint and the child chain
 * being removed or added.
 */
932 void hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
933 hammer2_blockref_t *base, int count,
934 int *cache_indexp, hammer2_chain_t *child);
935 void hammer2_base_insert(hammer2_trans_t *trans, hammer2_chain_t *chain,
936 hammer2_blockref_t *base, int count,
937 int *cache_indexp, hammer2_chain_t *child);
/*
 * Transaction setup and teardown prototypes.
 *
 * NOTE(review): the hammer2_trans_init() parameter list ends on a
 * trailing comma with no closing ");" before the next declaration -- a
 * final parameter line appears to be missing.  Restore it from the
 * implementation.
 */
942 void hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp,
944 void hammer2_trans_spmp(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp);
945 void hammer2_trans_done(hammer2_trans_t *trans);
/*
 * ioctl entry point: takes the inode, the command word (com), an opaque
 * data pointer, the file flags and the caller's credentials; returns int
 * (presumably an errno value -- TODO confirm).
 */
950 int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data,
951 int fflag, struct ucred *cred);
/*
 * Low-level hammer2_io_t (dio) block acquisition and completion
 * prototypes.
 *
 * hammer2_io_getblk() is keyed by (hmp, lbase, lsize) and is driven by a
 * hammer2_iocb_t; hammer2_io_complete() and hammer2_iocb_wait() operate
 * on that same iocb type.  hammer2_io_putblk() takes the dio by
 * reference.  hammer2_io_data() returns a char pointer for a given lbase
 * within a dio.
 */
956 void hammer2_io_putblk(hammer2_io_t **diop);
957 void hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree);
958 char *hammer2_io_data(hammer2_io_t *dio, off_t lbase);
959 void hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize,
960 hammer2_iocb_t *iocb);
961 void hammer2_io_complete(hammer2_iocb_t *iocb);
962 void hammer2_io_callback(struct bio *bio);
963 void hammer2_iocb_wait(hammer2_iocb_t *iocb);
/*
 * Buffer-style dio interface.
 *
 * hammer2_io_new(), _newnz(), _newq() and _bread() share one signature:
 * (hmp, lbase, lsize) identify the block, the dio is returned through
 * diop, and the int result reports status.
 * The write and release calls (bawrite, bdwrite, bwrite, brelse, bqrelse)
 * take the dio by reference; of these only hammer2_io_bwrite() returns
 * an int.
 * hammer2_io_isdirty(), _setdirty() and _setinval() take the dio by
 * value.
 */
964 int hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
965 hammer2_io_t **diop);
966 int hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
967 hammer2_io_t **diop);
968 int hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
969 hammer2_io_t **diop);
970 int hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
971 hammer2_io_t **diop);
972 void hammer2_io_bawrite(hammer2_io_t **diop);
973 void hammer2_io_bdwrite(hammer2_io_t **diop);
974 int hammer2_io_bwrite(hammer2_io_t **diop);
975 int hammer2_io_isdirty(hammer2_io_t *dio);
976 void hammer2_io_setdirty(hammer2_io_t *dio);
977 void hammer2_io_setinval(hammer2_io_t *dio, u_int bytes);
978 void hammer2_io_brelse(hammer2_io_t **diop);
979 void hammer2_io_bqrelse(hammer2_io_t **diop);
/*
 * kdmsg message handlers.  Both receive a kdmsg_msg_t pointer and return
 * an int.
 */
984 int hammer2_msg_dbg_rcvmsg(kdmsg_msg_t *msg);
985 int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
/*
 * Mount-level helpers: cluster control/reconnect, volume configuration
 * update, chain dump, bio queue sync, VFS sync and the lwinprog
 * ref/drop/wait trio.
 *
 * hammer2_vfs_sync() is the only one that returns a value (int) and the
 * only one that takes a generic struct mount rather than a HAMMER2 type.
 * hammer2_dump_chain() takes an indentation value (tab), a counter
 * pointer (countp) and a prefix character (pfx).
 */
990 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom);
991 void hammer2_volconf_update(hammer2_mount_t *hmp, int index);
992 void hammer2_cluster_reconnect(hammer2_mount_t *hmp, struct file *fp);
993 void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx);
994 void hammer2_bioq_sync(hammer2_pfsmount_t *pmp);
995 int hammer2_vfs_sync(struct mount *mp, int waitflags);
996 void hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp);
997 void hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp);
998 void hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp);
/*
 * Freemap prototypes.  hammer2_freemap_adjust() takes the transaction,
 * the mount, a blockref and a 'how' selector.
 *
 * NOTE(review): the hammer2_freemap_alloc() parameter list ends on a
 * trailing comma with no closing ");" before the next declaration -- a
 * final parameter line appears to be missing.  Restore it from the
 * implementation.
 */
1003 int hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain,
1005 void hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
1006 hammer2_blockref_t *bref, int how);
/*
 * Cluster accessor and flag prototypes.
 *
 * hammer2_cluster_rdata()/hammer2_cluster_wdata() return a const and a
 * writable hammer2_media_data_t pointer respectively.
 * hammer2_cluster_from_chain() builds or obtains a cluster from a single
 * chain (exact ownership semantics are not visible here).
 * hammer2_cluster_bref() has no return value and takes a blockref
 * pointer, presumably as the output -- TODO confirm.
 * hammer2_cluster_setflush() and hammer2_cluster_setmethod_check() take
 * the transaction and the cluster.
 */
1011 int hammer2_cluster_need_resize(hammer2_cluster_t *cluster, int bytes);
1012 uint8_t hammer2_cluster_type(hammer2_cluster_t *cluster);
1013 const hammer2_media_data_t *hammer2_cluster_rdata(hammer2_cluster_t *cluster);
1014 hammer2_media_data_t *hammer2_cluster_wdata(hammer2_cluster_t *cluster);
1015 hammer2_cluster_t *hammer2_cluster_from_chain(hammer2_chain_t *chain);
1016 int hammer2_cluster_modified(hammer2_cluster_t *cluster);
1017 int hammer2_cluster_duplicated(hammer2_cluster_t *cluster);
1018 void hammer2_cluster_set_chainflags(hammer2_cluster_t *cluster, uint32_t flags);
1019 void hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref);
1020 void hammer2_cluster_setflush(hammer2_trans_t *trans,
1021 hammer2_cluster_t *cluster);
1022 void hammer2_cluster_setmethod_check(hammer2_trans_t *trans,
1023 hammer2_cluster_t *cluster, int check_algo);
/*
 * Cluster allocation, reference counting, locking, replace and copy
 * prototypes.
 *
 * hammer2_cluster_alloc() takes (pmp, trans, bref), i.e. the same
 * arguments as hammer2_chain_alloc() minus the hammer2_mount_t.
 * hammer2_cluster_ref()/hammer2_cluster_drop() and
 * hammer2_cluster_lock()/hammer2_cluster_unlock() are pairs; lock takes
 * a 'how' selector and returns an int.
 * hammer2_cluster_replace() and hammer2_cluster_replace_locked() both
 * take (dst, src).
 *
 * NOTE(review): the hammer2_cluster_copy() parameter list ends on a
 * trailing comma with no closing ");" before the next declaration -- a
 * final parameter line appears to be missing.  Restore it from the
 * implementation.
 */
1024 hammer2_cluster_t *hammer2_cluster_alloc(hammer2_pfsmount_t *pmp,
1025 hammer2_trans_t *trans,
1026 hammer2_blockref_t *bref);
1027 void hammer2_cluster_ref(hammer2_cluster_t *cluster);
1028 void hammer2_cluster_drop(hammer2_cluster_t *cluster);
1029 void hammer2_cluster_wait(hammer2_cluster_t *cluster);
1030 int hammer2_cluster_lock(hammer2_cluster_t *cluster, int how);
1031 void hammer2_cluster_replace(hammer2_cluster_t *dst, hammer2_cluster_t *src);
1032 void hammer2_cluster_replace_locked(hammer2_cluster_t *dst,
1033 hammer2_cluster_t *src);
1034 hammer2_cluster_t *hammer2_cluster_copy(hammer2_cluster_t *ocluster,
1036 void hammer2_cluster_unlock(hammer2_cluster_t *cluster);
/*
 * Cluster resize and modify prototypes.
 *
 * hammer2_cluster_resize() takes the same arguments as
 * hammer2_chain_resize(), with clusters in place of chains.
 * hammer2_cluster_modify_ip() returns a hammer2_inode_data_t pointer.
 *
 * NOTE(review): the parameter lists of hammer2_cluster_modify_ip() and
 * hammer2_cluster_modify() each end on a trailing comma with no closing
 * ");" before the next declaration -- a final parameter line appears to
 * be missing from both.  Restore them from the implementation.
 */
1037 void hammer2_cluster_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
1038 hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
1039 int nradix, int flags);
1040 hammer2_inode_data_t *hammer2_cluster_modify_ip(hammer2_trans_t *trans,
1041 hammer2_inode_t *ip, hammer2_cluster_t *cluster,
1043 void hammer2_cluster_modify(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
1045 void hammer2_cluster_modsync(hammer2_cluster_t *cluster);
/*
 * Cluster lookup and iteration prototypes.
 *
 * Unlike the hammer2_chain_lookup() family, these take the parent
 * cluster by value (cparent) and carry no cache_indexp argument.
 * hammer2_cluster_lookup() takes a key range [key_beg, key_end], an
 * output key_nextp, flags and a ddflagp output.
 *
 * NOTE(review): the parameter lists of hammer2_cluster_lookup_init() and
 * hammer2_cluster_next() each end on a trailing comma with no closing
 * ");" before the next declaration -- a final parameter line appears to
 * be missing from both.  Restore them from the implementation.
 */
1046 hammer2_cluster_t *hammer2_cluster_lookup_init(hammer2_cluster_t *cparent,
1048 void hammer2_cluster_lookup_done(hammer2_cluster_t *cparent);
1049 hammer2_cluster_t *hammer2_cluster_lookup(hammer2_cluster_t *cparent,
1050 hammer2_key_t *key_nextp,
1051 hammer2_key_t key_beg, hammer2_key_t key_end,
1052 int flags, int *ddflagp);
1053 hammer2_cluster_t *hammer2_cluster_next(hammer2_cluster_t *cparent,
1054 hammer2_cluster_t *cluster,
1055 hammer2_key_t *key_nextp,
1056 hammer2_key_t key_beg, hammer2_key_t key_end,
1058 hammer2_cluster_t *hammer2_cluster_scan(hammer2_cluster_t *cparent,
1059 hammer2_cluster_t *cluster, int flags);
/*
 * Cluster creation, rename, deletion, snapshot and parent prototypes.
 *
 * hammer2_cluster_create() takes the parent by value and the new cluster
 * by reference (clusterp), plus key/keybits, type, byte size and flags;
 * it returns an int, as does hammer2_cluster_snapshot().
 * hammer2_cluster_parent() returns a cluster pointer for the given
 * cluster.
 *
 * NOTE(review): the hammer2_cluster_rename() parameter list ends on a
 * trailing comma with no closing ");" before the next declaration -- a
 * final parameter line appears to be missing.  Restore it from the
 * implementation.
 */
1060 int hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
1061 hammer2_cluster_t **clusterp,
1062 hammer2_key_t key, int keybits,
1063 int type, size_t bytes, int flags);
1064 void hammer2_cluster_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
1065 hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
1067 void hammer2_cluster_delete(hammer2_trans_t *trans, hammer2_cluster_t *pcluster,
1068 hammer2_cluster_t *cluster, int flags);
1069 int hammer2_cluster_snapshot(hammer2_trans_t *trans,
1070 hammer2_cluster_t *ocluster, hammer2_ioc_pfs_t *pfs);
1071 hammer2_cluster_t *hammer2_cluster_parent(hammer2_cluster_t *cluster);
/*
 * Bulk scan and bulkfree prototypes.
 *
 * hammer2_bulk_scan() is given a parent chain and a per-chain callback
 * (func) that receives the chain and an opaque info pointer and returns
 * an int.
 * hammer2_bulkfree_pass() takes the mount and a hammer2_ioc_bulkfree
 * structure.
 *
 * NOTE(review): the hammer2_bulk_scan() parameter list ends on a trailing
 * comma with no closing ");" before the next declaration -- a final
 * parameter line appears to be missing (presumably the info pointer
 * passed through to func).  Restore it from the implementation.
 */
1073 int hammer2_bulk_scan(hammer2_trans_t *trans, hammer2_chain_t *parent,
1074 int (*func)(hammer2_chain_t *chain, void *info),
1076 int hammer2_bulkfree_pass(hammer2_mount_t *hmp,
1077 struct hammer2_ioc_bulkfree *bfi);
1079 #endif /* !_KERNEL */
1080 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */