2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * This header file contains structures used internally by the HAMMER2
38 * implementation. See hammer2_disk.h for on-disk structures.
41 #ifndef _VFS_HAMMER2_HAMMER2_H_
42 #define _VFS_HAMMER2_HAMMER2_H_
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/kernel.h>
48 #include <sys/systm.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
54 #include <sys/mountctl.h>
57 #include <sys/thread.h>
58 #include <sys/globaldata.h>
59 #include <sys/lockf.h>
61 #include <sys/queue.h>
62 #include <sys/limits.h>
64 #include <sys/signal2.h>
68 #include "hammer2_disk.h"
69 #include "hammer2_mount.h"
70 #include "hammer2_ioctl.h"
71 #include "hammer2_ccms.h"
76 struct hammer2_pfsmount;
82 * The chain structure tracks blockref recursions all the way to the root
83 * volume. These consist of indirect blocks, inodes, and eventually the
84 * volume header itself.
86 * In situations where a duplicate is needed to represent different snapshots
87 * or flush points a new chain will be allocated but associated with the
88 * same shared chain_core. The RBTREE is contained in the shared chain_core
89 * and entries in the RBTREE are versioned.
91 * Duplication can occur whenever a chain must be modified. Note that
92 * a deletion is not considered a modification.
94 * (a) General modifications at data leafs
95 * (b) When a chain is resized
96 * (c) When a chain's blockref array is updated
97 * (d) When a chain is renamed
98 * (e) When a chain is moved (when an indirect block is split)
102 * (1) Fully coherent snapshots can be taken without requiring
103 * a pre-flush, resulting in extremely fast (sub-millisecond)
106 * (2) Multiple synchronization points can be in-flight at the same
107 * time, representing multiple snapshots or flushes.
109 * (3) The algorithms needed to keep track of everything are actually
112 * Special Considerations:
114 * A chain is ref-counted on a per-chain basis, but the chain's lock
115 * is associated with the shared chain_core and is not per-chain.
117 * Each chain is representative of a filesystem topology. Even
118 * though the shared chain_core's are effectively multi-homed, the
119 * chain structure is not.
121 * chain->parent is a stable pointer and can be iterated without locking
122 * as long as either the chain or *any* deep child under the chain
125 RB_HEAD(hammer2_chain_tree, hammer2_chain);
126 TAILQ_HEAD(flush_deferral_list, hammer2_chain);
128 struct hammer2_chain_core {
131 struct hammer2_chain_tree rbtree;
134 typedef struct hammer2_chain_core hammer2_chain_core_t;
136 struct hammer2_chain {
137 RB_ENTRY(hammer2_chain) rbnode;
138 hammer2_blockref_t bref;
139 hammer2_chain_core_t *core;
140 struct hammer2_chain *parent;
141 struct hammer2_state *state; /* if active cache msg */
142 struct hammer2_mount *hmp;
143 struct hammer2_chain *duplink; /* duplication link */
145 hammer2_tid_t modify_tid; /* snapshot/flush filter */
146 hammer2_tid_t delete_tid;
147 struct buf *bp; /* physical data buffer */
148 u_int bytes; /* physical data size */
149 int index; /* blockref index in parent */
152 hammer2_media_data_t *data; /* data pointer shortcut */
153 TAILQ_ENTRY(hammer2_chain) flush_node; /* flush deferral list */
156 typedef struct hammer2_chain hammer2_chain_t;
158 int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
159 RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
161 #define HAMMER2_CHAIN_MODIFIED 0x00000001 /* dirty chain data */
162 #define HAMMER2_CHAIN_ALLOCATED 0x00000002 /* kmalloc'd chain */
163 #define HAMMER2_CHAIN_DIRTYBP 0x00000004 /* dirty on unlock */
164 #define HAMMER2_CHAIN_SUBMODIFIED 0x00000008 /* 1+ subs modified */
165 #define HAMMER2_CHAIN_DELETED 0x00000010 /* deleted chain */
166 #define HAMMER2_CHAIN_INITIAL 0x00000020 /* initial create */
167 #define HAMMER2_CHAIN_FLUSHED 0x00000040 /* flush on unlock */
168 #define HAMMER2_CHAIN_MOVED 0x00000080 /* bref changed */
169 #define HAMMER2_CHAIN_IOFLUSH 0x00000100 /* bawrite on put */
170 #define HAMMER2_CHAIN_DEFERRED 0x00000200 /* on a deferral list */
171 #define HAMMER2_CHAIN_DESTROYED 0x00000400 /* destroying inode */
172 #define HAMMER2_CHAIN_VOLUMESYNC 0x00000800 /* needs volume sync */
173 #define HAMMER2_CHAIN_UNUSED1000 0x00001000
174 #define HAMMER2_CHAIN_MOUNTED 0x00002000 /* PFS is mounted */
175 #define HAMMER2_CHAIN_ONRBTREE 0x00004000 /* on parent RB tree */
178 * Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
180 #define HAMMER2_LOOKUP_NOLOCK 0x00000001 /* ref only */
181 #define HAMMER2_LOOKUP_NODATA 0x00000002 /* data left NULL */
182 #define HAMMER2_LOOKUP_SHARED 0x00000100 /* NOTE(review): presumably requests shared rather than exclusive locking -- confirm against hammer2_chain_lookup() */
185 * Flags passed to hammer2_chain_modify() and hammer2_chain_resize()
187 * NOTE: OPTDATA allows us to avoid instantiating buffers for INDIRECT
188 * blocks in the INITIAL-create state.
190 #define HAMMER2_MODIFY_NOSUB 0x00000001 /* do not set SUBMOD */
191 #define HAMMER2_MODIFY_OPTDATA 0x00000002 /* data can be NULL */
192 #define HAMMER2_MODIFY_NO_MODIFY_TID 0x00000004 /* NOTE(review): presumably leaves chain->modify_tid unchanged -- confirm against hammer2_chain_modify() */
195 * Flags passed to hammer2_chain_lock()
197 #define HAMMER2_RESOLVE_NEVER 1 /* enumerated value (not a bit), fits in RESOLVE_MASK */
198 #define HAMMER2_RESOLVE_MAYBE 2 /* enumerated value (not a bit), fits in RESOLVE_MASK */
199 #define HAMMER2_RESOLVE_ALWAYS 3 /* enumerated value (not a bit), fits in RESOLVE_MASK */
200 #define HAMMER2_RESOLVE_MASK 0x0F /* low 4 bits; covers NEVER/MAYBE/ALWAYS */
202 #define HAMMER2_RESOLVE_SHARED 0x10 /* separate bit, outside RESOLVE_MASK */
203 #define HAMMER2_RESOLVE_NOREF 0x20 /* separate bit, outside RESOLVE_MASK */
206 * Cluster different types of storage together for allocations
208 #define HAMMER2_FREECACHE_INODE 0
209 #define HAMMER2_FREECACHE_INDIR 1
210 #define HAMMER2_FREECACHE_DATA 2
211 #define HAMMER2_FREECACHE_UNUSED3 3 /* unassigned slot, per its name */
212 #define HAMMER2_FREECACHE_TYPES 4 /* slot count; first dimension of hammer2_mount.freecache[][] */
215 * BMAP read-ahead maximum parameters
217 #define HAMMER2_BMAP_COUNT 16 /* max bmap read-ahead */
218 #define HAMMER2_BMAP_BYTES (HAMMER2_PBUFSIZE * HAMMER2_BMAP_COUNT) /* the same limit in bytes; HAMMER2_PBUFSIZE comes from another header (presumably hammer2_disk.h -- confirm) */
223 #define HAMMER2_FLUSH_DEPTH_LIMIT 40 /* stack recursion limit */
226 * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
228 * There is an in-memory representation of all on-media data structures.
230 * When accessed read-only the data will be mapped to the related buffer
233 * When accessed read-write (marked modified) a kmalloc()'d copy of the
234 * data is created which can then be modified. The copy is destroyed when a
235 * filesystem block is allocated to replace it.
237 * Active inodes (those with vnodes attached) will maintain the kmalloc()'d
238 * copy for both the read-only and the read-write case. The combination of
239 * (bp) and (data) determines whether (data) was allocated or not.
241 * The in-memory representation may remain cached (for example in order to
242 * placemark clustering locks) even after the related data has been
249 * NOTE: The inode's attribute CST which is also used to lock the inode
250 * is embedded in the chain (chain.cst) and aliased w/ attr_cst.
252 struct hammer2_inode {
253 ccms_cst_t topo_cst; /* directory topology cst */
254 struct hammer2_mount *hmp; /* Global mount */
255 struct hammer2_pfsmount *pmp; /* PFS mount */
256 struct hammer2_inode *pip; /* parent inode */
258 hammer2_chain_t *chain; /* NOTE: rehomed on rename */
259 struct lockf advlock;
261 u_int refs; /* +vpref, +flushref */
264 typedef struct hammer2_inode hammer2_inode_t;
266 #define HAMMER2_INODE_MODIFIED 0x0001
267 #define HAMMER2_INODE_DIRTYEMBED 0x0002
268 #define HAMMER2_INODE_RENAME_INPROG 0x0004
271 * A hammer2 transaction placeholder.
273 * This structure is required for all modifying operations, including
274 * flushes. It holds the transaction id allocated for the modifying
275 * operation and is also used to interlock flushes and snapshots.
277 struct hammer2_trans {
278 struct hammer2_mount *hmp;
279 hammer2_tid_t sync_tid;
280 uint8_t inodes_created;
284 typedef struct hammer2_trans hammer2_trans_t;
289 struct hammer2_freecache {
291 hammer2_off_t single;
294 typedef struct hammer2_freecache hammer2_freecache_t;
297 * Global (per device) mount structure for device (aka vp->v_mount->hmp)
299 struct hammer2_mount {
300 struct vnode *devvp; /* device vnode */
301 int ronly; /* read-only mount */
302 int pmp_count; /* PFS mounts backed by us */
303 TAILQ_ENTRY(hammer2_mount) mntentry; /* hammer2_mntlist */
305 struct malloc_type *minode;
309 struct malloc_type *mchain;
312 hammer2_chain_t vchain; /* anchor chain */
313 hammer2_chain_t *schain; /* super-root */
314 hammer2_inode_t *sroot; /* super-root inode */
315 struct lock alloclk; /* lockmgr lock */
316 struct lock voldatalk; /* lockmgr lock */
318 int volhdrno; /* last volhdrno written */
319 hammer2_volume_data_t voldata;
320 hammer2_volume_data_t volsync; /* synchronized voldata */
321 hammer2_freecache_t freecache[HAMMER2_FREECACHE_TYPES]
322 [HAMMER2_MAX_RADIX+1];
325 typedef struct hammer2_mount hammer2_mount_t;
328 * Per-PFS mount structure for device (aka vp->v_mount)
330 struct hammer2_pfsmount {
331 struct mount *mp; /* kernel mount */
332 struct hammer2_mount *hmp; /* device global mount */
333 hammer2_chain_t *rchain; /* PFS root chain */
334 hammer2_inode_t *iroot; /* PFS root inode */
335 hammer2_off_t inode_count; /* copy of inode_count */
336 ccms_domain_t ccms_dom;
337 struct netexport export; /* nfs export */
338 int ronly; /* read-only mount */
339 struct malloc_type *mmsg;
343 typedef struct hammer2_pfsmount hammer2_pfsmount_t;
347 MALLOC_DECLARE(M_HAMMER2);
349 #define VTOI(vp) ((hammer2_inode_t *)(vp)->v_data) /* vnode -> inode: casts vp->v_data */
350 #define ITOV(ip) ((ip)->vp) /* inode -> vnode: reads the ip->vp member */
354 MPTOPMP(struct mount *mp)
356 return ((hammer2_pfsmount_t *)mp->mnt_data);
361 MPTOHMP(struct mount *mp)
363 return (((hammer2_pfsmount_t *)mp->mnt_data)->hmp);
366 extern struct vop_ops hammer2_vnode_vops;
367 extern struct vop_ops hammer2_spec_vops;
368 extern struct vop_ops hammer2_fifo_vops;
370 extern int hammer2_debug;
371 extern int hammer2_cluster_enable;
372 extern int hammer2_hardlink_enable;
373 extern long hammer2_iod_file_read;
374 extern long hammer2_iod_meta_read;
375 extern long hammer2_iod_indr_read;
376 extern long hammer2_iod_file_write;
377 extern long hammer2_iod_meta_write;
378 extern long hammer2_iod_indr_write;
379 extern long hammer2_iod_fmap_write;
380 extern long hammer2_iod_volu_write;
381 extern long hammer2_ioa_file_read;
382 extern long hammer2_ioa_meta_read;
383 extern long hammer2_ioa_indr_read;
384 extern long hammer2_ioa_file_write;
385 extern long hammer2_ioa_meta_write;
386 extern long hammer2_ioa_indr_write;
387 extern long hammer2_ioa_fmap_write;
388 extern long hammer2_ioa_volu_write;
393 #define hammer2_icrc32(buf, size) iscsi_crc32((buf), (size)) /* pass-through alias for iscsi_crc32() */
394 #define hammer2_icrc32c(buf, size, crc) iscsi_crc32_ext((buf), (size), (crc)) /* pass-through alias for iscsi_crc32_ext(); crc is presumably the running value to extend -- confirm */
396 void hammer2_inode_lock_ex(hammer2_inode_t *ip);
397 void hammer2_inode_lock_sh(hammer2_inode_t *ip);
398 void hammer2_inode_unlock_ex(hammer2_inode_t *ip);
399 void hammer2_inode_unlock_sh(hammer2_inode_t *ip);
400 void hammer2_voldata_lock(hammer2_mount_t *hmp);
401 void hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify);
402 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
403 ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip);
404 void hammer2_inode_lock_restore(hammer2_inode_t *ip, ccms_state_t ostate);
406 void hammer2_mount_exlock(hammer2_mount_t *hmp);
407 void hammer2_mount_shlock(hammer2_mount_t *hmp);
408 void hammer2_mount_unlock(hammer2_mount_t *hmp);
410 int hammer2_get_dtype(hammer2_chain_t *chain);
411 int hammer2_get_vtype(hammer2_chain_t *chain);
412 u_int8_t hammer2_get_obj_type(enum vtype vtype);
413 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
414 u_int64_t hammer2_timespec_to_time(struct timespec *ts);
415 u_int32_t hammer2_to_unix_xid(uuid_t *uuid);
416 void hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
418 hammer2_key_t hammer2_dirhash(const unsigned char *name, size_t len);
419 int hammer2_allocsize(size_t bytes);
421 int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
422 hammer2_key_t *lbasep, hammer2_key_t *leofp);
423 void hammer2_update_time(uint64_t *timep);
428 struct vnode *hammer2_igetv(hammer2_inode_t *ip, int *errorp);
430 void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
431 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
432 hammer2_inode_t *hammer2_inode_get(hammer2_mount_t *hmp,
433 hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
434 hammer2_chain_t *chain);
435 void hammer2_inode_put(hammer2_inode_t *ip);
436 void hammer2_inode_free(hammer2_inode_t *ip);
437 void hammer2_inode_ref(hammer2_inode_t *ip);
438 void hammer2_inode_drop(hammer2_inode_t *ip);
439 void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
440 hammer2_chain_t *chain);
441 int hammer2_inode_calc_alloc(hammer2_key_t filesize);
443 hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans,
444 hammer2_inode_t *dip,
445 struct vattr *vap, struct ucred *cred,
446 const uint8_t *name, size_t name_len,
448 hammer2_chain_t *hammer2_inode_duplicate(hammer2_trans_t *trans,
449 hammer2_chain_t *ochain,
450 hammer2_inode_t *dip, int *errorp);
451 int hammer2_inode_connect(hammer2_trans_t *trans,
452 hammer2_inode_t *dip,
453 hammer2_chain_t **chainp,
454 const uint8_t *name, size_t name_len);
455 hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip,
456 hammer2_inode_t *tdip);
458 int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
459 const uint8_t *name, size_t name_len, int isdir);
460 int hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip,
461 hammer2_chain_t **chainp,
462 hammer2_inode_t *tdip, int linkcnt);
463 int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip,
464 hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
465 int hammer2_hardlink_find(hammer2_inode_t *dip,
466 hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
471 void hammer2_modify_volume(hammer2_mount_t *hmp);
472 hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp,
473 hammer2_blockref_t *bref);
474 void hammer2_chain_core_alloc(hammer2_chain_t *chain,
475 hammer2_chain_core_t *core);
476 void hammer2_chain_free(hammer2_chain_t *chain);
477 void hammer2_chain_ref(hammer2_chain_t *chain);
478 void hammer2_chain_drop(hammer2_chain_t *chain);
479 int hammer2_chain_lock(hammer2_chain_t *chain, int how);
480 void hammer2_chain_moved(hammer2_chain_t *chain);
481 void hammer2_chain_modify(hammer2_trans_t *trans,
482 hammer2_chain_t *chain, int flags);
483 hammer2_inode_data_t *hammer2_chain_modify_ip(hammer2_trans_t *trans,
484 hammer2_inode_t *ip, int flags);
485 void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
487 hammer2_chain_t *parent,
488 hammer2_chain_t **chainp,
489 int nradix, int flags);
490 void hammer2_chain_unlock(hammer2_chain_t *chain);
491 void hammer2_chain_wait(hammer2_chain_t *chain);
492 hammer2_chain_t *hammer2_chain_find(hammer2_chain_t *parent, int index);
493 hammer2_chain_t *hammer2_chain_get(hammer2_chain_t *parent, int index,
495 hammer2_chain_t *hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags);
496 void hammer2_chain_lookup_done(hammer2_chain_t *parent);
497 hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp,
498 hammer2_key_t key_beg, hammer2_key_t key_end,
500 hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp,
501 hammer2_chain_t *chain,
502 hammer2_key_t key_beg, hammer2_key_t key_end,
504 int hammer2_chain_create(hammer2_trans_t *trans,
505 hammer2_chain_t *parent,
506 hammer2_chain_t **chainp,
507 hammer2_key_t key, int keybits,
508 int type, size_t bytes);
509 void hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t *parent,
511 hammer2_chain_t **chainp,
512 hammer2_blockref_t *bref);
513 void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *parent,
514 hammer2_chain_t *chain);
515 void hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t *chain);
516 void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain);
517 void hammer2_chain_parent_setsubmod(hammer2_chain_t *chain);
522 void hammer2_trans_init(hammer2_trans_t *trans, hammer2_mount_t *hmp);
523 void hammer2_trans_done(hammer2_trans_t *trans);
528 int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data,
529 int fflag, struct ucred *cred);
534 int hammer2_msg_dbg_rcvmsg(kdmsg_msg_t *msg);
535 int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
540 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom);
541 void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index);
542 void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp);
547 hammer2_off_t hammer2_freemap_alloc(hammer2_mount_t *hmp,
548 int type, size_t bytes);
549 void hammer2_freemap_free(hammer2_mount_t *hmp, hammer2_off_t data_off,
552 #endif /* !_KERNEL */
553 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */