2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.12 2007/11/30 00:16:56 dillon Exp $
37 * This header file contains structures used internally by the HAMMERFS
38 * implementation. See hammer_disk.h for on-disk structures.
41 #include <sys/param.h>
42 #include <sys/types.h>
43 #include <sys/kernel.h>
44 #include <sys/systm.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/vnode.h>
49 #include <sys/globaldata.h>
50 #include <sys/lockf.h>
52 #include <sys/queue.h>
53 #include <sys/globaldata.h>
56 #include "hammer_alist.h"
57 #include "hammer_disk.h"
58 #include "hammer_mount.h"
60 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
62 MALLOC_DECLARE(M_HAMMER);
67 * Key structure used for custom RB tree inode lookups. This prototypes
68 * the function hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).
/*
 * Lookup key for the inode red-black tree.  An in-memory inode is selected
 * by the pair (obj_id, obj_asof).  hammer_inode_info_t is a pointer to this
 * structure and is the key type named in the hammer_ino_rb_tree
 * RB_PROTOTYPEX() declaration.
 */
70 typedef struct hammer_inode_info {
71 u_int64_t obj_id; /* (key) object identifier */
72 hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */
73 } *hammer_inode_info_t;
76 * HAMMER Transaction tracking
78 struct hammer_transaction {
79 struct hammer_mount *hmp;
81 struct hammer_volume *rootvol;
84 typedef struct hammer_transaction *hammer_transaction_t;
93 struct thread *locktd;
97 hammer_islocked(struct hammer_lock *lock)
99 return(lock->lockcount != 0);
103 hammer_islastref(struct hammer_lock *lock)
105 return(lock->refs == 1);
109 * Structure used to represent an inode in-memory.
111 * The record and data associated with an inode may be out of sync with
112 * the disk (xDIRTY flags), or not even on the disk at all (ONDISK flag
115 * An inode may also hold a cache of unsynchronized records, used for
116 * database and directories only. Unsynchronized regular file data is
117 * stored in the buffer cache.
119 * NOTE: A file which is created and destroyed within the initial
120 * synchronization period can wind up not doing any disk I/O at all.
122 * Finally, an inode may cache numerous disk-referencing B-Tree cursors.
/*
 * Red-black tree of in-memory inodes.  Elements are struct hammer_inode,
 * linked through their rb_node member and ordered by hammer_ino_rb_compare.
 * The INFO extension adds a lookup keyed by hammer_inode_info_t.
 */
124 struct hammer_ino_rb_tree;
126 RB_HEAD(hammer_ino_rb_tree, hammer_inode);
127 RB_PROTOTYPEX(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
128 hammer_ino_rb_compare, hammer_inode_info_t);
/*
 * Red-black tree of in-memory records.  Elements are struct hammer_record,
 * linked through their rb_node member and ordered by hammer_rec_rb_compare.
 * The INFO extension adds a lookup keyed by hammer_base_elm_t.
 */
130 struct hammer_rec_rb_tree;
131 struct hammer_record;
132 RB_HEAD(hammer_rec_rb_tree, hammer_record);
133 RB_PROTOTYPEX(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
134 hammer_rec_rb_compare, hammer_base_elm_t);
/* Tail-queue head type for lists of struct hammer_node. */
136 TAILQ_HEAD(hammer_node_list, hammer_node);
138 struct hammer_inode {
139 RB_ENTRY(hammer_inode) rb_node;
140 u_int64_t obj_id; /* (key) object identifier */
141 hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */
142 hammer_tid_t last_tid; /* last modified tid (for fsync) */
143 struct hammer_mount *hmp;
146 struct lockf advlock;
147 struct hammer_lock lock;
148 struct hammer_inode_record ino_rec;
149 struct hammer_inode_data ino_data;
150 struct hammer_rec_rb_tree rec_tree; /* red-black record tree */
151 struct hammer_node *cache; /* cached B-Tree node shortcut */
154 typedef struct hammer_inode *hammer_inode_t;
/*
 * Vnode-to-inode conversion: yields vp->v_data cast to struct hammer_inode *.
 * The argument is parenthesized, so an expression may be passed for vp.
 */
156 #define VTOI(vp) ((struct hammer_inode *)(vp)->v_data)
/*
 * HAMMER_INODE_* flag bits.  Each value is a distinct single bit; 0x0008 is
 * not assigned in this list.
 * NOTE(review): presumably stored in a flags member of struct hammer_inode,
 * which is not visible in this excerpt -- confirm.
 */
158 #define HAMMER_INODE_DDIRTY 0x0001 /* in-memory ino_data is dirty */
159 #define HAMMER_INODE_RDIRTY 0x0002 /* in-memory ino_rec is dirty */
160 #define HAMMER_INODE_ITIMES 0x0004 /* in-memory mtime/atime modified */
161 #define HAMMER_INODE_ONDISK 0x0010 /* inode is on-disk (else not yet) */
162 #define HAMMER_INODE_FLUSH 0x0020 /* flush on last ref */
163 #define HAMMER_INODE_TID 0x0040 /* update in-memory last_tid */
164 #define HAMMER_INODE_DELETED 0x0080 /* inode ready for deletion */
165 #define HAMMER_INODE_DELONDISK 0x0100 /* delete synchronized to disk */
/*
 * NOTE(review): presumably an upper bound on per-inode cached cursors; it is
 * not referenced anywhere in the visible part of this header -- confirm.
 */
167 #define HAMMER_MAX_INODE_CURSORS 4
170 * Structure used to represent an unsynchronized record in-memory. This
171 * structure is organized in a per-inode RB-tree. If the inode is not
172 * on disk then neither are any records and the in-memory record tree
173 * represents the entire contents of the inode. If the inode is on disk
174 * then the on-disk B-Tree is scanned in parallel with the in-memory
175 * RB-Tree to synthesize the current state of the file.
177 * Only current (delete_tid == 0) unsynchronized records are kept in-memory.
179 struct hammer_record {
180 RB_ENTRY(hammer_record) rb_node;
181 struct hammer_lock lock;
182 struct hammer_inode *ip;
183 union hammer_record_ondisk rec;
184 union hammer_data_ondisk *data;
188 typedef struct hammer_record *hammer_record_t;
/*
 * HAMMER_RECF_* flag bits; each value is a distinct single bit.
 * NOTE(review): presumably stored in a flags member of struct hammer_record,
 * which is not visible in this excerpt.  The meaning of each bit is not
 * established here -- confirm against the code that sets and tests them.
 */
190 #define HAMMER_RECF_ALLOCDATA 0x0001
191 #define HAMMER_RECF_ONRBTREE 0x0002
192 #define HAMMER_RECF_DELETED 0x0004
193 #define HAMMER_RECF_EMBEDDED_DATA 0x0008
196 * Structures used to internally represent a volume and a cluster
/* Forward declarations so the tree heads below can name their element types. */
198 struct hammer_volume;
199 struct hammer_cluster;
200 struct hammer_supercl;
201 struct hammer_buffer;
/*
 * One red-black tree head type per in-memory structure kind: volumes,
 * clusters, super-clusters, buffers and B-Tree nodes.
 */
203 RB_HEAD(hammer_vol_rb_tree, hammer_volume);
204 RB_HEAD(hammer_clu_rb_tree, hammer_cluster);
205 RB_HEAD(hammer_scl_rb_tree, hammer_supercl);
206 RB_HEAD(hammer_buf_rb_tree, hammer_buffer);
207 RB_HEAD(hammer_nod_rb_tree, hammer_node);
/*
 * Prototypes for each tree.  Every element type is linked through a member
 * named rb_node, has its own comparison function, and is additionally
 * searchable by an int32_t key.
 */
209 RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node,
210 hammer_vol_rb_compare, int32_t);
211 RB_PROTOTYPE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
212 hammer_clu_rb_compare, int32_t);
213 RB_PROTOTYPE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
214 hammer_scl_rb_compare, int32_t);
215 RB_PROTOTYPE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
216 hammer_buf_rb_compare, int32_t);
217 RB_PROTOTYPE2(hammer_nod_rb_tree, hammer_node, rb_node,
218 hammer_nod_rb_compare, int32_t);
221 * IO management - embedded at the head of various in-memory structures
/*
 * Discriminator with one value per in-memory structure kind: volume,
 * super-cluster, cluster and buffer (the same four kinds that appear as
 * members of union hammer_io_structure).
 */
223 enum hammer_io_type { HAMMER_STRUCTURE_VOLUME,
224 HAMMER_STRUCTURE_SUPERCL,
225 HAMMER_STRUCTURE_CLUSTER,
226 HAMMER_STRUCTURE_BUFFER };
228 union hammer_io_structure;
231 LIST_ENTRY(worklist) node;
235 struct worklist worklist;
236 struct hammer_lock lock;
237 enum hammer_io_type type;
240 u_int modified : 1; /* bp's data was modified */
241 u_int released : 1; /* bp released (w/ B_LOCKED set) */
244 typedef struct hammer_io *hammer_io_t;
247 * In-memory volume representing on-disk buffer
249 struct hammer_volume {
251 RB_ENTRY(hammer_volume) rb_node;
252 struct hammer_clu_rb_tree rb_clus_root;
253 struct hammer_scl_rb_tree rb_scls_root;
254 struct hammer_volume_ondisk *ondisk;
255 struct hammer_alist_live alist;
258 int64_t cluster_base; /* base offset of cluster 0 */
261 struct hammer_mount *hmp;
265 typedef struct hammer_volume *hammer_volume_t;
268 * In-memory super-cluster representing on-disk buffer
270 struct hammer_supercl {
272 RB_ENTRY(hammer_supercl) rb_node;
273 struct hammer_supercl_ondisk *ondisk;
274 struct hammer_volume *volume;
275 struct hammer_alist_live alist;
279 typedef struct hammer_supercl *hammer_supercl_t;
282 * In-memory cluster representing on-disk buffer
284 * The cluster's indexing range is cached in hammer_cluster, separate
285 * from the ondisk info in order to allow cursors to point to it.
287 struct hammer_cluster {
289 RB_ENTRY(hammer_cluster) rb_node;
290 struct hammer_buf_rb_tree rb_bufs_root;
291 struct hammer_cluster_ondisk *ondisk;
292 struct hammer_volume *volume;
293 struct hammer_alist_live alist_master;
294 struct hammer_alist_live alist_btree;
295 struct hammer_alist_live alist_record;
296 struct hammer_alist_live alist_mdata;
297 struct hammer_nod_rb_tree rb_nods_root; /* cursors in cluster */
298 struct hammer_base_elm clu_btree_beg; /* copy of on-disk info */
299 struct hammer_base_elm clu_btree_end; /* copy of on-disk info */
303 typedef struct hammer_cluster *hammer_cluster_t;
306 * In-memory buffer (other than volume, super-cluster, or cluster),
307 * representing an on-disk buffer.
309 struct hammer_buffer {
311 RB_ENTRY(hammer_buffer) rb_node;
312 hammer_fsbuf_ondisk_t ondisk;
313 struct hammer_volume *volume;
314 struct hammer_cluster *cluster;
317 struct hammer_alist_live alist;
318 struct hammer_node_list clist;
319 struct hammer_node *save_scan;
322 typedef struct hammer_buffer *hammer_buffer_t;
325 * In-memory B-Tree node, representing an on-disk B-Tree node.
327 * This is a hang-on structure which is backed by a hammer_buffer,
328 * indexed by a hammer_cluster, and used for fine-grained locking of
329 * B-Tree nodes in order to properly control lock ordering. A hammer_buffer
330 * can contain multiple nodes representing wildly disassociated portions
331 * of the B-Tree so locking cannot be done on a buffer-by-buffer basis.
333 * This structure uses a cluster-relative index to reduce the number
334 * of layers required to access it, and also because all on-disk B-Tree
335 * references are cluster-relative offsets.
338 struct hammer_lock lock; /* node-by-node lock */
339 TAILQ_ENTRY(hammer_node) entry; /* per-buffer linkage */
340 RB_ENTRY(hammer_node) rb_node; /* per-cluster linkage */
341 int32_t node_offset; /* cluster-rel offset */
342 struct hammer_cluster *cluster;
343 struct hammer_buffer *buffer; /* backing buffer */
344 hammer_node_ondisk_t ondisk; /* ptr to on-disk structure */
345 struct hammer_node **cache1; /* passive cache(s) */
346 struct hammer_node **cache2;
349 typedef struct hammer_node *hammer_node_t;
352 * Common I/O management structure - embedded in in-memory structures
353 * which are backed by filesystem buffers.
355 union hammer_io_structure {
357 struct hammer_volume volume;
358 struct hammer_supercl supercl;
359 struct hammer_cluster cluster;
360 struct hammer_buffer buffer;
/*
 * NOTE(review): single flag bit; it is not referenced anywhere in the visible
 * part of this header, so where it is stored is not established here.
 */
363 #define HAMFS_CLUSTER_DIRTY 0x0001
365 #include "hammer_cursor.h"
368 * Internal hammer mount data structure
370 struct hammer_mount {
372 /*struct vnode *rootvp;*/
373 struct hammer_ino_rb_tree rb_inos_root;
374 struct hammer_vol_rb_tree rb_vols_root;
375 struct hammer_volume *rootvol;
376 struct hammer_cluster *rootcl;
377 char *zbuf; /* HAMMER_BUFSIZE bytes worth of all-zeros */
384 u_int32_t namekey_iterator;
387 typedef struct hammer_mount *hammer_mount_t;
/*
 * Global objects declared here and defined elsewhere: the vnode operations
 * vector, six hammer_alist_config instances, and the bio_ops structure.
 */
393 extern struct vop_ops hammer_vnode_vops;
394 extern struct hammer_alist_config Buf_alist_config;
395 extern struct hammer_alist_config Vol_normal_alist_config;
396 extern struct hammer_alist_config Vol_super_alist_config;
397 extern struct hammer_alist_config Supercl_alist_config;
398 extern struct hammer_alist_config Clu_master_alist_config;
399 extern struct hammer_alist_config Clu_slave_alist_config;
400 extern struct bio_ops hammer_bioops;
/*
 * Entry points taking the vop_inactive_args / vop_reclaim_args structures,
 * plus a lookup by mount point and inode number; vpp is a struct vnode **
 * out-parameter.  All three return int.
 */
402 int hammer_vop_inactive(struct vop_inactive_args *);
403 int hammer_vop_reclaim(struct vop_reclaim_args *);
404 int hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
405 int hammer_get_vnode(struct hammer_inode *ip, int lktype,
/*
 * In-memory inode access.  hammer_get_inode() is keyed by (obj_id, asof),
 * the same pair carried by struct hammer_inode_info, and has an int *errorp
 * out-parameter.
 */
407 struct hammer_inode *hammer_get_inode(hammer_mount_t hmp,
408 u_int64_t obj_id, hammer_tid_t asof, int *errorp);
409 int hammer_update_inode(hammer_inode_t ip);
410 void hammer_put_inode(struct hammer_inode *ip);
411 void hammer_put_inode_ref(struct hammer_inode *ip);
/*
 * The hammer_unload_*() functions share one shape: int f(element, void *data)
 * with the data argument marked __unused.
 * NOTE(review): this looks like an RB-tree scan callback signature --
 * confirm against the callers.
 */
413 int hammer_unload_inode(hammer_inode_t ip, void *data __unused);
414 int hammer_unload_volume(hammer_volume_t volume, void *data __unused);
415 int hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused);
416 int hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused);
417 int hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused);
/* Takes the mount and a volume name string; returns int. */
418 int hammer_install_volume(hammer_mount_t hmp, const char *volname);
/*
 * Inode-level record operations driven by a hammer_cursor_t.  All return int.
 * hammer_ip_lookup() and hammer_ip_first() also take the inode; the rest
 * operate on the cursor alone (hammer_ip_delete_record() adds a tid).
 * NOTE(review): hammer_cursor_t is presumably declared in hammer_cursor.h,
 * which this header includes -- confirm.
 */
420 int hammer_ip_lookup(hammer_cursor_t cursor, hammer_inode_t ip);
421 int hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip);
422 int hammer_ip_next(hammer_cursor_t cursor);
423 int hammer_ip_resolve_data(hammer_cursor_t cursor);
424 int hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid);
427 hammer_alloc_mem_record(hammer_inode_t ip);
/*
 * In-memory record teardown.  hammer_rel_mem_record() takes the address of
 * the caller's record pointer; hammer_free_mem_record() takes the record
 * itself.
 */
428 void hammer_rel_mem_record(struct hammer_record **recordp);
429 void hammer_free_mem_record(hammer_record_t record);
/*
 * Cursor movement; each returns int.  Only hammer_cursor_up() takes a
 * nonblock argument.
 */
431 int hammer_cursor_up(hammer_cursor_t cursor, int nonblock);
432 int hammer_cursor_toroot(hammer_cursor_t cursor);
433 int hammer_cursor_down(hammer_cursor_t cursor);
/*
 * Operations on struct hammer_lock.  Only hammer_lock_ex_try() returns a
 * value (int); every other function here returns void.
 * NOTE(review): the _ex/_sh suffixes presumably denote exclusive and shared
 * acquisition -- confirm against the implementation.
 */
435 void hammer_lock_ex(struct hammer_lock *lock);
436 int hammer_lock_ex_try(struct hammer_lock *lock);
437 void hammer_lock_sh(struct hammer_lock *lock);
438 void hammer_unlock(struct hammer_lock *lock);
439 void hammer_ref(struct hammer_lock *lock);
440 void hammer_unref(struct hammer_lock *lock);
441 void hammer_downgrade(struct hammer_lock *lock);
/* Conversions between a uuid_t and a 32-bit id, one in each direction. */
443 u_int32_t hammer_to_unix_xid(uuid_t *uuid);
444 void hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
/*
 * hammer_tid_t helpers: conversion to and from struct timespec, and three
 * functions that return a hammer_tid_t.  hammer_alloc_tid() and
 * hammer_alloc_recid() take the transaction; hammer_now_tid() takes nothing.
 */
445 void hammer_to_timespec(hammer_tid_t tid, struct timespec *ts);
446 hammer_tid_t hammer_timespec_to_transid(struct timespec *ts);
447 hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans);
448 hammer_tid_t hammer_now_tid(void);
449 hammer_tid_t hammer_alloc_recid(hammer_transaction_t trans);
/*
 * Mappings between the 8-bit obj_type value and enum vtype (both directions),
 * and from obj_type to an int.
 */
451 enum vtype hammer_get_vnode_type(u_int8_t obj_type);
452 int hammer_get_dtype(u_int8_t obj_type);
453 u_int8_t hammer_get_obj_type(enum vtype vtype);
/* Maps a name buffer of len bytes to a 64-bit key. */
454 int64_t hammer_directory_namekey(void *name, int len);
/*
 * Cursor setup from either a mount or an inode (both return int), and the
 * matching void teardown calls.
 */
456 int hammer_init_cursor_hmp(hammer_cursor_t cursor, hammer_mount_t hmp);
457 int hammer_init_cursor_ip(hammer_cursor_t cursor, hammer_inode_t ip);
459 void hammer_done_cursor(hammer_cursor_t cursor);
460 void hammer_mem_done(hammer_cursor_t cursor);
/*
 * B-Tree operations.  Lookup, extract, iterate, insert and delete all act
 * through a cursor and return int.  hammer_btree_cmp() compares two base
 * elements; hammer_btree_range_cmp() compares a cursor against one.
 */
462 int hammer_btree_lookup(hammer_cursor_t cursor);
463 int hammer_btree_extract(hammer_cursor_t cursor, int flags);
464 int hammer_btree_iterate(hammer_cursor_t cursor);
465 int hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm);
466 int hammer_btree_delete(hammer_cursor_t cursor);
467 int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
468 int hammer_btree_range_cmp(hammer_cursor_t cursor, hammer_base_elm_t key2);
/* Print helpers for an on-disk B-Tree node and for a single element. */
469 void hammer_print_btree_node(hammer_node_ondisk_t ondisk);
470 void hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i);
/*
 * Takes a cluster and a cluster offset (cloff) and returns a void *.
 * errorp and bufferp are out-parameters.
 */
472 void *hammer_bread(struct hammer_cluster *cluster, int32_t cloff,
473 u_int64_t buf_type, int *errorp,
474 struct hammer_buffer **bufferp);
/* Root volume / root cluster of a mount; each has an int *errorp out-param. */
476 hammer_volume_t hammer_get_root_volume(hammer_mount_t hmp, int *errorp);
477 hammer_cluster_t hammer_get_root_cluster(hammer_mount_t hmp, int *errorp);
/*
 * Lookups by int32_t number within the parent object (volume within mount,
 * super-cluster and cluster within volume, buffer within cluster).  Each has
 * an int *errorp out-parameter; the super-cluster and cluster variants also
 * take an isnew flag.
 */
479 hammer_volume_t hammer_get_volume(hammer_mount_t hmp,
480 int32_t vol_no, int *errorp);
481 hammer_supercl_t hammer_get_supercl(hammer_volume_t volume,
482 int32_t scl_no, int *errorp, int isnew);
483 hammer_cluster_t hammer_get_cluster(hammer_volume_t volume,
484 int32_t clu_no, int *errorp, int isnew);
485 hammer_buffer_t hammer_get_buffer(hammer_cluster_t cluster,
486 int32_t buf_no, u_int64_t buf_type, int *errorp);
/* The hammer_ref_*() functions return int. */
488 int hammer_ref_cluster(hammer_cluster_t cluster);
489 int hammer_ref_buffer(hammer_buffer_t buffer);
490 void hammer_flush_buffer_nodes(hammer_buffer_t buffer);
/* The hammer_rel_*() functions all take a flush argument and return void. */
493 void hammer_rel_volume(hammer_volume_t volume, int flush);
494 void hammer_rel_supercl(hammer_supercl_t supercl, int flush);
495 void hammer_rel_cluster(hammer_cluster_t cluster, int flush);
496 void hammer_rel_buffer(hammer_buffer_t buffer, int flush);
/*
 * B-Tree node access.  hammer_get_node() is keyed by a cluster plus a
 * node_offset (struct hammer_node carries a cluster-relative node_offset).
 * The cache calls operate on a struct hammer_node ** slot, the same type as
 * the cache1/cache2 members of struct hammer_node.
 */
498 hammer_node_t hammer_get_node(hammer_cluster_t cluster,
499 int32_t node_offset, int *errorp);
500 int hammer_ref_node(hammer_node_t node);
501 void hammer_rel_node(hammer_node_t node);
502 void hammer_cache_node(hammer_node_t node,
503 struct hammer_node **cache);
504 void hammer_uncache_node(struct hammer_node **cache);
505 void hammer_flush_node(hammer_node_t node);
/*
 * Each hammer_dup_*() takes the address of a destination pointer and a
 * source object of the same type.
 */
507 void hammer_dup_buffer(struct hammer_buffer **bufferp,
508 struct hammer_buffer *buffer);
509 void hammer_dup_cluster(struct hammer_cluster **clusterp,
510 struct hammer_cluster *cluster);
/*
 * Allocation within a cluster.  All three have an int *errorp out-parameter;
 * the data and record variants return void * and also have a bufferp
 * out-parameter.
 */
511 hammer_node_t hammer_alloc_btree(struct hammer_cluster *cluster, int *errorp);
512 void *hammer_alloc_data(struct hammer_cluster *cluster, int32_t bytes,
513 int *errorp, struct hammer_buffer **bufferp);
514 void *hammer_alloc_record(struct hammer_cluster *cluster,
515 int *errorp, struct hammer_buffer **bufferp);
/*
 * Freeing.  The *_ptr variants take a buffer plus a pointer; the btree
 * variant takes a cluster plus a cluster-relative offset (bclu_offset).
 */
516 void hammer_free_data_ptr(struct hammer_buffer *buffer,
517 void *data, int bytes);
518 void hammer_free_record_ptr(struct hammer_buffer *buffer,
519 union hammer_record_ondisk *rec);
520 void hammer_free_btree(struct hammer_cluster *cluster, int32_t bclu_offset);
521 void hammer_free_data(struct hammer_cluster *cluster, int32_t bclu_offset,
/* Frees by cluster plus cluster-relative offset (bclu_offset). */
523 void hammer_free_record(struct hammer_cluster *cluster, int32_t bclu_offset);
/*
 * The hammer_put_*() functions take the same (object, flush) arguments as
 * the hammer_rel_*() functions declared in this header.
 * NOTE(review): whether both families are implemented is not visible here.
 */
525 void hammer_put_volume(struct hammer_volume *volume, int flush);
526 void hammer_put_supercl(struct hammer_supercl *supercl, int flush);
527 void hammer_put_cluster(struct hammer_cluster *cluster, int flush);
528 void hammer_put_buffer(struct hammer_buffer *buffer, int flush);
/* Takes no arguments and returns nothing. */
530 void hammer_init_alist_config(void);
/*
 * Transaction bracket: start takes the transaction structure and the mount;
 * commit and abort take the transaction only.  All return void.
 */
532 void hammer_start_transaction(struct hammer_transaction *trans,
533 struct hammer_mount *hmp);
534 void hammer_commit_transaction(struct hammer_transaction *trans);
535 void hammer_abort_transaction(struct hammer_transaction *trans);
/*
 * Inode modification and creation under a transaction.
 * hammer_create_inode() takes the parent inode as dip and has ipp as a
 * struct hammer_inode ** out-parameter.
 */
537 void hammer_modify_inode(struct hammer_transaction *trans,
538 hammer_inode_t ip, int flags);
539 int hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap,
540 struct ucred *cred, struct hammer_inode *dip,
541 struct hammer_inode **ipp);
542 void hammer_rel_inode(hammer_inode_t ip, int flush);
543 int hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete);
545 int hammer_ip_add_directory(struct hammer_transaction *trans,
546 hammer_inode_t dip, struct namecache *ncp,
548 int hammer_ip_del_directory(struct hammer_transaction *trans,
549 hammer_cursor_t cursor, hammer_inode_t dip,
/*
 * Inode-level operations performed under a transaction.  The delete-range
 * call takes 64-bit range bounds; hammer_ip_sync_data() takes a 64-bit
 * offset plus a data pointer and byte count.  All return int.
 */
551 int hammer_ip_delete_range(struct hammer_transaction *trans,
552 hammer_inode_t ip, int64_t ran_beg, int64_t ran_end);
553 int hammer_ip_sync_data(struct hammer_transaction *trans,
554 hammer_inode_t ip, int64_t offset,
555 void *data, int bytes);
556 int hammer_ip_sync_record(hammer_record_t rec);
/*
 * Operations on struct hammer_io.  The read and new calls take the device
 * vnode (devvp) in addition to the io structure; release takes a flush flag.
 */
558 int hammer_io_read(struct vnode *devvp, struct hammer_io *io);
559 int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
560 void hammer_io_release(struct hammer_io *io, int flush);
561 int hammer_io_checkflush(hammer_io_t io);
566 * Inline support functions (not kernel specific)
569 hammer_modify_volume(struct hammer_volume *volume)
571 volume->io.modified = 1;
575 hammer_modify_supercl(struct hammer_supercl *supercl)
577 supercl->io.modified = 1;
581 hammer_modify_cluster(struct hammer_cluster *cluster)
583 cluster->io.modified = 1;
587 hammer_modify_buffer(struct hammer_buffer *buffer)
589 buffer->io.modified = 1;
593 hammer_modify_node(struct hammer_node *node)
595 node->buffer->io.modified = 1;
599 * Return the cluster-relative byte offset of an element within a buffer
602 hammer_bclu_offset(struct hammer_buffer *buffer, void *ptr)
606 bclu_offset = buffer->buf_no * HAMMER_BUFSIZE +
607 ((char *)ptr - (char *)buffer->ondisk);