2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.9 2007/11/26 05:03:11 dillon Exp $
37 * This header file contains structures used internally by the HAMMERFS
38 * implementation. See hammer_disk.h for on-disk structures.
41 #include <sys/param.h>
42 #include <sys/types.h>
43 #include <sys/kernel.h>
44 #include <sys/systm.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/vnode.h>
49 #include <sys/globaldata.h>
50 #include <sys/lockf.h>
52 #include <sys/queue.h>
53 #include <sys/globaldata.h>
56 #include "hammer_alist.h"
57 #include "hammer_disk.h"
58 #include "hammer_mount.h"
60 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
62 MALLOC_DECLARE(M_HAMMER);
67 * Key structure used for custom RB tree inode lookups. This prototypes
68 * the function hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).
/*
 * Lookup key for the inode RB tree.  The (obj_id, obj_asof) pair matches
 * the two key fields of struct hammer_inode.  Note that the typedef names
 * a POINTER to this structure; that pointer type is what the
 * RB_PROTOTYPEX() declaration for hammer_ino_rb_tree is given as its
 * final argument.
 */
70 typedef struct hammer_inode_info {
71 u_int64_t obj_id; /* (key) object identifier */
72 hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */
73 } *hammer_inode_info_t;
76 * HAMMER Transaction tracking
/*
 * State carried for one HAMMER transaction.  Prototypes in this header
 * take it either as struct hammer_transaction * or through the
 * hammer_transaction_t pointer typedef.
 */
78 struct hammer_transaction {
79 struct hammer_mount *hmp; /* mount (cf. hammer_start_transaction(trans, hmp)) */
81 struct hammer_volume *rootvol; /* NOTE(review): presumably the root volume used by the transaction -- confirm */
84 typedef struct hammer_transaction *hammer_transaction_t;
93 struct thread *locktd;
/*
 * Test whether the lock is held: the result is non-zero when
 * lock->lockcount is non-zero and 0 otherwise.  The lock is only read,
 * never modified.
 */
97 hammer_islocked(struct hammer_lock *lock)
99 return(lock->lockcount != 0);
/*
 * Test whether exactly one reference is outstanding: the result is
 * non-zero when lock->refs equals 1 and 0 otherwise.  The lock is only
 * read, never modified.
 */
103 hammer_islastref(struct hammer_lock *lock)
105 return(lock->refs == 1);
109 * Structure used to represent an inode in-memory.
111 * The record and data associated with an inode may be out of sync with
112 * the disk (xDIRTY flags), or not even on the disk at all (ONDISK flag clear).
115 * An inode may also hold a cache of unsynchronized records, used for
116 * database and directories only. Unsynchronized regular file data is
117 * stored in the buffer cache.
119 * NOTE: A file which is created and destroyed within the initial
120 * synchronization period can wind up not doing any disk I/O at all.
122 * Finally, an inode may cache numerous disk-referencing B-Tree cursors.
/*
 * RB tree of in-memory inodes.  Elements are struct hammer_inode, linked
 * through their rb_node member and compared with hammer_ino_rb_compare.
 * The INFO variant declared here is keyed by a hammer_inode_info_t.
 */
124 struct hammer_ino_rb_tree;
126 RB_HEAD(hammer_ino_rb_tree, hammer_inode);
127 RB_PROTOTYPEX(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
128 hammer_ino_rb_compare, hammer_inode_info_t);
/*
 * RB tree of in-memory records.  Elements are struct hammer_record,
 * linked through their rb_node member and compared with
 * hammer_rec_rb_compare.  The INFO variant declared here is keyed by a
 * hammer_base_elm_t.  The final line declares the tail-queue head type
 * used for lists of struct hammer_node.
 */
130 struct hammer_rec_rb_tree;
131 struct hammer_record;
132 RB_HEAD(hammer_rec_rb_tree, hammer_record);
133 RB_PROTOTYPEX(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
134 hammer_rec_rb_compare, hammer_base_elm_t);
136 TAILQ_HEAD(hammer_node_list, hammer_node);
/*
 * In-memory inode.  Keyed by (obj_id, obj_asof) and linked into a
 * hammer_ino_rb_tree through rb_node.  hammer_inode_t is a pointer to
 * this structure.
 */
138 struct hammer_inode {
139 RB_ENTRY(hammer_inode) rb_node; /* hammer_ino_rb_tree linkage */
140 u_int64_t obj_id; /* (key) object identifier */
141 hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */
142 hammer_tid_t last_tid; /* last modified tid (for fsync) */
143 struct hammer_mount *hmp; /* NOTE(review): presumably the owning mount -- confirm */
146 struct lockf advlock; /* NOTE(review): presumably advisory lock state -- confirm */
147 struct hammer_lock lock; /* hammer_lock embedded in the inode */
148 struct hammer_inode_record ino_rec; /* in-memory inode record (see HAMMER_INODE_RDIRTY) */
149 struct hammer_inode_data ino_data; /* in-memory inode data (see HAMMER_INODE_DDIRTY) */
150 struct hammer_rec_rb_tree rec_tree; /* red-black record tree */
151 struct hammer_node *cache; /* cached B-Tree node shortcut */
154 typedef struct hammer_inode *hammer_inode_t;
/* Obtain the HAMMER inode from a vnode pointer by casting vp->v_data. */
156 #define VTOI(vp) ((struct hammer_inode *)(vp)->v_data)
/*
 * HAMMER_INODE_* flag bits.  Each value is a distinct single bit, so the
 * flags can be OR'd together; bit 0x0008 is not assigned by any of the
 * definitions below.
 */
158 #define HAMMER_INODE_DDIRTY 0x0001 /* in-memory ino_data is dirty */
159 #define HAMMER_INODE_RDIRTY 0x0002 /* in-memory ino_rec is dirty */
160 #define HAMMER_INODE_ITIMES 0x0004 /* in-memory mtime/atime modified */
161 #define HAMMER_INODE_ONDISK 0x0010 /* inode is on-disk (else not yet) */
162 #define HAMMER_INODE_FLUSH 0x0020 /* flush on last ref */
163 #define HAMMER_INODE_TID 0x0040 /* update in-memory last_tid */
164 #define HAMMER_INODE_DELETED 0x0080 /* inode ready for deletion */
/* NOTE(review): presumably bounds the B-Tree cursors cached per inode -- confirm at the point of use */
166 #define HAMMER_MAX_INODE_CURSORS 4
169 * Structure used to represent an unsynchronized record in-memory. This
170 * structure is organized in a per-inode RB-tree. If the inode is not
171 * on disk then neither are any records and the in-memory record tree
172 * represents the entire contents of the inode. If the inode is on disk
173 * then the on-disk B-Tree is scanned in parallel with the in-memory
174 * RB-Tree to synthesize the current state of the file.
176 * Only current (delete_tid == 0) unsynchronized records are kept in-memory.
/*
 * In-memory record that has not yet been synchronized.  Records are kept
 * in a per-inode hammer_rec_rb_tree and linked through rb_node.
 * hammer_record_t is a pointer to this structure.
 */
178 struct hammer_record {
179 RB_ENTRY(hammer_record) rb_node; /* hammer_rec_rb_tree linkage */
180 struct hammer_lock lock; /* hammer_lock embedded in the record */
181 struct hammer_inode *ip; /* NOTE(review): presumably the inode whose rec_tree holds this record -- confirm */
182 union hammer_record_ondisk rec; /* record, held by value in the on-disk record layout */
183 union hammer_data_ondisk *data; /* pointer to data in the on-disk data layout */
187 typedef struct hammer_record *hammer_record_t;
/*
 * HAMMER_RECF_* flag bits; each value is a distinct single bit.
 * NOTE(review): individual meanings are not documented here -- describe
 * them against the code that sets and tests each bit.
 */
189 #define HAMMER_RECF_ALLOCDATA 0x0001
190 #define HAMMER_RECF_ONRBTREE 0x0002
191 #define HAMMER_RECF_DELETED 0x0004
192 #define HAMMER_RECF_EMBEDDED_DATA 0x0008
195 * Structures used to internally represent a volume and a cluster
/*
 * Forward declarations, followed by the RB tree head types and the
 * generated-function prototypes for volumes, clusters, super-clusters,
 * buffers and B-Tree nodes.  Every RB_PROTOTYPE2() below names rb_node
 * as the linkage member and is given int32_t as its final argument
 * (NOTE(review): presumably the lookup key type -- confirm against the
 * RB_PROTOTYPE2 definition).
 */
197 struct hammer_volume;
198 struct hammer_cluster;
199 struct hammer_supercl;
200 struct hammer_buffer;
202 RB_HEAD(hammer_vol_rb_tree, hammer_volume);
203 RB_HEAD(hammer_clu_rb_tree, hammer_cluster);
204 RB_HEAD(hammer_scl_rb_tree, hammer_supercl);
205 RB_HEAD(hammer_buf_rb_tree, hammer_buffer);
206 RB_HEAD(hammer_nod_rb_tree, hammer_node);
208 RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node,
209 hammer_vol_rb_compare, int32_t);
210 RB_PROTOTYPE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
211 hammer_clu_rb_compare, int32_t);
212 RB_PROTOTYPE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
213 hammer_scl_rb_compare, int32_t);
214 RB_PROTOTYPE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
215 hammer_buf_rb_compare, int32_t);
216 RB_PROTOTYPE2(hammer_nod_rb_tree, hammer_node, rb_node,
217 hammer_nod_rb_compare, int32_t);
220 * IO management - embedded at the head of various in-memory structures
/*
 * Kinds of in-memory structure that carry I/O management state: volume,
 * super-cluster, cluster and buffer.  No explicit values are given, so
 * the enumerators are numbered 0 through 3 in the order listed.
 */
222 enum hammer_io_type { HAMMER_STRUCTURE_VOLUME,
223 HAMMER_STRUCTURE_SUPERCL,
224 HAMMER_STRUCTURE_CLUSTER,
225 HAMMER_STRUCTURE_BUFFER };
227 union hammer_io_structure;
230 LIST_ENTRY(worklist) node;
234 struct worklist worklist;
235 struct hammer_lock lock;
236 enum hammer_io_type type;
239 u_int modified : 1; /* bp's data was modified */
240 u_int released : 1; /* bp released (w/ B_LOCKED set) */
243 typedef struct hammer_io *hammer_io_t;
246 * In-memory volume representing on-disk buffer
/*
 * In-memory volume.  Linked into a hammer_vol_rb_tree through rb_node
 * and itself the root of two RB trees: one of clusters and one of
 * super-clusters.  hammer_volume_t is a pointer to this structure.
 */
248 struct hammer_volume {
250 RB_ENTRY(hammer_volume) rb_node; /* hammer_vol_rb_tree linkage */
251 struct hammer_clu_rb_tree rb_clus_root; /* RB tree of struct hammer_cluster */
252 struct hammer_scl_rb_tree rb_scls_root; /* RB tree of struct hammer_supercl */
253 struct hammer_volume_ondisk *ondisk; /* pointer to the on-disk volume structure */
254 struct hammer_alist_live alist; /* hammer_alist_live instance for this volume */
257 int64_t cluster_base; /* base offset of cluster 0 */
260 struct hammer_mount *hmp; /* NOTE(review): presumably the owning mount -- confirm */
264 typedef struct hammer_volume *hammer_volume_t;
267 * In-memory super-cluster representing on-disk buffer
/*
 * In-memory super-cluster, linked into a hammer_scl_rb_tree through
 * rb_node.  hammer_supercl_t is a pointer to this structure.
 */
269 struct hammer_supercl {
271 RB_ENTRY(hammer_supercl) rb_node; /* hammer_scl_rb_tree linkage */
272 struct hammer_supercl_ondisk *ondisk; /* pointer to the on-disk super-cluster structure */
273 struct hammer_volume *volume; /* NOTE(review): presumably the containing volume -- confirm */
274 struct hammer_alist_live alist; /* hammer_alist_live instance for this super-cluster */
278 typedef struct hammer_supercl *hammer_supercl_t;
281 * In-memory cluster representing on-disk buffer
283 * The cluster's indexing range is cached in hammer_cluster, separate
284 * from the ondisk info in order to allow cursors to point to it.
/*
 * In-memory cluster.  Linked into a hammer_clu_rb_tree through rb_node
 * and itself the root of an RB tree of buffers and an RB tree of B-Tree
 * nodes.  hammer_cluster_t is a pointer to this structure.
 */
286 struct hammer_cluster {
288 RB_ENTRY(hammer_cluster) rb_node; /* hammer_clu_rb_tree linkage */
289 struct hammer_buf_rb_tree rb_bufs_root; /* RB tree of struct hammer_buffer */
290 struct hammer_cluster_ondisk *ondisk; /* pointer to the on-disk cluster structure */
291 struct hammer_volume *volume; /* NOTE(review): presumably the containing volume -- confirm */
/* Four separate hammer_alist_live instances: master, btree, record, mdata. */
292 struct hammer_alist_live alist_master;
293 struct hammer_alist_live alist_btree;
294 struct hammer_alist_live alist_record;
295 struct hammer_alist_live alist_mdata;
296 struct hammer_nod_rb_tree rb_nods_root; /* cursors in cluster */
297 struct hammer_base_elm clu_btree_beg; /* copy of on-disk info */
298 struct hammer_base_elm clu_btree_end; /* copy of on-disk info */
302 typedef struct hammer_cluster *hammer_cluster_t;
305 * In-memory buffer (other than volume, super-cluster, or cluster),
306 * representing an on-disk buffer.
/*
 * In-memory buffer, linked into a hammer_buf_rb_tree through rb_node.
 * hammer_buffer_t is a pointer to this structure.
 */
308 struct hammer_buffer {
310 RB_ENTRY(hammer_buffer) rb_node; /* hammer_buf_rb_tree linkage */
311 hammer_fsbuf_ondisk_t ondisk; /* on-disk buffer; base address for hammer_bclu_offset() */
312 struct hammer_volume *volume; /* NOTE(review): presumably the containing volume -- confirm */
313 struct hammer_cluster *cluster; /* NOTE(review): presumably the containing cluster -- confirm */
316 struct hammer_alist_live alist; /* hammer_alist_live instance for this buffer */
317 struct hammer_node_list clist; /* tail queue of struct hammer_node */
318 struct hammer_node *save_scan; /* NOTE(review): purpose not evident from this header -- document at point of use */
321 typedef struct hammer_buffer *hammer_buffer_t;
324 * In-memory B-Tree node, representing an on-disk B-Tree node.
326 * This is a hang-on structure which is backed by a hammer_buffer,
327 * indexed by a hammer_cluster, and used for fine-grained locking of
328 * B-Tree nodes in order to properly control lock ordering. A hammer_buffer
329 * can contain multiple nodes representing wildly disassociated portions
330 * of the B-Tree so locking cannot be done on a buffer-by-buffer basis.
332 * This structure uses a cluster-relative index to reduce the number
333 * of layers required to access it, and also because all on-disk B-Tree
334 * references are cluster-relative offsets.
337 struct hammer_lock lock; /* node-by-node lock */
338 TAILQ_ENTRY(hammer_node) entry; /* per-buffer linkage */
339 RB_ENTRY(hammer_node) rb_node; /* per-cluster linkage */
340 int32_t node_offset; /* cluster-rel offset */
341 struct hammer_cluster *cluster;
342 struct hammer_buffer *buffer; /* backing buffer */
343 hammer_node_ondisk_t ondisk; /* ptr to on-disk structure */
344 struct hammer_node **cache1; /* passive cache(s) */
345 struct hammer_node **cache2;
348 typedef struct hammer_node *hammer_node_t;
351 * Common I/O management structure - embedded in in-memory structures
352 * which are backed by filesystem buffers.
/*
 * Overlay of the four in-memory structure types that are backed by
 * filesystem buffers: volume, super-cluster, cluster and buffer.  The
 * members line up one-for-one with the enum hammer_io_type values.
 */
354 union hammer_io_structure {
356 struct hammer_volume volume;
357 struct hammer_supercl supercl;
358 struct hammer_cluster cluster;
359 struct hammer_buffer buffer;
362 #define HAMFS_CLUSTER_DIRTY 0x0001
364 #include "hammer_cursor.h"
367 * Internal hammer mount data structure
/*
 * Per-mount HAMMER state.  Roots the RB tree of in-memory inodes and the
 * RB tree of in-memory volumes.  hammer_mount_t is a pointer to this
 * structure.
 */
369 struct hammer_mount {
371 /*struct vnode *rootvp;*/
372 struct hammer_ino_rb_tree rb_inos_root; /* RB tree of struct hammer_inode */
373 struct hammer_vol_rb_tree rb_vols_root; /* RB tree of struct hammer_volume */
374 struct hammer_volume *rootvol; /* NOTE(review): presumably the root volume (cf. hammer_get_root_volume()) -- confirm */
375 struct hammer_cluster *rootcl; /* NOTE(review): presumably the root cluster (cf. hammer_get_root_cluster()) -- confirm */
376 char *zbuf; /* HAMMER_BUFSIZE bytes worth of all-zeros */
379 u_int32_t namekey_iterator; /* NOTE(review): usage not evident from this header -- document at point of use */
382 typedef struct hammer_mount *hammer_mount_t;
/*
 * Objects defined in other HAMMER source files: the vnode operations
 * vector, six A-list configuration structures, and the bio_ops
 * structure.
 */
388 extern struct vop_ops hammer_vnode_vops;
389 extern struct hammer_alist_config Buf_alist_config;
390 extern struct hammer_alist_config Vol_normal_alist_config;
391 extern struct hammer_alist_config Vol_super_alist_config;
392 extern struct hammer_alist_config Supercl_alist_config;
393 extern struct hammer_alist_config Clu_master_alist_config;
394 extern struct hammer_alist_config Clu_slave_alist_config;
395 extern struct bio_ops hammer_bioops;
/*
 * Vnode and inode entry points.  hammer_get_inode() returns the inode
 * pointer directly and additionally takes an int *errorp
 * (NOTE(review): presumably receives an error code, with NULL returned
 * on failure -- confirm against the implementation).
 */
397 int hammer_vop_inactive(struct vop_inactive_args *);
398 int hammer_vop_reclaim(struct vop_reclaim_args *);
399 int hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
401 int hammer_get_vnode(struct hammer_inode *ip, int lktype,
403 struct hammer_inode *hammer_get_inode(hammer_mount_t hmp,
404 u_int64_t obj_id, int *errorp);
405 int hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip);
406 void hammer_put_inode(struct hammer_inode *ip);
407 void hammer_put_inode_ref(struct hammer_inode *ip);
/*
 * Unload routines, one per in-memory object type.  Each takes the object
 * plus a void *data argument that is marked __unused.
 * NOTE(review): the (object, void *) shape suggests these are used as
 * tree-scan callbacks -- confirm at the call sites.
 * hammer_install_volume() takes a mount and a volume name.
 */
409 int hammer_unload_inode(hammer_inode_t ip, void *data __unused);
410 int hammer_unload_volume(hammer_volume_t volume, void *data __unused);
411 int hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused);
412 int hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused);
413 int hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused);
414 int hammer_install_volume(hammer_mount_t hmp, const char *volname);
/*
 * Cursor-driven record access for an inode (lookup, first, next,
 * resolve_data), followed by the in-memory record allocate / release /
 * free routines.  hammer_rel_mem_record() takes a pointer to the
 * caller's record pointer (NOTE(review): presumably so it can clear it
 * -- confirm).
 */
416 int hammer_ip_lookup(hammer_cursor_t cursor, hammer_inode_t ip);
417 int hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip);
418 int hammer_ip_next(hammer_cursor_t cursor);
419 int hammer_ip_resolve_data(hammer_cursor_t cursor);
421 hammer_alloc_mem_record(struct hammer_transaction *trans,
423 void hammer_rel_mem_record(struct hammer_record **recordp);
424 void hammer_free_mem_record(hammer_record_t record);
/*
 * Cursor movement: up, to the root, and down.  Each takes only the
 * cursor and returns int (NOTE(review): presumably 0 or an errno value
 * -- confirm against the implementation).
 */
426 int hammer_cursor_up(hammer_cursor_t cursor);
427 int hammer_cursor_toroot(hammer_cursor_t cursor);
428 int hammer_cursor_down(hammer_cursor_t cursor);
/*
 * Lock and reference primitives; all operate on a struct hammer_lock.
 * hammer_lock_ex_try() is the only one that returns a value
 * (NOTE(review): success/failure encoding is not visible here --
 * confirm against the implementation).
 */
430 void hammer_lock_ex(struct hammer_lock *lock);
431 int hammer_lock_ex_try(struct hammer_lock *lock);
432 void hammer_lock_sh(struct hammer_lock *lock);
433 void hammer_unlock(struct hammer_lock *lock);
434 void hammer_ref(struct hammer_lock *lock);
435 void hammer_unref(struct hammer_lock *lock);
436 void hammer_downgrade(struct hammer_lock *lock);
/*
 * Conversion helpers (uuid <-> 32-bit id, transaction id <-> timespec,
 * HAMMER object type <-> vnode type / dtype), transaction and record id
 * allocation, and the directory name key function, which maps a
 * (name, len) pair to an int64_t.
 */
438 u_int32_t hammer_to_unix_xid(uuid_t *uuid);
439 void hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
440 void hammer_to_timespec(hammer_tid_t tid, struct timespec *ts);
441 hammer_tid_t hammer_timespec_to_transid(struct timespec *ts);
442 hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans);
443 hammer_tid_t hammer_alloc_recid(hammer_transaction_t trans);
445 enum vtype hammer_get_vnode_type(u_int8_t obj_type);
446 int hammer_get_dtype(u_int8_t obj_type);
447 u_int8_t hammer_get_obj_type(enum vtype vtype);
448 int64_t hammer_directory_namekey(void *name, int len);
/*
 * Cursor setup / teardown (a cursor can be initialized from either a
 * mount or an inode), the cursor-based B-Tree operations, two key
 * comparison functions, and two debugging print routines.
 */
450 int hammer_init_cursor_hmp(hammer_cursor_t cursor, hammer_mount_t hmp);
451 int hammer_init_cursor_ip(hammer_cursor_t cursor, hammer_inode_t ip);
452 void hammer_done_cursor(hammer_cursor_t cursor);
453 void hammer_mem_done(hammer_cursor_t cursor);
455 int hammer_btree_lookup(hammer_cursor_t cursor);
456 int hammer_btree_extract(hammer_cursor_t cursor, int flags);
457 int hammer_btree_iterate(hammer_cursor_t cursor);
458 int hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm);
459 int hammer_btree_delete(hammer_cursor_t cursor);
460 int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
461 int hammer_btree_range_cmp(hammer_cursor_t cursor, hammer_base_elm_t key2);
462 void hammer_print_btree_node(hammer_node_ondisk_t ondisk);
463 void hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i);
/*
 * Acquisition routines.  The hammer_get_*() functions return the object
 * pointer directly and take an int *errorp (NOTE(review): presumably
 * receives an error code -- confirm).  The supercl and cluster variants
 * also take an isnew flag.  hammer_bread() returns a void * and takes a
 * struct hammer_buffer ** in addition to errorp.
 */
465 void *hammer_bread(struct hammer_cluster *cluster, int32_t cloff,
466 u_int64_t buf_type, int *errorp,
467 struct hammer_buffer **bufferp);
469 hammer_volume_t hammer_get_root_volume(hammer_mount_t hmp, int *errorp);
470 hammer_cluster_t hammer_get_root_cluster(hammer_mount_t hmp, int *errorp);
472 hammer_volume_t hammer_get_volume(hammer_mount_t hmp,
473 int32_t vol_no, int *errorp);
474 hammer_supercl_t hammer_get_supercl(hammer_volume_t volume,
475 int32_t scl_no, int *errorp, int isnew);
476 hammer_cluster_t hammer_get_cluster(hammer_volume_t volume,
477 int32_t clu_no, int *errorp, int isnew);
478 hammer_buffer_t hammer_get_buffer(hammer_cluster_t cluster,
479 int32_t buf_no, u_int64_t buf_type, int *errorp);
481 int hammer_ref_cluster(hammer_cluster_t cluster);
482 int hammer_ref_buffer(hammer_buffer_t buffer);
483 void hammer_flush_buffer_nodes(hammer_buffer_t buffer);
/*
 * Release routines for volume / super-cluster / cluster / buffer, each
 * taking an int flush argument, followed by the B-Tree node routines.
 * hammer_get_node() locates a node by cluster and cluster-relative
 * node_offset.  The cache / uncache routines operate on a caller-owned
 * struct hammer_node * slot passed by address.
 */
486 void hammer_rel_volume(hammer_volume_t volume, int flush);
487 void hammer_rel_supercl(hammer_supercl_t supercl, int flush);
488 void hammer_rel_cluster(hammer_cluster_t cluster, int flush);
489 void hammer_rel_buffer(hammer_buffer_t buffer, int flush);
491 hammer_node_t hammer_get_node(hammer_cluster_t cluster,
492 int32_t node_offset, int *errorp);
493 int hammer_ref_node(hammer_node_t node);
494 void hammer_rel_node(hammer_node_t node);
495 void hammer_cache_node(hammer_node_t node,
496 struct hammer_node **cache);
497 void hammer_uncache_node(struct hammer_node **cache);
498 void hammer_flush_node(hammer_node_t node);
/*
 * Duplication, allocation and free routines.  The hammer_alloc_*()
 * functions allocate out of a cluster and take an int *errorp; the data
 * and record variants also take a struct hammer_buffer **.  The
 * hammer_free_*_ptr() functions identify the item by buffer and pointer,
 * whereas hammer_free_btree/data/record() identify it by cluster and
 * cluster-relative offset.  The hammer_put_*() functions each take an
 * int flush argument.
 */
500 void hammer_dup_buffer(struct hammer_buffer **bufferp,
501 struct hammer_buffer *buffer);
502 void hammer_dup_cluster(struct hammer_cluster **clusterp,
503 struct hammer_cluster *cluster);
504 hammer_node_t hammer_alloc_btree(struct hammer_cluster *cluster, int *errorp);
505 void *hammer_alloc_data(struct hammer_cluster *cluster, int32_t bytes,
506 int *errorp, struct hammer_buffer **bufferp);
507 void *hammer_alloc_record(struct hammer_cluster *cluster,
508 int *errorp, struct hammer_buffer **bufferp);
509 void hammer_free_btree_ptr(struct hammer_buffer *buffer,
510 hammer_node_ondisk_t node);
511 void hammer_free_data_ptr(struct hammer_buffer *buffer,
512 void *data, int bytes);
513 void hammer_free_record_ptr(struct hammer_buffer *buffer,
514 union hammer_record_ondisk *rec);
515 void hammer_free_btree(struct hammer_cluster *cluster, int32_t bclu_offset);
516 void hammer_free_data(struct hammer_cluster *cluster, int32_t bclu_offset,
518 void hammer_free_record(struct hammer_cluster *cluster, int32_t bclu_offset);
520 void hammer_put_volume(struct hammer_volume *volume, int flush);
521 void hammer_put_supercl(struct hammer_supercl *supercl, int flush);
522 void hammer_put_cluster(struct hammer_cluster *cluster, int flush);
523 void hammer_put_buffer(struct hammer_buffer *buffer, int flush);
/*
 * A-list configuration setup, the transaction life cycle (start /
 * commit / abort, none of which returns a value), and inode
 * modification, creation, release and synchronization.
 * hammer_create_inode() hands the new inode back through
 * struct hammer_inode **ipp and returns int.
 */
525 void hammer_init_alist_config(void);
527 void hammer_start_transaction(struct hammer_transaction *trans,
528 struct hammer_mount *hmp);
529 void hammer_commit_transaction(struct hammer_transaction *trans);
530 void hammer_abort_transaction(struct hammer_transaction *trans);
532 void hammer_modify_inode(struct hammer_transaction *trans,
533 hammer_inode_t ip, int flags);
534 int hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap,
535 struct ucred *cred, struct hammer_inode *dip,
536 struct hammer_inode **ipp);
537 void hammer_rel_inode(hammer_inode_t ip, int flush);
538 int hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete);
/*
 * Transaction-scoped inode operations (directory entry add / delete,
 * range delete, data sync), per-record sync, and the hammer_io routines.
 * hammer_io_read() and hammer_io_new() take the device vnode together
 * with the hammer_io; hammer_io_release() takes an int flush argument.
 */
540 int hammer_ip_add_directory(struct hammer_transaction *trans,
541 hammer_inode_t dip, struct namecache *ncp,
543 int hammer_ip_del_directory(struct hammer_transaction *trans,
544 hammer_cursor_t cursor, hammer_inode_t dip,
546 int hammer_ip_delete_range(struct hammer_transaction *trans,
547 hammer_inode_t ip, int64_t ran_beg, int64_t ran_end);
548 int hammer_ip_sync_data(struct hammer_transaction *trans,
549 hammer_inode_t ip, int64_t offset,
550 void *data, int bytes);
551 int hammer_ip_sync_record(hammer_record_t rec);
553 int hammer_io_read(struct vnode *devvp, struct hammer_io *io);
554 int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
555 void hammer_io_release(struct hammer_io *io, int flush);
556 int hammer_io_checkflush(hammer_io_t io);
561 * Inline support functions (not kernel specific)
/*
 * Mark a volume as modified by setting the modified bit of its embedded
 * io structure.  Nothing else is touched.
 */
564 hammer_modify_volume(struct hammer_volume *volume)
566 volume->io.modified = 1;
/*
 * Mark a super-cluster as modified by setting the modified bit of its
 * embedded io structure.  Nothing else is touched.
 */
570 hammer_modify_supercl(struct hammer_supercl *supercl)
572 supercl->io.modified = 1;
/*
 * Mark a cluster as modified by setting the modified bit of its embedded
 * io structure.  Nothing else is touched.
 */
576 hammer_modify_cluster(struct hammer_cluster *cluster)
578 cluster->io.modified = 1;
/*
 * Mark a buffer as modified by setting the modified bit of its embedded
 * io structure.  Nothing else is touched.
 */
582 hammer_modify_buffer(struct hammer_buffer *buffer)
584 buffer->io.modified = 1;
/*
 * Mark a B-Tree node as modified.  A node has no io structure of its
 * own, so the modified bit is set on the io structure of the node's
 * backing buffer (node->buffer).
 */
588 hammer_modify_node(struct hammer_node *node)
590 node->buffer->io.modified = 1;
594 * Return the cluster-relative byte offset of an element within a buffer
597 hammer_bclu_offset(struct hammer_buffer *buffer, void *ptr)
601 bclu_offset = buffer->buf_no * HAMMER_BUFSIZE +
602 ((char *)ptr - (char *)buffer->ondisk);