2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.8 2007/11/20 22:55:40 dillon Exp $
37 * This header file contains structures used internally by the HAMMERFS
38 * implementation. See hammer_disk.h for on-disk structures.
41 #include <sys/param.h>
42 #include <sys/types.h>
43 #include <sys/kernel.h>
44 #include <sys/systm.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/vnode.h>
49 #include <sys/globaldata.h>
50 #include <sys/lockf.h>
52 #include <sys/queue.h>
53 #include <sys/globaldata.h>
56 #include "hammer_alist.h"
57 #include "hammer_disk.h"
58 #include "hammer_mount.h"
60 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
62 MALLOC_DECLARE(M_HAMMER);
67 * Key structure used for custom RB tree inode lookups. This prototypes
68 * the function hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).
70 typedef struct hammer_inode_info {
71 u_int64_t obj_id; /* (key) object identifier */
72 hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */
73 } *hammer_inode_info_t; /* NOTE: pointer typedef; this is the INFO lookup argument type of hammer_ino_rb_tree */
76 * HAMMER Transaction tracking
/*
 * Per-transaction state.  Pointers to this structure are taken by the
 * hammer_*_transaction(), hammer_alloc_tid() and hammer_alloc_recid()
 * prototypes declared in this header.
 */
78 struct hammer_transaction {
79 struct hammer_mount *hmp; /* mount pointer; presumably the hmp given to hammer_start_transaction() -- confirm */
81 struct hammer_volume *rootvol; /* root volume pointer; NOTE(review): reference ownership is not visible here */
84 typedef struct hammer_transaction *hammer_transaction_t; /* pointer typedef */
93 struct thread *locktd;
/*
 * Returns non-zero when lock->lockcount is non-zero, zero otherwise.
 * Only reads the field; the lock is not modified.
 */
97 hammer_islocked(struct hammer_lock *lock)
99 return(lock->lockcount != 0);
/*
 * Returns non-zero when lock->refs is exactly 1, zero otherwise.
 * Only reads the field; the reference count is not modified.
 */
103 hammer_islastref(struct hammer_lock *lock)
105 return(lock->refs == 1);
109 * Structure used to represent an inode in-memory.
111 * The record and data associated with an inode may be out of sync with
112 * the disk (xDIRTY flags), or not even on the disk at all (ONDISK flag
115 * An inode may also hold a cache of unsynchronized records, used for
116 * database and directories only. Unsynchronized regular file data is
117 * stored in the buffer cache.
119 * NOTE: A file which is created and destroyed within the initial
120 * synchronization period can wind up not doing any disk I/O at all.
122 * Finally, an inode may cache numerous disk-referencing B-Tree cursors.
124 struct hammer_ino_rb_tree;
/*
 * Inode tree: elements are struct hammer_inode linked through their
 * rb_node member and compared with hammer_ino_rb_compare.  The INFO
 * variant declares an additional lookup taking a hammer_inode_info_t.
 */
126 RB_HEAD(hammer_ino_rb_tree, hammer_inode);
127 RB_PROTOTYPEX(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
128 hammer_ino_rb_compare, hammer_inode_info_t);
130 struct hammer_rec_rb_tree;
131 struct hammer_record;
/*
 * Record tree: elements are struct hammer_record linked through their
 * rb_node member and compared with hammer_rec_rb_compare.  The INFO
 * variant declares an additional lookup taking a hammer_base_elm_t.
 */
132 RB_HEAD(hammer_rec_rb_tree, hammer_record);
133 RB_PROTOTYPEX(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
134 hammer_rec_rb_compare, hammer_base_elm_t);
/* Tail queue head type for lists of struct hammer_node. */
136 TAILQ_HEAD(hammer_node_list, hammer_node);
138 struct hammer_inode {
139 RB_ENTRY(hammer_inode) rb_node; /* linkage in hammer_ino_rb_tree */
140 u_int64_t obj_id; /* (key) object identifier */
141 hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */
142 hammer_tid_t last_tid; /* last modified tid (for fsync) */
143 struct hammer_mount *hmp; /* mount pointer; presumably the mount whose rb_inos_root holds this inode -- confirm */
146 struct lockf advlock; /* struct lockf state; presumably advisory file locks -- confirm */
147 struct hammer_lock lock; /* embedded hammer_lock for this inode */
148 struct hammer_inode_record ino_rec; /* in-memory inode record (see HAMMER_INODE_RDIRTY) */
149 struct hammer_inode_data ino_data; /* in-memory inode data (see HAMMER_INODE_DDIRTY) */
150 struct hammer_rec_rb_tree rec_tree; /* red-black record tree */
151 struct hammer_node *cache; /* cached B-Tree node shortcut */
154 typedef struct hammer_inode *hammer_inode_t; /* pointer typedef */
/* Convert a vnode pointer to its hammer_inode by casting vp->v_data. */
156 #define VTOI(vp) ((struct hammer_inode *)(vp)->v_data)
/*
 * Inode state bits.  NOTE(review): the struct member that stores these
 * bits is not visible in this listing -- confirm where they are kept.
 */
158 #define HAMMER_INODE_DDIRTY 0x0001 /* in-memory ino_data is dirty */
159 #define HAMMER_INODE_RDIRTY 0x0002 /* in-memory ino_rec is dirty */
160 #define HAMMER_INODE_ITIMES 0x0004 /* in-memory mtime/atime modified */
161 #define HAMMER_INODE_ONDISK 0x0010 /* inode is on-disk (else not yet) */
162 #define HAMMER_INODE_FLUSH 0x0020 /* flush on last ref */
/* NOTE(review): no user of this limit is visible in this listing. */
164 #define HAMMER_MAX_INODE_CURSORS 4
167 * Structure used to represent an unsynchronized record in-memory. This
168 * structure is organized in a per-inode RB-tree. If the inode is not
169 * on disk then neither are any records and the in-memory record tree
170 * represents the entire contents of the inode. If the inode is on disk
171 * then the on-disk B-Tree is scanned in parallel with the in-memory
172 * RB-Tree to synthesize the current state of the file.
174 * Only current (delete_tid == 0) unsynchronized records are kept in-memory.
176 struct hammer_record {
177 RB_ENTRY(hammer_record) rb_node; /* linkage in hammer_rec_rb_tree */
178 struct hammer_lock lock; /* embedded hammer_lock for this record */
179 struct hammer_inode *ip; /* inode pointer; presumably the inode whose rec_tree holds this record -- confirm */
180 union hammer_record_ondisk rec; /* record image, embedded by value */
181 union hammer_data_ondisk *data; /* data pointer; NOTE(review): ownership presumably tied to HAMMER_RECF_ALLOCDATA -- confirm */
185 typedef struct hammer_record *hammer_record_t; /* pointer typedef */
/*
 * Record state bits.  NOTE(review): the meanings below are inferred
 * from the names only and the storing field is not visible here.
 */
187 #define HAMMER_RECF_ALLOCDATA 0x0001 /* presumably: data was separately allocated -- confirm */
188 #define HAMMER_RECF_ONRBTREE 0x0002 /* presumably: record is linked into an RB tree -- confirm */
189 #define HAMMER_RECF_DELETED 0x0004 /* presumably: record has been deleted -- confirm */
192 * Structures used to internally represent a volume and a cluster
/* Forward declarations so the tree macros below can name these types. */
194 struct hammer_volume;
195 struct hammer_cluster;
196 struct hammer_supercl;
197 struct hammer_buffer;
/* One red-black tree head type per in-memory structure kind. */
199 RB_HEAD(hammer_vol_rb_tree, hammer_volume);
200 RB_HEAD(hammer_clu_rb_tree, hammer_cluster);
201 RB_HEAD(hammer_scl_rb_tree, hammer_supercl);
202 RB_HEAD(hammer_buf_rb_tree, hammer_buffer);
203 RB_HEAD(hammer_nod_rb_tree, hammer_node);
/*
 * Tree operation prototypes.  Each element type links through its
 * rb_node member and is ordered by its own compare function; every
 * prototype names int32_t as the final (lookup key) type argument.
 */
205 RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node,
206 hammer_vol_rb_compare, int32_t);
207 RB_PROTOTYPE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
208 hammer_clu_rb_compare, int32_t);
209 RB_PROTOTYPE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
210 hammer_scl_rb_compare, int32_t);
211 RB_PROTOTYPE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
212 hammer_buf_rb_compare, int32_t);
213 RB_PROTOTYPE2(hammer_nod_rb_tree, hammer_node, rb_node,
214 hammer_nod_rb_compare, int32_t);
217 * IO management - embedded at the head of various in-memory structures
/*
 * Type tag for the hammer_io 'type' field.  There is one enumerator
 * for each member of union hammer_io_structure (volume, supercl,
 * cluster, buffer).
 */
219 enum hammer_io_type { HAMMER_STRUCTURE_VOLUME,
220 HAMMER_STRUCTURE_SUPERCL,
221 HAMMER_STRUCTURE_CLUSTER,
222 HAMMER_STRUCTURE_BUFFER };
224 union hammer_io_structure; /* forward declaration; defined further down in this header */
227 LIST_ENTRY(worklist) node; /* list linkage for struct worklist */
231 struct worklist worklist; /* embedded worklist; NOTE(review): presumably ties into the buffer cache bio_ops hooks -- confirm */
232 struct hammer_lock lock; /* embedded hammer_lock for the containing structure */
233 enum hammer_io_type type; /* which kind of structure this io is embedded in */
236 u_int modified : 1; /* bp's data was modified */
237 u_int released : 1; /* bp released (w/ B_LOCKED set) */
240 typedef struct hammer_io *hammer_io_t; /* pointer typedef */
243 * In-memory volume representing on-disk buffer
/*
 * NOTE: hammer_modify_volume() accesses volume->io, so an 'io' member
 * exists even though its declaration is not visible in this listing.
 */
245 struct hammer_volume {
247 RB_ENTRY(hammer_volume) rb_node; /* linkage in hammer_vol_rb_tree */
248 struct hammer_clu_rb_tree rb_clus_root; /* root of a tree of hammer_cluster */
249 struct hammer_scl_rb_tree rb_scls_root; /* root of a tree of hammer_supercl */
250 struct hammer_volume_ondisk *ondisk; /* pointer to the on-disk volume structure */
251 struct hammer_alist_live alist; /* live allocator state (see hammer_alist.h) */
254 int64_t cluster_base; /* base offset of cluster 0 */
257 struct hammer_mount *hmp; /* mount pointer; presumably the mount whose rb_vols_root holds this volume -- confirm */
261 typedef struct hammer_volume *hammer_volume_t; /* pointer typedef */
264 * In-memory super-cluster representing on-disk buffer
/*
 * NOTE: hammer_modify_supercl() accesses supercl->io, so an 'io' member
 * exists even though its declaration is not visible in this listing.
 */
266 struct hammer_supercl {
268 RB_ENTRY(hammer_supercl) rb_node; /* linkage in hammer_scl_rb_tree */
269 struct hammer_supercl_ondisk *ondisk; /* pointer to the on-disk super-cluster structure */
270 struct hammer_volume *volume; /* volume pointer; presumably the containing volume -- confirm */
271 struct hammer_alist_live alist; /* live allocator state (see hammer_alist.h) */
275 typedef struct hammer_supercl *hammer_supercl_t; /* pointer typedef */
278 * In-memory cluster representing on-disk buffer
280 * The cluster's indexing range is cached in hammer_cluster, separate
281 * from the ondisk info in order to allow cursors to point to it.
/*
 * NOTE: hammer_modify_cluster() accesses cluster->io, so an 'io' member
 * exists even though its declaration is not visible in this listing.
 */
283 struct hammer_cluster {
285 RB_ENTRY(hammer_cluster) rb_node; /* linkage in hammer_clu_rb_tree */
286 struct hammer_buf_rb_tree rb_bufs_root; /* root of a tree of hammer_buffer */
287 struct hammer_cluster_ondisk *ondisk; /* pointer to the on-disk cluster structure */
288 struct hammer_volume *volume; /* volume pointer; presumably the containing volume -- confirm */
289 struct hammer_alist_live alist_master; /* NOTE(review): four live allocators; roles inferred from names only -- confirm */
290 struct hammer_alist_live alist_btree;
291 struct hammer_alist_live alist_record;
292 struct hammer_alist_live alist_mdata;
293 struct hammer_nod_rb_tree rb_nods_root; /* cursors in cluster */
294 struct hammer_base_elm clu_btree_beg; /* copy of on-disk info */
295 struct hammer_base_elm clu_btree_end; /* copy of on-disk info */
299 typedef struct hammer_cluster *hammer_cluster_t; /* pointer typedef */
302 * In-memory buffer (other than volume, super-cluster, or cluster),
303 * representing an on-disk buffer.
/*
 * NOTE: the inline helpers in this header access buffer->io and
 * buffer->buf_no, so those members exist even though their
 * declarations are not visible in this listing.
 */
305 struct hammer_buffer {
307 RB_ENTRY(hammer_buffer) rb_node; /* linkage in hammer_buf_rb_tree */
308 hammer_fsbuf_ondisk_t ondisk; /* on-disk buffer contents; base address used by hammer_bclu_offset() */
309 struct hammer_volume *volume; /* volume pointer; presumably the containing volume -- confirm */
310 struct hammer_cluster *cluster; /* cluster pointer; presumably the containing cluster -- confirm */
313 struct hammer_alist_live alist; /* live allocator state (see hammer_alist.h) */
314 struct hammer_node_list clist; /* tail queue of hammer_node (linked via their 'entry' member) */
315 struct hammer_node *save_scan; /* NOTE(review): purpose not visible in this header */
318 typedef struct hammer_buffer *hammer_buffer_t; /* pointer typedef */
321 * In-memory B-Tree node, representing an on-disk B-Tree node.
323 * This is a hang-on structure which is backed by a hammer_buffer,
324 * indexed by a hammer_cluster, and used for fine-grained locking of
325 * B-Tree nodes in order to properly control lock ordering. A hammer_buffer
326 * can contain multiple nodes representing wildly disassociated portions
327 * of the B-Tree so locking cannot be done on a buffer-by-buffer basis.
329 * This structure uses a cluster-relative index to reduce the number
330 * of layers required to access it, and also because all on-disk B-Tree
331 * references are cluster-relative offsets.
334 struct hammer_lock lock; /* node-by-node lock */
335 TAILQ_ENTRY(hammer_node) entry; /* per-buffer linkage */
336 RB_ENTRY(hammer_node) rb_node; /* per-cluster linkage */
337 int32_t node_offset; /* cluster-rel offset */
338 struct hammer_cluster *cluster; /* cluster pointer; presumably the indexing cluster -- confirm */
339 struct hammer_buffer *buffer; /* backing buffer */
340 hammer_node_ondisk_t ondisk; /* ptr to on-disk structure */
341 struct hammer_node **cache1; /* passive cache(s) */
342 struct hammer_node **cache2; /* second passive cache slot; NOTE(review): presumably set via hammer_cache_node() -- confirm */
345 typedef struct hammer_node *hammer_node_t; /* pointer typedef */
348 * Common I/O management structure - embedded in in-memory structures
349 * which are backed by filesystem buffers.
/*
 * Overlay of the four in-memory structure kinds; the members line up
 * one-to-one with the enumerators of enum hammer_io_type.
 */
351 union hammer_io_structure {
353 struct hammer_volume volume;
354 struct hammer_supercl supercl;
355 struct hammer_cluster cluster;
356 struct hammer_buffer buffer;
/* NOTE(review): no user of this flag is visible in this listing. */
359 #define HAMFS_CLUSTER_DIRTY 0x0001
361 #include "hammer_cursor.h"
364 * Internal hammer mount data structure
366 struct hammer_mount {
368 /*struct vnode *rootvp;*/
369 struct hammer_ino_rb_tree rb_inos_root; /* root of the tree of hammer_inode */
370 struct hammer_vol_rb_tree rb_vols_root; /* root of the tree of hammer_volume */
371 struct hammer_volume *rootvol; /* root volume pointer (cf. hammer_get_root_volume()) */
372 struct hammer_cluster *rootcl; /* root cluster pointer (cf. hammer_get_root_cluster()) */
373 char *zbuf; /* HAMMER_BUFSIZE bytes worth of all-zeros */
376 u_int32_t namekey_iterator; /* NOTE(review): presumably feeds directory name-key generation -- confirm */
379 typedef struct hammer_mount *hammer_mount_t; /* pointer typedef */
/*
 * Globals defined elsewhere in the HAMMER sources: the vnode operations
 * vector, the allocator configurations and the bio_ops table.
 * NOTE(review): hammer_init_alist_config() presumably fills in the
 * *_alist_config objects -- confirm.
 */
385 extern struct vop_ops hammer_vnode_vops;
386 extern struct hammer_alist_config Buf_alist_config;
387 extern struct hammer_alist_config Vol_normal_alist_config;
388 extern struct hammer_alist_config Vol_super_alist_config;
389 extern struct hammer_alist_config Supercl_alist_config;
390 extern struct hammer_alist_config Clu_master_alist_config;
391 extern struct hammer_alist_config Clu_slave_alist_config;
392 extern struct bio_ops hammer_bioops;
/*
 * Vnode/inode entry points.  Only signatures are visible here; return
 * value conventions are presumably errno-style for the int functions
 * -- confirm against the implementations.
 */
394 int hammer_vop_inactive(struct vop_inactive_args *);
395 int hammer_vop_reclaim(struct vop_reclaim_args *);
396 int hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
398 int hammer_get_vnode(struct hammer_inode *ip, int lktype, /* NOTE(review): the rest of this declaration is missing from this listing */
400 struct hammer_inode *hammer_get_inode(hammer_mount_t hmp,
401 u_int64_t obj_id, int *errorp); /* an error code is reported through *errorp (presumably; confirm) */
402 void hammer_put_inode(struct hammer_inode *ip);
403 void hammer_put_inode_ref(struct hammer_inode *ip);
/*
 * Unload routines.  Each takes the object plus an unused 'void *data'
 * argument.  NOTE(review): this shape looks like a tree-scan callback
 * signature -- confirm.
 */
405 int hammer_unload_inode(hammer_inode_t ip, void *data __unused);
406 int hammer_unload_volume(hammer_volume_t volume, void *data __unused);
407 int hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused);
408 int hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused);
409 int hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused);
410 int hammer_install_volume(hammer_mount_t hmp, const char *volname);
/* Cursor-driven per-inode record access; semantics live in the implementation. */
412 int hammer_ip_lookup(hammer_cursor_t cursor, hammer_inode_t ip);
413 int hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip);
414 int hammer_ip_next(hammer_cursor_t cursor);
415 int hammer_ip_resolve_data(hammer_cursor_t cursor);
/* In-memory record management (struct hammer_record). */
417 hammer_alloc_mem_record(struct hammer_transaction *trans, /* NOTE(review): return type and remaining parameters are missing from this listing */
419 void hammer_rel_mem_record(struct hammer_record **recordp); /* takes a pointer to the caller's record pointer */
420 void hammer_free_mem_record(hammer_record_t record);
/* Cursor movement within the B-Tree. */
422 int hammer_cursor_up(hammer_cursor_t cursor);
423 int hammer_cursor_toroot(hammer_cursor_t cursor);
424 int hammer_cursor_down(hammer_cursor_t cursor);
/*
 * Lock and reference primitives operating on struct hammer_lock.
 * NOTE(review): the names suggest exclusive (ex) and shared (sh) modes;
 * blocking behaviour and the meaning of hammer_lock_ex_try()'s return
 * value are not visible in this header -- confirm in the implementation.
 */
426 void hammer_lock_ex(struct hammer_lock *lock);
427 int hammer_lock_ex_try(struct hammer_lock *lock);
428 void hammer_lock_sh(struct hammer_lock *lock);
429 void hammer_unlock(struct hammer_lock *lock);
430 void hammer_ref(struct hammer_lock *lock);
431 void hammer_unref(struct hammer_lock *lock);
432 void hammer_downgrade(struct hammer_lock *lock);
/*
 * Conversion helpers (uuid, timespec, vnode/object type) and id
 * allocators.  Only the signatures are visible here; the mapping rules
 * are defined by the implementations.
 */
434 u_int32_t hammer_to_unix_xid(uuid_t *uuid);
435 void hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
436 void hammer_to_timespec(hammer_tid_t tid, struct timespec *ts);
437 hammer_tid_t hammer_timespec_to_transid(struct timespec *ts);
438 hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans);
439 hammer_tid_t hammer_alloc_recid(hammer_transaction_t trans);
441 enum vtype hammer_get_vnode_type(u_int8_t obj_type);
442 int hammer_get_dtype(u_int8_t obj_type);
443 u_int8_t hammer_get_obj_type(enum vtype vtype);
444 int64_t hammer_directory_namekey(void *name, int len); /* takes a name buffer and its length; returns a 64-bit key */
/* Cursor setup/teardown: a cursor can be initialised from a mount or from an inode. */
446 int hammer_init_cursor_hmp(hammer_cursor_t cursor, hammer_mount_t hmp);
447 int hammer_init_cursor_ip(hammer_cursor_t cursor, hammer_inode_t ip);
448 void hammer_done_cursor(hammer_cursor_t cursor);
449 void hammer_mem_done(hammer_cursor_t cursor);
/* B-Tree operations; all of them act through a cursor (see hammer_cursor.h). */
451 int hammer_btree_lookup(hammer_cursor_t cursor);
452 int hammer_btree_extract(hammer_cursor_t cursor, int flags);
453 int hammer_btree_iterate(hammer_cursor_t cursor);
454 int hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm);
455 int hammer_btree_delete(hammer_cursor_t cursor);
456 int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2); /* NOTE(review): presumably a three-way key compare -- confirm */
/*
 * Acquisition of in-memory volume/super-cluster/cluster/buffer/node
 * structures.  Functions taking 'int *errorp' presumably report an
 * error code through it -- confirm.
 */
458 void *hammer_bread(struct hammer_cluster *cluster, int32_t cloff,
459 u_int64_t buf_type, int *errorp,
460 struct hammer_buffer **bufferp);
462 hammer_volume_t hammer_get_root_volume(hammer_mount_t hmp, int *errorp);
463 hammer_cluster_t hammer_get_root_cluster(hammer_mount_t hmp, int *errorp);
465 hammer_volume_t hammer_get_volume(hammer_mount_t hmp,
466 int32_t vol_no, int *errorp);
467 hammer_supercl_t hammer_get_supercl(hammer_volume_t volume,
468 int32_t scl_no, int *errorp, int isnew);
469 hammer_cluster_t hammer_get_cluster(hammer_volume_t volume,
470 int32_t clu_no, int *errorp, int isnew);
471 hammer_buffer_t hammer_get_buffer(hammer_cluster_t cluster,
472 int32_t buf_no, u_int64_t buf_type, int *errorp);
474 int hammer_ref_cluster(hammer_cluster_t cluster);
475 int hammer_ref_buffer(hammer_buffer_t buffer);
476 void hammer_flush_buffer_nodes(hammer_buffer_t buffer);
/* Release counterparts; each takes an 'int flush' argument. */
479 void hammer_rel_volume(hammer_volume_t volume, int flush);
480 void hammer_rel_supercl(hammer_supercl_t supercl, int flush);
481 void hammer_rel_cluster(hammer_cluster_t cluster, int flush);
482 void hammer_rel_buffer(hammer_buffer_t buffer, int flush);
/* B-Tree node handles, looked up by cluster-relative offset. */
484 hammer_node_t hammer_get_node(hammer_cluster_t cluster,
485 int32_t node_offset, int *errorp);
486 int hammer_ref_node(hammer_node_t node);
487 void hammer_rel_node(hammer_node_t node);
488 void hammer_cache_node(hammer_node_t node,
489 struct hammer_node **cache);
490 void hammer_uncache_node(struct hammer_node **cache);
491 void hammer_flush_node(hammer_node_t node);
/* Duplicate a buffer/cluster pointer into *bufferp / *clusterp. NOTE(review): reference handling is not visible here. */
493 void hammer_dup_buffer(struct hammer_buffer **bufferp,
494 struct hammer_buffer *buffer);
495 void hammer_dup_cluster(struct hammer_cluster **clusterp,
496 struct hammer_cluster *cluster);
/* Allocation of B-Tree nodes, data and records from a cluster. */
497 hammer_node_t hammer_alloc_btree(struct hammer_cluster *cluster, int *errorp);
498 void *hammer_alloc_data(struct hammer_cluster *cluster, int32_t bytes,
499 int *errorp, struct hammer_buffer **bufferp);
500 void *hammer_alloc_record(struct hammer_cluster *cluster,
501 int *errorp, struct hammer_buffer **bufferp);
/* Free by in-memory pointer (the *_ptr variants) or by cluster-relative offset. */
502 void hammer_free_btree_ptr(struct hammer_buffer *buffer,
503 hammer_node_ondisk_t node);
504 void hammer_free_data_ptr(struct hammer_buffer *buffer,
505 void *data, int bytes);
506 void hammer_free_record_ptr(struct hammer_buffer *buffer,
507 union hammer_record_ondisk *rec);
508 void hammer_free_btree(struct hammer_cluster *cluster, int32_t bclu_offset);
509 void hammer_free_data(struct hammer_cluster *cluster, int32_t bclu_offset, /* NOTE(review): the rest of this declaration is missing from this listing */
511 void hammer_free_record(struct hammer_cluster *cluster, int32_t bclu_offset);
/*
 * NOTE(review): these hammer_put_* prototypes take the same argument
 * types as the hammer_rel_* prototypes declared in this header --
 * confirm whether both sets are still implemented.
 */
513 void hammer_put_volume(struct hammer_volume *volume, int flush);
514 void hammer_put_supercl(struct hammer_supercl *supercl, int flush);
515 void hammer_put_cluster(struct hammer_cluster *cluster, int flush);
516 void hammer_put_buffer(struct hammer_buffer *buffer, int flush);
518 void hammer_init_alist_config(void);
/* Transaction lifecycle; the caller supplies the struct hammer_transaction storage. */
520 void hammer_start_transaction(struct hammer_transaction *trans,
521 struct hammer_mount *hmp);
522 void hammer_commit_transaction(struct hammer_transaction *trans);
523 void hammer_abort_transaction(struct hammer_transaction *trans);
/* Inode modification/creation. NOTE(review): 'flags' presumably takes HAMMER_INODE_* bits -- confirm. */
525 void hammer_modify_inode(struct hammer_transaction *trans,
526 hammer_inode_t ip, int flags);
527 int hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap,
528 struct ucred *cred, struct hammer_inode *dip,
529 struct hammer_inode **ipp);
530 void hammer_rel_inode(hammer_inode_t ip, int flush);
/* Per-inode record operations performed under a transaction. */
532 int hammer_ip_add_directory(struct hammer_transaction *trans,
533 hammer_inode_t dip, struct namecache *ncp, /* NOTE(review): the rest of this declaration is missing from this listing */
535 int hammer_ip_del_directory(struct hammer_transaction *trans,
536 hammer_cursor_t cursor, hammer_inode_t dip, /* NOTE(review): the rest of this declaration is missing from this listing */
538 int hammer_ip_delete_range(struct hammer_transaction *trans,
539 hammer_inode_t ip, int64_t ran_beg, int64_t ran_end);
540 int hammer_ip_add_data(struct hammer_transaction *trans,
541 hammer_inode_t ip, int64_t offset,
542 void *data, int bytes);
/*
 * Low-level I/O on a struct hammer_io.  hammer_io_read() and
 * hammer_io_new() also take the device vnode.  NOTE(review): how the
 * 'modified'/'released' bits interact with these calls is defined by
 * the implementation, not visible here.
 */
544 int hammer_io_read(struct vnode *devvp, struct hammer_io *io);
545 int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
546 void hammer_io_release(struct hammer_io *io, int flush);
547 int hammer_io_checkflush(hammer_io_t io);
552 * Inline support functions (not kernel specific)
/* Flag the volume's embedded io as modified by setting io.modified to 1. */
555 hammer_modify_volume(struct hammer_volume *volume)
557 volume->io.modified = 1;
/* Flag the super-cluster's embedded io as modified by setting io.modified to 1. */
561 hammer_modify_supercl(struct hammer_supercl *supercl)
563 supercl->io.modified = 1;
/* Flag the cluster's embedded io as modified by setting io.modified to 1. */
567 hammer_modify_cluster(struct hammer_cluster *cluster)
569 cluster->io.modified = 1;
/* Flag the buffer's embedded io as modified by setting io.modified to 1. */
573 hammer_modify_buffer(struct hammer_buffer *buffer)
575 buffer->io.modified = 1;
/*
 * Flag a B-Tree node as modified.  The flag is kept on the node's
 * backing buffer (node->buffer->io.modified), not on the node itself.
 * node->buffer is dereferenced without a NULL check.
 */
579 hammer_modify_node(struct hammer_node *node)
581 node->buffer->io.modified = 1;
585 * Return the cluster-relative byte offset of an element within a buffer
/*
 * The offset is computed as buffer->buf_no * HAMMER_BUFSIZE plus the
 * byte distance of ptr from the start of buffer->ondisk.
 * NOTE(review): assumes ptr points inside buffer->ondisk; the
 * declaration of bclu_offset is not visible in this listing, so its
 * width cannot be checked here.
 */
588 hammer_bclu_offset(struct hammer_buffer *buffer, void *ptr)
592 bclu_offset = buffer->buf_no * HAMMER_BUFSIZE +
593 ((char *)ptr - (char *)buffer->ondisk);