| 1 | /* |
| 2 | * Copyright (c) 2007 The DragonFly Project. All rights reserved. |
| 3 | * |
| 4 | * This code is derived from software contributed to The DragonFly Project |
| 5 | * by Matthew Dillon <dillon@backplane.com> |
| 6 | * |
| 7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions |
| 9 | * are met: |
| 10 | * |
| 11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer in |
| 15 | * the documentation and/or other materials provided with the |
| 16 | * distribution. |
| 17 | * 3. Neither the name of The DragonFly Project nor the names of its |
| 18 | * contributors may be used to endorse or promote products derived |
| 19 | * from this software without specific, prior written permission. |
| 20 | * |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 25 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 26 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| 29 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 30 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| 31 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 32 | * SUCH DAMAGE. |
| 33 | * |
| 34 | * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.33 2008/05/18 01:48:50 dillon Exp $ |
| 35 | */ |
| 36 | |
| 37 | #ifndef VFS_HAMMER_DISK_H_ |
| 38 | #define VFS_HAMMER_DISK_H_ |
| 39 | |
| 40 | #ifndef _SYS_UUID_H_ |
| 41 | #include <sys/uuid.h> |
| 42 | #endif |
| 43 | |
| 44 | /* |
| 45 | * The structures below represent the on-disk format for a HAMMER |
| 46 | * filesystem. Note that all fields for on-disk structures are naturally |
| 47 | * aligned. The host endian format is used - compatibility is possible |
| 48 | * if the implementation detects reversed endian and adjusts data accordingly. |
| 49 | * |
| 50 | * Most of HAMMER revolves around the concept of an object identifier. An |
| 51 | * obj_id is a 64 bit quantity which uniquely identifies a filesystem object |
| 52 | * FOR THE ENTIRE LIFE OF THE FILESYSTEM. This uniqueness allows backups |
| 53 | * and mirrors to retain varying amounts of filesystem history by removing |
| 54 | * any possibility of conflict through identifier reuse. |
| 55 | * |
| 56 | * A HAMMER filesystem may span multiple volumes. |
| 57 | * |
| 58 | * A HAMMER filesystem uses a 16K filesystem buffer size. All filesystem |
| 59 | * I/O is done in multiples of 16K. Most buffer-sized headers such as those |
| 60 | * used by volumes, super-clusters, clusters, and basic filesystem buffers |
| 61 | * use fixed-sized A-lists which are heavily dependent on HAMMER_BUFSIZE. |
| 62 | * |
| 63 | * Per-volume storage limit: 52 bits 4096 TB |
| 64 | * Per-Zone storage limit: 59 bits 512 KTB (due to blockmap) |
| 65 | * Per-filesystem storage limit: 60 bits 1 MTB |
| 66 | */ |
/*
 * Filesystem buffer geometry.  HAMMER_BUFSIZE must equal
 * (1 << HAMMER_BUFFER_BITS); the preprocessor check below rejects any
 * mismatch at compile time.
 */
#define HAMMER_BUFFER_BITS	14
#define HAMMER_BUFSIZE		16384
#define HAMMER_BUFMASK		(HAMMER_BUFSIZE - 1)
#define HAMMER_MAXDATA		(256*1024)

#if HAMMER_BUFSIZE != (1 << HAMMER_BUFFER_BITS)
#error "HAMMER_BUFFER_BITS BROKEN"
#endif

/* The same size and mask, widened to u_int64_t. */
#define HAMMER_BUFSIZE64	((u_int64_t)HAMMER_BUFSIZE)
#define HAMMER_BUFMASK64	((u_int64_t)HAMMER_BUFMASK)
| 78 | |
/*
 * Bit-field masks for a 64-bit hammer offset:
 *   bits 60-63  zone
 *   bits 52-59  volume
 *   bits  0-51  short offset (bits 0-59 form the long offset)
 */
#define HAMMER_OFF_ZONE_MASK		0xF000000000000000ULL
#define HAMMER_OFF_VOL_MASK		0x0FF0000000000000ULL
#define HAMMER_OFF_LONG_MASK		0x0FFFFFFFFFFFFFFFULL
#define HAMMER_OFF_SHORT_MASK		0x000FFFFFFFFFFFFFULL

/*
 * Recovery boundary.  The *_REC_MASK values are the offset masks with the
 * low 24 bits cleared, i.e. rounded down to a HAMMER_RECOVERY_BND multiple.
 */
#define HAMMER_RECOVERY_BND		0x0000000001000000ULL
#define HAMMER_OFF_LONG_REC_MASK	0x0FFFFFFFFF000000ULL
#define HAMMER_OFF_SHORT_REC_MASK	0x000FFFFFFF000000ULL
| 86 | |
| 87 | /* |
| 88 | * Hammer transaction ids are 64 bit unsigned integers and are usually |
| 89 | * synchronized with the time of day in nanoseconds. |
| 90 | * |
| 91 | * Hammer offsets are used for FIFO indexing and embed a cycle counter |
| 92 | * and volume number in addition to the offset. Most offsets are required |
| 93 | * to be 64-byte aligned. |
| 94 | */ |
/* 64-bit unsigned scalars */
typedef u_int64_t	hammer_tid_t;	/* transaction id */
typedef u_int64_t	hammer_off_t;	/* encoded hammer offset */

/* 32-bit unsigned scalars */
typedef u_int32_t	hammer_seq_t;
typedef u_int32_t	hammer_crc_t;	/* type of the *_crc fields */
| 99 | |
/*
 * Inclusive range limits for transaction ids, keys, object ids, record
 * types and offsets.  TID, RECTYPE and OFFSET limits are unsigned; KEY and
 * OBJID limits are signed 64-bit values.
 *
 * HAMMER_MIN_KEY is written as (-MAX - 1) on purpose.  The literal
 * 0x8000000000000000LL does not fit in a signed long long, so C gives it
 * the type unsigned long long; negating it then produces the unsigned
 * value 0x8000000000000000 rather than a negative number.  That makes
 * signed comparisons such as (key < HAMMER_MIN_KEY) or
 * (HAMMER_MIN_KEY < HAMMER_MAX_KEY) evaluate incorrectly.
 */
#define HAMMER_MIN_TID		0ULL			/* unsigned */
#define HAMMER_MAX_TID		0xFFFFFFFFFFFFFFFFULL	/* unsigned */
#define HAMMER_MIN_KEY		(-0x7FFFFFFFFFFFFFFFLL - 1) /* signed */
#define HAMMER_MAX_KEY		0x7FFFFFFFFFFFFFFFLL	/* signed */
#define HAMMER_MIN_OBJID	HAMMER_MIN_KEY		/* signed */
#define HAMMER_MAX_OBJID	HAMMER_MAX_KEY		/* signed */
#define HAMMER_MIN_RECTYPE	0x0U			/* unsigned */
#define HAMMER_MAX_RECTYPE	0xFFFFU			/* unsigned */
#define HAMMER_MIN_OFFSET	0ULL			/* unsigned */
#define HAMMER_MAX_OFFSET	0xFFFFFFFFFFFFFFFFULL	/* unsigned */
| 110 | |
| 111 | /* |
| 112 | * hammer_off_t has several different encodings. Note that not all zones |
| 113 | * encode a vol_no. |
| 114 | * |
| 115 | * zone 0 (z,v,o): reserved (for sanity) |
| 116 | * zone 1 (z,v,o): raw volume relative (offset 0 is the volume header) |
| 117 | * zone 2 (z,v,o): raw buffer relative (offset 0 is the first buffer) |
| 118 | * zone 3 (z,o): undo fifo - fixed layer2 array in root vol hdr |
| 119 | * zone 4 (z,v,o): freemap - freemap-backed self-mapping special |
| 120 | * cased layering. |
| 121 | * |
| 122 | * zone 8 (z,o): B-Tree - blkmap-backed |
| 123 | * zone 9 (z,o): Record - blkmap-backed |
| 124 | * zone 10 (z,o): Large-data - blkmap-backed |
| 125 | */ |
| 126 | |
/*
 * Zone selectors.  A HAMMER_ZONE_* value is the zone number positioned in
 * the top four bits of a 64-bit offset; the matching *_INDEX is the same
 * zone number as a plain integer.  Zones without an *_INDEX are reserved.
 */
#define HAMMER_ZONE_RAW_VOLUME_INDEX	1
#define HAMMER_ZONE_RAW_VOLUME		0x1000000000000000ULL

#define HAMMER_ZONE_RAW_BUFFER_INDEX	2
#define HAMMER_ZONE_RAW_BUFFER		0x2000000000000000ULL

#define HAMMER_ZONE_UNDO_INDEX		3
#define HAMMER_ZONE_UNDO		0x3000000000000000ULL

#define HAMMER_ZONE_FREEMAP_INDEX	4
#define HAMMER_ZONE_FREEMAP		0x4000000000000000ULL

#define HAMMER_ZONE_RESERVED05		0x5000000000000000ULL
#define HAMMER_ZONE_RESERVED06		0x6000000000000000ULL
#define HAMMER_ZONE_RESERVED07		0x7000000000000000ULL

#define HAMMER_ZONE_BTREE_INDEX		8
#define HAMMER_ZONE_BTREE		0x8000000000000000ULL

#define HAMMER_ZONE_RECORD_INDEX	9
#define HAMMER_ZONE_RECORD		0x9000000000000000ULL

#define HAMMER_ZONE_LARGE_DATA_INDEX	10
#define HAMMER_ZONE_LARGE_DATA		0xA000000000000000ULL

#define HAMMER_ZONE_SMALL_DATA_INDEX	11
#define HAMMER_ZONE_SMALL_DATA		0xB000000000000000ULL

#define HAMMER_ZONE_RESERVED0C		0xC000000000000000ULL
#define HAMMER_ZONE_RESERVED0D		0xD000000000000000ULL
#define HAMMER_ZONE_RESERVED0E		0xE000000000000000ULL
#define HAMMER_ZONE_RESERVED0F		0xF000000000000000ULL
| 151 | |
| 152 | /* |
| 153 | * Per-zone size limitation. This just makes the iterator easier |
| 154 | * to deal with by preventing an iterator overflow. |
| 155 | */ |
/* Number of zone slots; the zone field of an offset is four bits wide. */
#define HAMMER_MAX_ZONES	16

/* 2^60 less two layer-2 spans (HAMMER_BLOCKMAP_LAYER2 is defined below). */
#define HAMMER_ZONE_LIMIT	\
	(0x1000000000000000ULL - 2 * HAMMER_BLOCKMAP_LAYER2)
| 160 | |
/*
 * Field packing and unpacking for a hammer_off_t.
 *
 * HAMMER_VOL_ENCODE(vol_no)       - low 8 bits of vol_no placed at bits 52-59
 * HAMMER_VOL_DECODE(ham_off)      - bits 52-59 as an int32_t
 * HAMMER_ZONE_DECODE(ham_off)     - bits 60-63 as an int32_t
 * HAMMER_ZONE_ENCODE(zone, off)   - zone shifted to bit 60, OR'd with off
 * HAMMER_SHORT_OFF_ENCODE(offset) - offset masked to the short-offset field
 * HAMMER_LONG_OFF_ENCODE(offset)  - offset masked to the long-offset field
 *
 * Every expansion is fully parenthesized.  The two DECODE macros used to
 * expand to a bare cast expression, which mis-parses when placed next to a
 * higher-precedence operator (e.g. "sizeof HAMMER_VOL_DECODE(x)").
 */
#define HAMMER_VOL_ENCODE(vol_no)			\
	((hammer_off_t)((vol_no) & 255) << 52)
#define HAMMER_VOL_DECODE(ham_off)			\
	((int32_t)(((hammer_off_t)(ham_off) >> 52) & 255))
#define HAMMER_ZONE_DECODE(ham_off)			\
	((int32_t)(((hammer_off_t)(ham_off) >> 60)))
#define HAMMER_ZONE_ENCODE(zone, ham_off)		\
	(((hammer_off_t)(zone) << 60) | (ham_off))
#define HAMMER_SHORT_OFF_ENCODE(offset)			\
	((hammer_off_t)(offset) & HAMMER_OFF_SHORT_MASK)
#define HAMMER_LONG_OFF_ENCODE(offset)			\
	((hammer_off_t)(offset) & HAMMER_OFF_LONG_MASK)
| 173 | |
/*
 * Build a complete (zone, volume, short offset) hammer_off_t for the
 * zones that carry a volume number: raw volume, raw buffer and freemap.
 */
#define HAMMER_ENCODE_RAW_VOLUME(vol_no, offset)			\
	(HAMMER_ZONE_RAW_VOLUME | HAMMER_VOL_ENCODE(vol_no) |		\
	 HAMMER_SHORT_OFF_ENCODE(offset))

#define HAMMER_ENCODE_RAW_BUFFER(vol_no, offset)			\
	(HAMMER_ZONE_RAW_BUFFER | HAMMER_VOL_ENCODE(vol_no) |		\
	 HAMMER_SHORT_OFF_ENCODE(offset))

#define HAMMER_ENCODE_FREEMAP(vol_no, offset)				\
	(HAMMER_ZONE_FREEMAP | HAMMER_VOL_ENCODE(vol_no) |		\
	 HAMMER_SHORT_OFF_ENCODE(offset))
| 188 | |
| 189 | /* |
| 190 | * Large-Block backing store |
| 191 | * |
| 192 | * A blockmap is a two-level map which translates a blockmap-backed zone |
| 193 | * offset into a raw zone 2 offset. Layer 1 handles 18 bits and layer 2 |
| 194 | * handles 19 bits. The 8M large-block size is 23 bits so two layers give |
| 195 | * us 23+19+18 = 60 bits of address space. |
| 196 | */ |
/*
 * Large-block geometry.  HAMMER_LARGEBLOCK_SIZE must equal
 * (1 << HAMMER_LARGEBLOCK_BITS); the check below rejects any mismatch.
 */
#define HAMMER_LARGEBLOCK_BITS		23
#define HAMMER_LARGEBLOCK_SIZE		(8 * 1024 * 1024)
#define HAMMER_LARGEBLOCK_SIZE64	((u_int64_t)HAMMER_LARGEBLOCK_SIZE)
#define HAMMER_LARGEBLOCK_MASK		(HAMMER_LARGEBLOCK_SIZE - 1)
#define HAMMER_LARGEBLOCK_MASK64	(HAMMER_LARGEBLOCK_SIZE64 - 1)

#if HAMMER_LARGEBLOCK_SIZE != (1 << HAMMER_LARGEBLOCK_BITS)
#error "HAMMER_LARGEBLOCK_BITS BROKEN"
#endif

/* Number of filesystem buffers in one large-block, and the index masks. */
#define HAMMER_BUFFERS_PER_LARGEBLOCK			\
	(HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE)
#define HAMMER_BUFFERS_PER_LARGEBLOCK_MASK		\
	(HAMMER_BUFFERS_PER_LARGEBLOCK - 1)
#define HAMMER_BUFFERS_PER_LARGEBLOCK_MASK64		\
	((hammer_off_t)HAMMER_BUFFERS_PER_LARGEBLOCK_MASK)
| 212 | |
| 213 | /* |
| 214 | * Every blockmap has this root structure in the root volume header. |
| 215 | * |
| 216 | * NOTE: zone 3 (the undo FIFO) does not use phys_offset. first and next |
| 217 | * offsets represent the FIFO. |
| 218 | */ |
| 219 | struct hammer_blockmap { |
| 220 | hammer_off_t phys_offset; /* zone-2 physical offset */ |
| 221 | hammer_off_t first_offset; /* zone-X logical offset (zone 3) */ |
| 222 | hammer_off_t next_offset; /* zone-X logical offset */ |
| 223 | hammer_off_t alloc_offset; /* zone-X logical offset */ |
| 224 | u_int32_t reserved01; |
| 225 | hammer_crc_t entry_crc; |
| 226 | }; |
| 227 | |
| 228 | typedef struct hammer_blockmap *hammer_blockmap_t; |
| 229 | |
| 230 | #define HAMMER_BLOCKMAP_CRCSIZE \ |
| 231 | offsetof(struct hammer_blockmap, entry_crc) |
| 232 | |
| 233 | /* |
| 234 | * The blockmap is a 2-layer entity made up of big-blocks. The first layer |
| 235 | * contains 262144 32-byte entries (18 bits), the second layer contains |
| 236 | * 524288 16-byte entries (19 bits), representing 8MB (23 bit) blockmaps. |
| 237 | * 18+19+23 = 60 bits. The top four bits are the zone id. |
| 238 | * |
| 239 | * Layer 2 encodes the physical bigblock mapping for a blockmap. The freemap |
| 240 | * uses this field to encode the virtual blockmap offset that allocated the |
| 241 | * physical block. |
| 242 | * |
| 243 | * NOTE: The freemap maps the vol_no in the upper 8 bits of layer1. |
| 244 | * |
| 245 | * zone-4 blockmap offset: [z:4][layer1:18][layer2:19][bigblock:23] |
| 246 | */ |
| 247 | struct hammer_blockmap_layer1 { |
| 248 | hammer_off_t blocks_free; /* big-blocks free */ |
| 249 | hammer_off_t phys_offset; /* UNAVAIL or zone-2 */ |
| 250 | hammer_off_t reserved01; |
| 251 | hammer_crc_t layer2_crc; /* xor'd crc's of HAMMER_BLOCKSIZE */ |
| 252 | /* (not yet used) */ |
| 253 | hammer_crc_t layer1_crc; /* MUST BE LAST FIELD OF STRUCTURE*/ |
| 254 | }; |
| 255 | |
| 256 | #define HAMMER_LAYER1_CRCSIZE \ |
| 257 | offsetof(struct hammer_blockmap_layer1, layer1_crc) |
| 258 | |
| 259 | struct hammer_blockmap_layer2 { |
| 260 | union { |
| 261 | hammer_off_t owner; /* used by freemap */ |
| 262 | hammer_off_t phys_offset; /* used by blockmap */ |
| 263 | } u; |
| 264 | u_int32_t bytes_free; /* bytes free within this bigblock */ |
| 265 | hammer_crc_t entry_crc; |
| 266 | }; |
| 267 | |
| 268 | #define HAMMER_LAYER2_CRCSIZE \ |
| 269 | offsetof(struct hammer_blockmap_layer2, entry_crc) |
| 270 | |
/* Sentinel values for blockmap offsets. */
#define HAMMER_BLOCKMAP_FREE		0ULL
#define HAMMER_BLOCKMAP_UNAVAIL		((hammer_off_t)-1LL)

/* Entries per large-block for each layer: 262144 (18 bits), 524288 (19). */
#define HAMMER_BLOCKMAP_RADIX1		\
	(HAMMER_LARGEBLOCK_SIZE / sizeof(struct hammer_blockmap_layer1))
#define HAMMER_BLOCKMAP_RADIX2		\
	(HAMMER_LARGEBLOCK_SIZE / sizeof(struct hammer_blockmap_layer2))

/* Entries per filesystem buffer for each layer. */
#define HAMMER_BLOCKMAP_RADIX1_PERBUFFER	\
	(HAMMER_BLOCKMAP_RADIX1 / (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE))
#define HAMMER_BLOCKMAP_RADIX2_PERBUFFER	\
	(HAMMER_BLOCKMAP_RADIX2 / (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE))

/*
 * Bytes of address space spanned by a full layer-2 large-block (19+23
 * bits) and by a full layer-1 large-block (18+19+23 bits), plus masks.
 */
#define HAMMER_BLOCKMAP_LAYER2		\
	(HAMMER_BLOCKMAP_RADIX2 * HAMMER_LARGEBLOCK_SIZE64)
#define HAMMER_BLOCKMAP_LAYER1		\
	(HAMMER_BLOCKMAP_RADIX1 * HAMMER_BLOCKMAP_LAYER2)

#define HAMMER_BLOCKMAP_LAYER2_MASK	(HAMMER_BLOCKMAP_LAYER2 - 1)
#define HAMMER_BLOCKMAP_LAYER1_MASK	(HAMMER_BLOCKMAP_LAYER1 - 1)
| 291 | |
| 292 | /* |
| 293 | * byte offset within layer1 or layer2 big-block for the entry representing |
| 294 | * a zone-2 physical offset. |
| 295 | */ |
/*
 * Mask the offset down to the span of one layer, divide by the span of a
 * single entry to get the entry index, then scale by the entry size.
 */
#define HAMMER_BLOCKMAP_LAYER1_OFFSET(zone2_offset)			\
	((((zone2_offset) & HAMMER_BLOCKMAP_LAYER1_MASK) /		\
	  HAMMER_BLOCKMAP_LAYER2) *					\
	 sizeof(struct hammer_blockmap_layer1))

#define HAMMER_BLOCKMAP_LAYER2_OFFSET(zone2_offset)			\
	((((zone2_offset) & HAMMER_BLOCKMAP_LAYER2_MASK) /		\
	  HAMMER_LARGEBLOCK_SIZE64) *					\
	 sizeof(struct hammer_blockmap_layer2))
| 303 | |
| 304 | /* |
| 305 | * HAMMER UNDO parameters. The UNDO fifo is mapped directly in the volume |
| 306 | * header with an array of layer2 structures. A maximum of (64x8MB) = 512MB |
| 307 | * may be reserved. The size of the undo fifo is usually set at newfs time |
| 308 | * but can be adjusted if the filesystem is taken offline. |
| 309 | */ |
| 310 | |
/* Maximum number of layer2 entries mapping the undo FIFO. */
#define HAMMER_UNDO_LAYER2	64
| 312 | |
| 313 | /* |
| 314 | * All on-disk HAMMER structures which make up elements of the UNDO FIFO |
| 315 | * contain a hammer_fifo_head and hammer_fifo_tail structure. This structure |
| 316 | * contains all the information required to validate the fifo element |
| 317 | * and to scan the fifo in either direction. The head is typically embedded |
| 318 | * in higher level hammer on-disk structures while the tail is typically |
| 319 | * out-of-band. hdr_size is the size of the whole mess, including the tail. |
| 320 | * |
| 321 | * All undo structures are guaranteed to not cross a 16K filesystem |
| 322 | * buffer boundary. Most undo structures are fairly small. Data spaces |
| 323 | * are not immediately reused by HAMMER so file data is not usually recorded |
| 324 | * as part of an UNDO. |
| 325 | * |
| 326 | * PAD elements are allowed to take up only 8 bytes of space as a special |
| 327 | * case, containing only hdr_signature, hdr_type, and hdr_size fields, |
| 328 | * and with the tail overloaded onto the head structure for 8 bytes total. |
| 329 | * |
| 330 | * Every undo record has a sequence number. This number is unrelated to |
| 331 | * transaction ids and instead collects the undo transactions associated |
| 332 | * with a single atomic operation. A larger transactional operation, such |
| 333 | * as a remove(), may consist of several smaller atomic operations |
| 334 | * representing raw meta-data operations. |
| 335 | */ |
/*
 * FIFO element alignment and nominal head/tail sizes, in bytes.
 *
 * NOTE(review): struct hammer_fifo_head as declared in this file is 16
 * bytes, not 32 - confirm what HAMMER_HEAD_ONDISK_SIZE is meant to cover.
 */
#define HAMMER_HEAD_ALIGN		8
#define HAMMER_HEAD_ALIGN_MASK		(HAMMER_HEAD_ALIGN - 1)
#define HAMMER_HEAD_ONDISK_SIZE		32
#define HAMMER_TAIL_ONDISK_SIZE		8
| 340 | |
| 341 | struct hammer_fifo_head { |
| 342 | u_int16_t hdr_signature; |
| 343 | u_int16_t hdr_type; |
| 344 | u_int32_t hdr_size; /* aligned size of the whole mess */ |
| 345 | u_int32_t reserved01; /* (0) reserved for future use */ |
| 346 | hammer_crc_t hdr_crc; /* XOR crc up to field w/ crc after field */ |
| 347 | }; |
| 348 | |
| 349 | #define HAMMER_FIFO_HEAD_CRCOFF offsetof(struct hammer_fifo_head, hdr_crc) |
| 350 | |
| 351 | struct hammer_fifo_tail { |
| 352 | u_int16_t tail_signature; |
| 353 | u_int16_t tail_type; |
| 354 | u_int32_t tail_size; /* aligned size of the whole mess */ |
| 355 | }; |
| 356 | |
| 357 | typedef struct hammer_fifo_head *hammer_fifo_head_t; |
| 358 | typedef struct hammer_fifo_tail *hammer_fifo_tail_t; |
| 359 | |
| 360 | /* |
| 361 | * Fifo header types. |
| 362 | */ |
/* Flag bit OR'd into a header type. */
#define HAMMER_HEAD_FLAG_FREE	0x8000U	/* Indicates object freed */

/* Header types.  PAD always carries HAMMER_HEAD_FLAG_FREE. */
#define HAMMER_HEAD_TYPE_PAD	(0x0040U|HAMMER_HEAD_FLAG_FREE)
#define HAMMER_HEAD_TYPE_VOL	0x0041U	/* Volume (dummy header) */
#define HAMMER_HEAD_TYPE_BTREE	0x0042U	/* B-Tree node */
#define HAMMER_HEAD_TYPE_UNDO	0x0043U	/* random UNDO information */
#define HAMMER_HEAD_TYPE_DELETE	0x0044U	/* record deletion */
#define HAMMER_HEAD_TYPE_RECORD	0x0045U	/* Filesystem record */

/* Values for hdr_signature and tail_signature respectively. */
#define HAMMER_HEAD_SIGNATURE	0xC84EU
#define HAMMER_TAIL_SIGNATURE	0xC74FU

/* Sequence word: top two bits are BEG/END flags, low 30 bits the number. */
#define HAMMER_HEAD_SEQ_BEG	0x80000000U
#define HAMMER_HEAD_SEQ_END	0x40000000U
#define HAMMER_HEAD_SEQ_MASK	0x3FFFFFFFU
| 378 | |
| 379 | /* |
| 380 | * Misc FIFO structures. |
| 381 | */ |
| 382 | struct hammer_fifo_undo { |
| 383 | struct hammer_fifo_head head; |
| 384 | hammer_off_t undo_offset; /* zone-1 offset */ |
| 385 | int32_t undo_data_bytes; |
| 386 | int32_t undo_reserved01; |
| 387 | /* followed by data */ |
| 388 | }; |
| 389 | |
| 390 | typedef struct hammer_fifo_undo *hammer_fifo_undo_t; |
| 391 | |
| 392 | struct hammer_fifo_buf_commit { |
| 393 | hammer_off_t undo_offset; |
| 394 | }; |
| 395 | |
| 396 | /* |
| 397 | * Volume header types |
| 398 | */ |
/*
 * Volume signature ("HAMMER01" with the high bit set on the 'H' and 'E'
 * bytes) and the same eight bytes in reverse order, as seen when the
 * volume was written with the opposite endianness.
 */
#define HAMMER_FSBUF_VOLUME	0xC8414D4DC5523031ULL
#define HAMMER_FSBUF_VOLUME_REV	0x313052C54D4D41C8ULL
| 401 | |
| 402 | /* |
| 403 | * The B-Tree structures need hammer_fsbuf_head. |
| 404 | */ |
| 405 | #include "hammer_btree.h" |
| 406 | |
| 407 | /* |
| 408 | * HAMMER Volume header |
| 409 | * |
| 410 | * A HAMMER filesystem is built from any number of block devices. Each block |
| 411 | * device contains a volume header followed by however many buffers fit |
| 412 | * into the volume. |
| 413 | * |
| 414 | * One of the volumes making up a HAMMER filesystem is the master, the |
| 415 | * rest are slaves. It does not have to be volume #0. |
| 416 | * |
| 417 | * The volume header takes up an entire 16K filesystem buffer and may |
| 418 | * represent up to 64KTB (65536 TB) of space. |
| 419 | * |
| 420 | * Special field notes: |
| 421 | * |
| 422 | * vol_bot_beg - offset of boot area (mem_beg - bot_beg bytes) |
| 423 | * vol_mem_beg - offset of memory log (clu_beg - mem_beg bytes) |
| 424 | * vol_buf_beg - offset of the first buffer. |
| 425 | * |
| 426 | * The memory log area allows a kernel to cache new records and data |
| 427 | * in memory without allocating space in the actual filesystem to hold |
| 428 | * the records and data. In the event that a filesystem becomes full, |
| 429 | * any records remaining in memory can be flushed to the memory log |
| 430 | * area. This allows the kernel to immediately return success. |
| 431 | */ |
| 432 | |
/* Boot area size limits: 32KB minimum, 64MB nominal, 256MB maximum. */
#define HAMMER_BOOT_MINBYTES	(32 << 10)
#define HAMMER_BOOT_NOMBYTES	(64LL << 20)
#define HAMMER_BOOT_MAXBYTES	(256LL << 20)

/* Memory log size limits: 256KB minimum, 1GB nominal, 64GB maximum. */
#define HAMMER_MEM_MINBYTES	(256 << 10)
#define HAMMER_MEM_NOMBYTES	(1LL << 30)
#define HAMMER_MEM_MAXBYTES	(64LL << 30)
| 440 | |
| 441 | struct hammer_volume_ondisk { |
| 442 | u_int64_t vol_signature;/* Signature */ |
| 443 | |
| 444 | int64_t vol_bot_beg; /* byte offset of boot area or 0 */ |
| 445 | int64_t vol_mem_beg; /* byte offset of memory log or 0 */ |
| 446 | int64_t vol_buf_beg; /* byte offset of first buffer in volume */ |
| 447 | int64_t vol_buf_end; /* byte offset of volume EOF (on buf bndry) */ |
| 448 | int64_t vol_locked; /* reserved clusters are >= this offset */ |
| 449 | |
| 450 | uuid_t vol_fsid; /* identify filesystem */ |
| 451 | uuid_t vol_fstype; /* identify filesystem type */ |
| 452 | char vol_name[64]; /* Name of volume */ |
| 453 | |
| 454 | int32_t vol_no; /* volume number within filesystem */ |
| 455 | int32_t vol_count; /* number of volumes making up FS */ |
| 456 | |
| 457 | u_int32_t vol_version; /* version control information */ |
| 458 | hammer_crc_t vol_crc; /* header crc */ |
| 459 | u_int32_t vol_flags; /* volume flags */ |
| 460 | u_int32_t vol_rootvol; /* which volume is the root volume? */ |
| 461 | |
| 462 | int32_t vol_reserved04; |
| 463 | int32_t vol_reserved05; |
| 464 | u_int32_t vol_reserved06; |
| 465 | u_int32_t vol_reserved07; |
| 466 | |
| 467 | int32_t vol_blocksize; /* for statfs only */ |
| 468 | int32_t vol_reserved08; |
| 469 | int64_t vol_nblocks; /* total allocatable hammer bufs */ |
| 470 | |
| 471 | /* |
| 472 | * These fields are initialized and space is reserved in every |
| 473 | * volume making up a HAMMER filesytem, but only the master volume |
| 474 | * contains valid data. |
| 475 | */ |
| 476 | int64_t vol0_stat_bigblocks; /* total bigblocks when fs is empty */ |
| 477 | int64_t vol0_stat_freebigblocks;/* number of free bigblocks */ |
| 478 | int64_t vol0_stat_bytes; /* for statfs only */ |
| 479 | int64_t vol0_stat_inodes; /* for statfs only */ |
| 480 | int64_t vol0_stat_records; /* total records in filesystem */ |
| 481 | hammer_off_t vol0_btree_root; /* B-Tree root */ |
| 482 | hammer_tid_t vol0_next_tid; /* highest synchronized TID */ |
| 483 | hammer_off_t vol0_zone_limit; /* limit the zone size */ |
| 484 | |
| 485 | /* |
| 486 | * Blockmaps for zones. Not all zones use a blockmap. Note that |
| 487 | * the entire root blockmap is cached in the hammer_mount structure. |
| 488 | */ |
| 489 | struct hammer_blockmap vol0_blockmap[HAMMER_MAX_ZONES]; |
| 490 | |
| 491 | /* |
| 492 | * Layer-2 array for undo fifo |
| 493 | */ |
| 494 | struct hammer_blockmap_layer2 vol0_undo_array[HAMMER_UNDO_LAYER2]; |
| 495 | |
| 496 | }; |
| 497 | |
| 498 | typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t; |
| 499 | |
/* Bits for vol_flags */
#define HAMMER_VOLF_VALID	0x0001	/* valid entry */
#define HAMMER_VOLF_OPEN	0x0002	/* volume is open */

/*
 * The volume header split around its own CRC field: CRCSIZE1 is the byte
 * count preceding vol_crc, CRCSIZE2 the byte count following it.
 */
#define HAMMER_VOL_CRCSIZE1	offsetof(struct hammer_volume_ondisk, vol_crc)
#define HAMMER_VOL_CRCSIZE2						\
	(sizeof(struct hammer_volume_ondisk) - HAMMER_VOL_CRCSIZE1 -	\
	 sizeof(hammer_crc_t))
| 508 | |
| 509 | /* |
| 510 | * Record types are fairly straightforward. The B-Tree includes the record |
| 511 | * type in its index sort. |
| 512 | * |
| 513 | * In particular please note that it is possible to create a pseudo- |
| 514 | * filesystem within a HAMMER filesystem by creating a special object |
| 515 | * type within a directory. Pseudo-filesystems are used as replication |
| 516 | * targets and even though they are built within a HAMMER filesystem they |
| 517 | * get their own obj_id space (and thus can serve as a replication target) |
| 518 | * and look like a mount point to the system. |
| 519 | * |
| 520 | * NOTE: hammer_ip_delete_range_all() deletes all record types greater |
| 521 | * than HAMMER_RECTYPE_INODE. |
| 522 | */ |
/* Record types.  HAMMER_RECTYPE_LOWEST aliases the first usable type. */
#define HAMMER_RECTYPE_UNKNOWN		0
#define HAMMER_RECTYPE_INODE		1	/* inode in obj_id space */
#define HAMMER_RECTYPE_LOWEST		1	/* lowest record type avail */
#define HAMMER_RECTYPE_PSEUDO_INODE	2	/* pseudo filesystem */
#define HAMMER_RECTYPE_CLUSTER		3	/* inter-cluster reference */

#define HAMMER_RECTYPE_DATA		0x0010
#define HAMMER_RECTYPE_DIRENTRY		0x0011
#define HAMMER_RECTYPE_DB		0x0012
#define HAMMER_RECTYPE_EXT		0x0013	/* ext attributes */
#define HAMMER_RECTYPE_FIX		0x0014	/* fixed attribute */

#define HAMMER_RECTYPE_MOVED		0x8000	/* special recovery flag */

/* Key value used with HAMMER_RECTYPE_FIX records */
#define HAMMER_FIXKEY_SYMLINK		1

/* Filesystem object types */
#define HAMMER_OBJTYPE_UNKNOWN		0	/* (never exists on-disk) */
#define HAMMER_OBJTYPE_DIRECTORY	1
#define HAMMER_OBJTYPE_REGFILE		2
#define HAMMER_OBJTYPE_DBFILE		3
#define HAMMER_OBJTYPE_FIFO		4
#define HAMMER_OBJTYPE_CDEV		5
#define HAMMER_OBJTYPE_BDEV		6
#define HAMMER_OBJTYPE_SOFTLINK		7
#define HAMMER_OBJTYPE_PSEUDOFS		8	/* pseudo filesystem obj */
| 546 | |
| 547 | /* |
| 548 | * HAMMER inode attribute data |
| 549 | * |
| 550 | * The data reference for a HAMMER inode points to this structure. Any |
| 551 | * modifications to the contents of this structure will result in a |
| 552 | * replacement operation. |
| 553 | * |
| 554 | * parent_obj_id is only valid for directories (which cannot be hard-linked), |
| 555 | * and specifies the parent directory obj_id. This field will also be set |
| 556 | * for non-directory inodes as a recovery aid, but can wind up specifying |
| 557 | * stale information. However, since object id's are not reused, the worst |
| 558 | * that happens is that the recovery code is unable to use it. |
| 559 | * |
| 560 | * NOTE: atime is stored in the inode's B-Tree element and not in the inode |
| 561 | * data. This allows the atime to be updated without having to lay down a |
| 562 | * new record. |
| 563 | */ |
| 564 | struct hammer_inode_data { |
| 565 | u_int16_t version; /* inode data version */ |
| 566 | u_int16_t mode; /* basic unix permissions */ |
| 567 | u_int32_t uflags; /* chflags */ |
| 568 | u_int32_t rmajor; /* used by device nodes */ |
| 569 | u_int32_t rminor; /* used by device nodes */ |
| 570 | u_int64_t ctime; |
| 571 | u_int64_t parent_obj_id;/* parent directory obj_id */ |
| 572 | uuid_t uid; |
| 573 | uuid_t gid; |
| 574 | |
| 575 | u_int8_t obj_type; |
| 576 | u_int8_t reserved01; |
| 577 | u_int16_t reserved02; |
| 578 | u_int32_t reserved03; |
| 579 | u_int64_t mtime; |
| 580 | u_int64_t size; /* filesystem object size */ |
| 581 | u_int64_t nlinks; /* hard links */ |
| 582 | u_int64_t reserved04; |
| 583 | union { |
| 584 | char reserved06[24]; |
| 585 | char symlink[24]; /* HAMMER_INODE_BASESYMLEN */ |
| 586 | } ext; |
| 587 | }; |
| 588 | |
| 589 | #define HAMMER_INODE_DATA_VERSION 1 |
| 590 | #define HAMMER_OBJID_ROOT 1 |
| 591 | #define HAMMER_INODE_BASESYMLEN 24 |
| 592 | |
| 593 | /* |
| 594 | * A directory entry specifies the HAMMER filesystem object id, a copy of |
| 595 | * the file type, and file name (either embedded or as out-of-band data). |
| 596 | * If the file name is short enough to fit into den_name[] (including a |
| 597 | * terminating nul) then it will be embedded in the record, otherwise it |
| 598 | * is stored out-of-band. The base record's data reference always points |
| 599 | * to the nul-terminated filename regardless. |
| 600 | * |
| 601 | * Directory entries are indexed with a 128 bit namekey rather than an |
| 602 | * offset. A portion of the namekey is an iterator or randomizer to deal |
| 603 | * with collisions. |
| 604 | * |
| 605 | * NOTE: base.base.obj_type holds the filesystem object type of obj_id, |
| 606 | * e.g. a den_type equivalent. |
| 607 | * |
| 608 | * NOTE: den_name / the filename data reference is NOT terminated with \0. |
| 609 | * |
| 610 | */ |
/*
 * Directory entry payload: the referenced object id, a reserved word, and
 * the start of the name.
 * Field order and types define the on-disk layout - do not rearrange.
 */
struct hammer_entry_data {
	u_int64_t	obj_id;		/* object being referenced */
	u_int64_t	reserved01;
	char		name[16];	/* name (extended) */
};

/* Byte offset of the name, and of the byte just past an nlen-byte name. */
#define HAMMER_ENTRY_NAME_OFF	offsetof(struct hammer_entry_data, name)
#define HAMMER_ENTRY_SIZE(nlen)	offsetof(struct hammer_entry_data, name[nlen])
| 619 | |
/* Symlink payload: the link text starts at the first byte of the record. */
struct hammer_symlink_data {
	char	name[16];
};

/* Byte offset of the link text within the structure. */
#define HAMMER_SYMLINK_NAME_OFF	offsetof(struct hammer_symlink_data, name)
| 625 | |
| 626 | /* |
| 627 | * Rollup various structures embedded as record data |
| 628 | */ |
| 629 | union hammer_data_ondisk { |
| 630 | struct hammer_entry_data entry; |
| 631 | struct hammer_inode_data inode; |
| 632 | struct hammer_symlink_data symlink; |
| 633 | }; |
| 634 | |
| 635 | typedef union hammer_data_ondisk *hammer_data_ondisk_t; |
| 636 | |
| 637 | #endif |