HAMMER 28/many: Implement zoned blockmap
[dragonfly.git] / sys / vfs / hammer / hammer_ondisk.c
/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.29 2008/02/10 09:51:01 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_buffer(hammer_buffer_t buffer, int isnew);
static int hammer_load_node(hammer_node_t node);
#if 0
static hammer_off_t hammer_advance_fifo(hammer_volume_t volume,
			hammer_off_t off, int32_t bytes);

static hammer_off_t hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len,
			int32_t data_len, struct hammer_buffer **rec_bufferp,
			u_int16_t hdr_type, int can_cross,
			struct hammer_buffer **data2_bufferp, int *errorp);
#endif

/*
 * Red-Black tree support for various structures
 */
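/*
 * Inodes are indexed by (obj_id, obj_asof), so multiple as-of
 * historical versions of the same object can coexist in the tree.
 */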
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
	if (ip1->obj_id < ip2->obj_id)
		return(-1);
	if (ip1->obj_id > ip2->obj_id)
		return(1);
	if (ip1->obj_asof < ip2->obj_asof)
		return(-1);
	if (ip1->obj_asof > ip2->obj_asof)
		return(1);
	return(0);
}

static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
	if (info->obj_id < ip->obj_id)
		return(-1);
	if (info->obj_id > ip->obj_id)
		return(1);
	if (info->obj_asof < ip->obj_asof)
		return(-1);
	if (info->obj_asof > ip->obj_asof)
		return(1);
	return(0);
}

static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
	if (vol1->vol_no < vol2->vol_no)
		return(-1);
	if (vol1->vol_no > vol2->vol_no)
		return(1);
	return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
	if (buf1->buf_offset < buf2->buf_offset)
		return(-1);
	if (buf1->buf_offset > buf2->buf_offset)
		return(1);
	return(0);
}

static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
	if (node1->node_offset < node2->node_offset)
		return(-1);
	if (node1->node_offset > node2->node_offset)
		return(1);
	return(0);
}

/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_buf_rb_tree_RB_LOOKUP(root, buf_offset).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
	     hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
	     hammer_buf_rb_compare, hammer_off_t, buf_offset);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
	     hammer_nod_rb_compare, hammer_off_t, node_offset);

/************************************************************************
 *				VOLUMES					*
 ************************************************************************
 *
 * Load a HAMMER volume by name.  Returns 0 on success or a positive error
 * code on failure.  Volumes must be loaded at mount time, get_volume() will
 * not load a new volume.
 *
 * Calls made to hammer_load_volume() are single-threaded during mount.
 */
int
hammer_install_volume(struct hammer_mount *hmp, const char *volname)
{
	struct mount *mp;
	hammer_volume_t volume;
	struct hammer_volume_ondisk *ondisk;
	struct nlookupdata nd;
	struct buf *bp = NULL;
	int error;
	int ronly;

	mp = hmp->mp;
	ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Allocate a volume structure
	 */
	++hammer_count_volumes;
	volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
	volume->vol_name = kstrdup(volname, M_HAMMER);
	volume->hmp = hmp;
	hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
	volume->io.offset = 0LL;

	/*
	 * Get the device vnode
	 */
	error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
	nlookup_done(&nd);
	if (error == 0) {
		if (vn_isdisk(volume->devvp, &error)) {
			error = vfs_mountedon(volume->devvp);
		}
	}
	if (error == 0 &&
	    count_udev(volume->devvp->v_umajor, volume->devvp->v_uminor) > 0) {
		error = EBUSY;
	}
	if (error == 0) {
		vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = vinvalbuf(volume->devvp, V_SAVE, 0, 0);
		if (error == 0) {
			error = VOP_OPEN(volume->devvp,
					 (ronly ? FREAD : FREAD|FWRITE),
					 FSCRED, NULL);
		}
		vn_unlock(volume->devvp);
	}
	if (error) {
		hammer_free_volume(volume);
		return(error);
	}
	volume->devvp->v_rdev->si_mountpoint = mp;

	/*
	 * Extract the volume number from the volume header and do various
	 * sanity checks.
	 */
	error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
	if (error)
		goto late_failure;
	ondisk = (void *)bp->b_data;
	if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
		kprintf("hammer_mount: volume %s has an invalid header\n",
			volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}
	volume->vol_no = ondisk->vol_no;
	volume->buffer_base = ondisk->vol_buf_beg;
	volume->vol_flags = ondisk->vol_flags;
	volume->nblocks = ondisk->vol_nblocks;
	volume->maxbuf_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
				ondisk->vol_buf_end - ondisk->vol_buf_beg);
	RB_INIT(&volume->rb_bufs_root);

	hmp->mp->mnt_stat.f_blocks += volume->nblocks;

	if (RB_EMPTY(&hmp->rb_vols_root)) {
		hmp->fsid = ondisk->vol_fsid;
	} else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
		kprintf("hammer_mount: volume %s's fsid does not match "
			"other volumes\n", volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	/*
	 * Insert the volume structure into the red-black tree.
	 */
	if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
		kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
			volume->vol_name, volume->vol_no);
		error = EEXIST;
	}

	/*
	 * Set the root volume.  HAMMER special cases the rootvol structure.
	 * We do not hold a ref because this would prevent related I/O
	 * from being flushed.
	 */
	if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
		hmp->rootvol = volume;
		if (bp) {
			brelse(bp);
			bp = NULL;
		}
		hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
	}
late_failure:
	if (bp)
		brelse(bp);
	if (error) {
		/*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
		VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
		hammer_free_volume(volume);
	}
	return (error);
}

/*
 * Unload and free a HAMMER volume.  Must return >= 0 to continue the
 * RB_SCAN; a failure would be reported by returning -1 to abort the scan.
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
	struct hammer_mount *hmp = volume->hmp;
	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Remove this volume's block count from the mount statistics.
	 */
	hmp->mp->mnt_stat.f_blocks -= volume->nblocks;

	/*
	 * Clean up the root volume pointer, which is held unlocked in hmp.
	 */
	if (hmp->rootvol == volume)
		hmp->rootvol = NULL;

	/*
	 * Unload all of the volume's in-memory buffers and wait for any
	 * dependent I/O to complete.
	 */
	RB_SCAN(hammer_buf_rb_tree, &volume->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);
	hammer_io_waitdep(&volume->io);

	/*
	 * Release our buffer and flush anything left in the buffer cache.
	 */
	hammer_io_release(&volume->io, 2);

	/*
	 * There should be no references on the volume and no remaining
	 * buffers.
	 */
	KKASSERT(volume->io.lock.refs == 0);
	KKASSERT(RB_EMPTY(&volume->rb_bufs_root));

	volume->ondisk = NULL;
	if (volume->devvp) {
		if (ronly) {
			vinvalbuf(volume->devvp, 0, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD);
		} else {
			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
		}
	}

	/*
	 * Destroy the structure
	 */
	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
	hammer_free_volume(volume);
	return(0);
}

static
void
hammer_free_volume(hammer_volume_t volume)
{
	if (volume->vol_name) {
		kfree(volume->vol_name, M_HAMMER);
		volume->vol_name = NULL;
	}
	if (volume->devvp) {
		if (vn_isdisk(volume->devvp, NULL) &&
		    volume->devvp->v_rdev &&
		    volume->devvp->v_rdev->si_mountpoint == volume->hmp->mp
		) {
			volume->devvp->v_rdev->si_mountpoint = NULL;
		}
		vrele(volume->devvp);
		volume->devvp = NULL;
	}
	--hammer_count_volumes;
	kfree(volume, M_HAMMER);
}

/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
	struct hammer_volume *volume;

	/*
	 * Locate the volume structure
	 */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL) {
		*errorp = ENOENT;
		return(NULL);
	}
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}

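/*
 * Reference a volume that is already known to exist, loading its
 * on-disk structure if necessary.
 */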
int
hammer_ref_volume(hammer_volume_t volume)
{
	int error;

	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		error = hammer_load_volume(volume);
		if (error)
			hammer_rel_volume(volume, 1);
	} else {
		error = 0;
	}
	return (error);
}

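/*
 * Reference the root volume, which is cached unlocked in hmp->rootvol
 * by hammer_install_volume().
 */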
hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
	hammer_volume_t volume;

	volume = hmp->rootvol;
	KKASSERT(volume != NULL);
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (volume);
}

/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple gets.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
	struct hammer_volume_ondisk *ondisk;
	int error;

	hammer_lock_ex(&volume->io.lock);
	KKASSERT(volume->io.loading == 0);
	volume->io.loading = 1;

	if (volume->ondisk == NULL) {
		error = hammer_io_read(volume->devvp, &volume->io);
		if (error) {
			volume->io.loading = 0;
			hammer_unlock(&volume->io.lock);
			return (error);
		}
		volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
	} else {
		error = 0;
	}
	volume->io.loading = 0;
	hammer_unlock(&volume->io.lock);
	return(0);
}

/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
 * lock to be held.
 *
 * Volumes are not unloaded from memory during normal operation.
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
	if (volume->io.lock.refs == 1) {
		hammer_lock_ex(&volume->io.lock);
		if (volume->io.lock.refs == 1) {
			volume->ondisk = NULL;
			hammer_io_release(&volume->io, flush);
		} else if (flush) {
			hammer_io_flush(&volume->io);
		}
		hammer_unlock(&volume->io.lock);
	}
	hammer_unref(&volume->io.lock);
}

/************************************************************************
 *				BUFFERS					*
 ************************************************************************
 *
 * Manage buffers.  Currently all blockmap-backed zones are translated
 * to zone-2 buffer offsets.
 */
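/*
 * (Sketch of the offset scheme) A hammer_off_t encodes a zone, a volume
 * number, and a byte offset.  Blockmap zones are translated to zone-2
 * (raw buffer) offsets via hammer_blockmap_lookup(); the volume number
 * is then recovered with HAMMER_VOL_DECODE() and the offset is aligned
 * to a large buffer with ~HAMMER_BUFMASK64.
 */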
hammer_buffer_t
hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset,
		  int isnew, int *errorp)
{
	hammer_buffer_t buffer;
	hammer_volume_t volume;
	int vol_no;
	int zone;

	zone = HAMMER_ZONE_DECODE(buf_offset);
	if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) {
		buf_offset = hammer_blockmap_lookup(hmp, buf_offset, errorp);
		KKASSERT(*errorp == 0);
	}
	buf_offset &= ~HAMMER_BUFMASK64;
	KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) ==
		 HAMMER_ZONE_RAW_BUFFER);
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = hammer_get_volume(hmp, vol_no, errorp);
	if (volume == NULL)
		return(NULL);

	/*
	 * NOTE: buf_offset and maxbuf_off are both full offset
	 *	 specifications.
	 */
	KKASSERT(buf_offset < volume->maxbuf_off);

	/*
	 * Locate and lock the buffer structure, creating one if necessary.
	 */
again:
	buffer = RB_LOOKUP(hammer_buf_rb_tree, &volume->rb_bufs_root,
			   buf_offset);
	if (buffer == NULL) {
		++hammer_count_buffers;
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
		buffer->buf_offset = buf_offset;
		buffer->volume = volume;
		hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
		buffer->io.offset = volume->ondisk->vol_buf_beg +
				    (buf_offset & HAMMER_OFF_SHORT_MASK);
		TAILQ_INIT(&buffer->clist);
		hammer_ref(&buffer->io.lock);

		/*
		 * Insert the buffer into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_buf_rb_tree, &volume->rb_bufs_root, buffer)) {
			hammer_unref(&buffer->io.lock);
			--hammer_count_buffers;
			kfree(buffer, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&buffer->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (buffer->ondisk == NULL || buffer->io.loading) {
		*errorp = hammer_load_buffer(buffer, isnew);
		if (*errorp) {
			hammer_rel_buffer(buffer, 1);
			buffer = NULL;
		}
	} else {
		*errorp = 0;
	}
	hammer_rel_volume(volume, 0);
	return(buffer);
}

static int
hammer_load_buffer(hammer_buffer_t buffer, int isnew)
{
	hammer_volume_t volume;
	void *ondisk;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	hammer_lock_ex(&buffer->io.lock);
	KKASSERT(buffer->io.loading == 0);
	buffer->io.loading = 1;

	if (buffer->ondisk == NULL) {
		if (isnew) {
			error = hammer_io_new(volume->devvp, &buffer->io);
		} else {
			error = hammer_io_read(volume->devvp, &buffer->io);
		}
		if (error) {
			buffer->io.loading = 0;
			hammer_unlock(&buffer->io.lock);
			return (error);
		}
		buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		hammer_modify_buffer(buffer, NULL, 0);
		/* additional initialization goes here */
	}
	buffer->io.loading = 0;
	hammer_unlock(&buffer->io.lock);
	return (error);
}

/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_flush_buffer_nodes(buffer);
	KKASSERT(buffer->io.lock.refs == 1);
	hammer_rel_buffer(buffer, 2);
	return(0);
}

/*
 * Reference a buffer that is either already referenced or via a specially
 * handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
	int error;

	hammer_ref(&buffer->io.lock);
	if (buffer->ondisk == NULL || buffer->io.loading) {
		error = hammer_load_buffer(buffer, 0);
		if (error) {
			hammer_rel_buffer(buffer, 1);
			/*
			 * NOTE: buffer pointer can become stale after
			 * the above release.
			 */
		}
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
	hammer_volume_t volume;

	if (buffer->io.lock.refs == 1) {
		hammer_lock_ex(&buffer->io.lock);
		if (buffer->io.lock.refs == 1) {
			hammer_io_release(&buffer->io, flush);

			if (buffer->io.bp == NULL &&
			    buffer->io.lock.refs == 1) {
				hammer_flush_buffer_nodes(buffer);
				KKASSERT(TAILQ_EMPTY(&buffer->clist));
				volume = buffer->volume;
				RB_REMOVE(hammer_buf_rb_tree,
					  &volume->rb_bufs_root, buffer);
				buffer->volume = NULL; /* sanity */
				--hammer_count_buffers;
				kfree(buffer, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&buffer->io);
		}
		hammer_unlock(&buffer->io.lock);
	}
	hammer_unref(&buffer->io.lock);
}

/*
 * Access the filesystem buffer containing the specified hammer offset.
 * buf_offset is a conglomeration of the volume number and vol_buf_beg
 * relative buffer offset.  It must also have bit 55 set to be valid.
 * (see hammer_off_t in hammer_disk.h).
 *
 * Any prior buffer in *bufferp will be released and replaced by the
 * requested buffer.
 */
void *
hammer_bread(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
	     struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t xoff = (int32_t)buf_offset & HAMMER_BUFMASK;

	buf_offset &= ~HAMMER_BUFMASK64;

	buffer = *bufferp;
	if (buffer == NULL || buffer->buf_offset != buf_offset) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(hmp, buf_offset, 0, errorp);
		*bufferp = buffer;
	} else {
		*errorp = 0;
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	if (buffer == NULL)
		return(NULL);
	else
		return((char *)buffer->ondisk + xoff);
}

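/*
 * Typical caller-side usage sketch for hammer_bread()/hammer_bnew()
 * (see hammer_alloc_record() below for a real example):
 *
 *	struct hammer_buffer *buffer = NULL;
 *	rec = hammer_bread(hmp, rec_offset, &error, &buffer);
 *	...
 *	if (buffer)
 *		hammer_rel_buffer(buffer, 0);
 */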
/*
 * Access the filesystem buffer containing the specified hammer offset.
 * No disk read operation occurs.  The result buffer may contain garbage.
 *
 * Any prior buffer in *bufferp will be released and replaced by the
 * requested buffer.
 */
void *
hammer_bnew(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
	    struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t xoff = (int32_t)buf_offset & HAMMER_BUFMASK;

	buf_offset &= ~HAMMER_BUFMASK64;

	buffer = *bufferp;
	if (buffer == NULL || buffer->buf_offset != buf_offset) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(hmp, buf_offset, 1, errorp);
		*bufferp = buffer;
	} else {
		*errorp = 0;
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	if (buffer == NULL)
		return(NULL);
	else
		return((char *)buffer->ondisk + xoff);
}

/************************************************************************
 *				NODES					*
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer, and will only reference the buffer while
 * the node itself is referenced.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the hammer_node.
 *
 * This allows the HAMMER implementation to cache hammer_nodes long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * the buffer management code.
 *
 * The caller must pass a referenced hmp on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_mount_t hmp, hammer_off_t node_offset, int *errorp)
{
	hammer_node_t node;

	KKASSERT((node_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_BTREE);

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &hmp->rb_nods_root, node_offset);
	if (node == NULL) {
		++hammer_count_nodes;
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->hmp = hmp;
		if (RB_INSERT(hammer_nod_rb_tree, &hmp->rb_nods_root, node)) {
			--hammer_count_nodes;
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	hammer_ref(&node->lock);
	*errorp = hammer_load_node(node);
	if (*errorp) {
		hammer_rel_node(node);
		node = NULL;
	}
	return(node);
}

/*
 * Reference an already-referenced node.
 */
int
hammer_ref_node(hammer_node_t node)
{
	int error;

	KKASSERT(node->lock.refs > 0);
	hammer_ref(&node->lock);
	if ((error = hammer_load_node(node)) != 0)
		hammer_rel_node(node);
	return(error);
}

/*
 * Load a node's on-disk data reference.
 */
static int
hammer_load_node(hammer_node_t node)
{
	hammer_buffer_t buffer;
	int error;

	if (node->ondisk)
		return(0);
	error = 0;
	hammer_lock_ex(&node->lock);
	if (node->ondisk == NULL) {
		/*
		 * This is a little confusing but the gist is that
		 * node->buffer determines whether the node is on
		 * the buffer's clist and node->ondisk determines
		 * whether the buffer is referenced.
		 */
		if ((buffer = node->buffer) != NULL) {
			error = hammer_ref_buffer(buffer);
		} else {
			buffer = hammer_get_buffer(node->hmp,
						   node->node_offset, 0,
						   &error);
			if (buffer) {
				KKASSERT(error == 0);
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		}
		if (error == 0) {
			node->ondisk = (void *)((char *)buffer->ondisk +
				(node->node_offset & HAMMER_BUFMASK));
		}
	}
	hammer_unlock(&node->lock);
	return (error);
}

/*
 * Safely reference a node, interlock against flushes via the IO subsystem.
 */
hammer_node_t
hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
		     int *errorp)
{
	hammer_node_t node;

	if ((node = *cache) != NULL)
		hammer_ref(&node->lock);
	if (node) {
		*errorp = hammer_load_node(node);
		if (*errorp) {
			hammer_rel_node(node);
			node = NULL;
		}
	} else {
		*errorp = ENOENT;
	}
	return(node);
}

/*
 * Release a hammer_node.  On the last release the node dereferences
 * its underlying buffer and may or may not be destroyed.
 */
void
hammer_rel_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	/*
	 * If this isn't the last ref just decrement the ref count and
	 * return.
	 */
	if (node->lock.refs > 1) {
		hammer_unref(&node->lock);
		return;
	}

	/*
	 * If there is no ondisk info or no buffer the node failed to load,
	 * remove the last reference and destroy the node.
	 */
	if (node->ondisk == NULL) {
		hammer_unref(&node->lock);
		hammer_flush_node(node);
		/* node is stale now */
		return;
	}

	/*
	 * Do final cleanups and then either destroy the node or leave it
	 * passively cached.  The buffer reference is removed regardless.
	 */
	buffer = node->buffer;
	node->ondisk = NULL;

	if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
		hammer_unref(&node->lock);
		hammer_rel_buffer(buffer, 0);
		return;
	}

	/*
	 * Destroy the node if it has been marked for deletion.  We mark
	 * it as being free.  Note that the disk space is physically
	 * freed when the fifo cycles back through the node.
	 */
	if (node->flags & HAMMER_NODE_DELETED) {
		hammer_blockmap_free(node->hmp, node->node_offset,
				     sizeof(*node->ondisk));
	}

	/*
	 * Destroy the node.  Record pertinent data because the node
	 * becomes stale the instant we flush it.
	 */
	hammer_unref(&node->lock);
	hammer_flush_node(node);
	/* node is stale */
	hammer_rel_buffer(buffer, 0);
}

/*
 * Passively cache a referenced hammer_node in *cache.  The caller may
 * release the node on return.
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
	hammer_node_t old;

	/*
	 * If the node is being deleted, don't cache it!
	 */
	if (node->flags & HAMMER_NODE_DELETED)
		return;

	/*
	 * Cache the node.  If we previously cached a different node we
	 * have to give HAMMER a chance to destroy it.
	 */
again:
	if (node->cache1 != cache) {
		if (node->cache2 != cache) {
			if ((old = *cache) != NULL) {
				KKASSERT(node->lock.refs != 0);
				hammer_uncache_node(cache);
				goto again;
			}
			if (node->cache2)
				*node->cache2 = NULL;
			node->cache2 = node->cache1;
			node->cache1 = cache;
			*cache = node;
		} else {
			struct hammer_node **tmp;
			tmp = node->cache1;
			node->cache1 = node->cache2;
			node->cache2 = tmp;
		}
	}
}

void
hammer_uncache_node(struct hammer_node **cache)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		*cache = NULL;
		if (node->cache1 == cache) {
			node->cache1 = node->cache2;
			node->cache2 = NULL;
		} else if (node->cache2 == cache) {
			node->cache2 = NULL;
		} else {
			panic("hammer_uncache_node: missing cache linkage");
		}
		if (node->cache1 == NULL && node->cache2 == NULL)
			hammer_flush_node(node);
	}
}

/*
 * Remove a node's cache references and destroy the node if it has no
 * other references or backing store.
 */
void
hammer_flush_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	if (node->cache1)
		*node->cache1 = NULL;
	if (node->cache2)
		*node->cache2 = NULL;
	if (node->lock.refs == 0 && node->ondisk == NULL) {
		RB_REMOVE(hammer_nod_rb_tree, &node->hmp->rb_nods_root, node);
		if ((buffer = node->buffer) != NULL) {
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
		--hammer_count_nodes;
		kfree(node, M_HAMMER);
	}
}

/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 * This is only called when the buffer is about to be destroyed, so
 * none of the nodes should have any references.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
	hammer_node_t node;

	while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
		KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
		hammer_ref(&node->lock);
		node->flags |= HAMMER_NODE_FLUSH;
		hammer_rel_node(node);
	}
}

8cd0a023 1066/************************************************************************
47197d71 1067 * ALLOCATORS *
8cd0a023
MD
1068 ************************************************************************/
1069
d26d0ae9 1070/*
47197d71 1071 * Allocate a B-Tree node.
d26d0ae9 1072 */
47197d71
MD
1073hammer_node_t
1074hammer_alloc_btree(hammer_mount_t hmp, int *errorp)
1075{
1076 hammer_buffer_t buffer = NULL;
1077 hammer_node_t node = NULL;
1078 hammer_off_t node_offset;
1079
40043e7f
MD
1080 node_offset = hammer_blockmap_alloc(hmp, HAMMER_ZONE_BTREE_INDEX,
1081 sizeof(struct hammer_node_ondisk),
1082 errorp);
1083 if (*errorp == 0) {
47197d71 1084 node = hammer_get_node(hmp, node_offset, errorp);
40043e7f
MD
1085 hammer_modify_node(node);
1086 bzero(node->ondisk, sizeof(*node->ondisk));
1087 }
47197d71
MD
1088 if (buffer)
1089 hammer_rel_buffer(buffer, 0);
1090 return(node);
1091}
d26d0ae9 1092
47197d71
MD
1093/*
1094 * The returned buffers are already appropriately marked as being modified.
1095 * If the caller marks them again unnecessary undo records may be generated.
1096 *
40043e7f
MD
1097 * In-band data is indicated by data_bufferp == NULL. Pass a data_len of 0
1098 * for zero-fill (caller modifies data_len afterwords).
47197d71
MD
1099 */
1100void *
1101hammer_alloc_record(hammer_mount_t hmp,
40043e7f
MD
1102 hammer_off_t *rec_offp, u_int8_t rec_type,
1103 struct hammer_buffer **rec_bufferp,
1104 int32_t data_len, void **datap,
1105 struct hammer_buffer **data_bufferp, int *errorp)
47197d71 1106{
47197d71 1107 hammer_record_ondisk_t rec;
40043e7f
MD
1108 hammer_off_t rec_offset;
1109 hammer_off_t data_offset;
1110 int32_t reclen;
47197d71 1111
40043e7f
MD
1112 if (datap)
1113 *datap = NULL;
47197d71 1114
40043e7f
MD
1115 /*
1116 * Allocate the record
1117 */
1118 rec_offset = hammer_blockmap_alloc(hmp, HAMMER_ZONE_RECORD_INDEX,
1119 HAMMER_RECORD_SIZE, errorp);
47197d71
MD
1120 if (*errorp)
1121 return(NULL);
d26d0ae9 1122
40043e7f
MD
1123 /*
1124 * Allocate data
1125 */
1126 if (data_len) {
1127 if (data_bufferp == NULL) {
1128 switch(rec_type) {
1129 case HAMMER_RECTYPE_DATA:
1130 reclen = offsetof(struct hammer_data_record,
1131 data[0]);
1132 break;
1133 case HAMMER_RECTYPE_DIRENTRY:
1134 reclen = offsetof(struct hammer_entry_record,
1135 name[0]);
1136 break;
1137 default:
1138 panic("hammer_alloc_record: illegal "
1139 "in-band data");
1140 /* NOT REACHED */
1141 reclen = 0;
1142 break;
1143 }
1144 KKASSERT(reclen + data_len <= HAMMER_RECORD_SIZE);
1145 data_offset = rec_offset + reclen;
1146 } else if (data_len < HAMMER_BUFSIZE) {
1147 data_offset = hammer_blockmap_alloc(hmp,
1148 HAMMER_ZONE_SMALL_DATA_INDEX,
1149 data_len, errorp);
1150 } else {
1151 data_offset = hammer_blockmap_alloc(hmp,
1152 HAMMER_ZONE_LARGE_DATA_INDEX,
1153 data_len, errorp);
1154 }
1155 } else {
1156 data_offset = 0;
1157 }
1158 if (*errorp) {
1159 hammer_blockmap_free(hmp, rec_offset, HAMMER_RECORD_SIZE);
1160 return(NULL);
1161 }
1162
d26d0ae9 1163 /*
47197d71 1164 * Basic return values.
d26d0ae9 1165 */
47197d71 1166 *rec_offp = rec_offset;
40043e7f
MD
1167 rec = hammer_bread(hmp, rec_offset, errorp, rec_bufferp);
1168 KKASSERT(*errorp == 0);
1169 rec->base.data_off = data_offset;
47197d71 1170 rec->base.data_len = data_len;
40043e7f
MD
1171 hammer_modify_buffer(*rec_bufferp, NULL, 0);
1172
1173 if (data_bufferp) {
1174 if (data_len) {
1175 *datap = hammer_bread(hmp, data_offset, errorp,
1176 data_bufferp);
1177 KKASSERT(*errorp == 0);
1178 hammer_modify_buffer(*data_bufferp, NULL, 0);
d26d0ae9 1179 } else {
40043e7f
MD
1180 *datap = NULL;
1181 }
1182 } else if (data_len) {
1183 KKASSERT(data_offset + data_len - rec_offset <=
1184 HAMMER_RECORD_SIZE);
1185 if (datap) {
1186 *datap = (void *)((char *)rec +
1187 (int32_t)(data_offset - rec_offset));
d26d0ae9 1188 }
d26d0ae9 1189 } else {
40043e7f 1190 KKASSERT(datap == NULL);
d26d0ae9 1191 }
40043e7f 1192 KKASSERT(*errorp == 0);
47197d71 1193 return(rec);
d26d0ae9
MD
1194}
1195
/*
 * Generate an undo fifo entry and return the buffer to the caller (XXX).
 * The caller must create a dependency to ensure that the undo record is
 * flushed before the modified buffer is flushed.
 */
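/*
 * XXX currently a no-op under the zoned blockmap; the fifo-based
 * implementation below is disabled via #if 0.
 */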
int
hammer_generate_undo(hammer_mount_t hmp, hammer_off_t off, void *base, int len)
{
	return(0);
#if 0
	hammer_off_t rec_offset;
	hammer_fifo_undo_t undo;
	hammer_buffer_t buffer = NULL;
	int error;

	rec_offset = hammer_alloc_fifo(hmp, sizeof(*undo), len,
				       &buffer, HAMMER_HEAD_TYPE_UNDO,
				       0, NULL, &error);
	if (error == 0) {
		undo = (void *)((char *)buffer->ondisk +
				((int32_t)rec_offset & HAMMER_BUFMASK));
		undo->undo_offset = off;
		bcopy(base, undo + 1, len);
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(error);
#endif
}

#if 0

/*
 * Allocate space from the FIFO.  The first rec_len bytes will be zero'd.
 * The entire space is marked modified (the caller should not remark it as
 * that will cause unnecessary undo records to be added).
 */
static
hammer_off_t
hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len,
		  struct hammer_buffer **rec_bufferp, u_int16_t hdr_type,
		  int can_cross,
		  struct hammer_buffer **data2_bufferp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_volume_t end_volume;
	hammer_volume_ondisk_t ondisk;
	hammer_fifo_head_t head;
	hammer_fifo_tail_t tail;
	hammer_off_t end_off = 0;
	hammer_off_t tmp_off = 0;
	int32_t end_vol_no;
	int32_t tmp_vol_no;
	int32_t xoff;
	int32_t aligned_bytes;
	int must_pad;

	aligned_bytes = (rec_len + data_len + HAMMER_TAIL_ONDISK_SIZE +
			 HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK;

	root_volume = hammer_get_root_volume(hmp, errorp);
	if (root_volume)
		hammer_modify_volume(root_volume, NULL, 0);

	while (root_volume) {
		ondisk = root_volume->ondisk;

		end_off = ondisk->vol0_fifo_end;
		end_vol_no = HAMMER_VOL_DECODE(end_off);

		end_volume = hammer_get_volume(hmp, end_vol_no, errorp);
		if (*errorp)
			goto done;

		/*
		 * Check to see if we ran out of space.  Include some extra
		 * room.
		 *
		 * vol0_fifo_end cannot be advanced into the same buffer
		 * that vol0_fifo_beg resides in.  This allows us to
		 * instantiate a new buffer without reading it in.
		 *
		 * XXX messy.
		 */
		tmp_off = ondisk->vol0_fifo_beg & ~HAMMER_BUFMASK64;
		tmp_vol_no = HAMMER_VOL_DECODE(tmp_off);
		if ((tmp_off & HAMMER_OFF_SHORT_MASK) == 0) {
			if (end_vol_no + 1 == tmp_vol_no) {
				tmp_vol_no = end_vol_no;
				tmp_off = end_volume->maxbuf_off;
			} else if (end_vol_no + 1 == hmp->nvolumes &&
				   tmp_vol_no == 0) {
				tmp_vol_no = end_vol_no;
				tmp_off = end_volume->maxbuf_off;
			}
		}
		hammer_rel_volume(end_volume, 0);

		/*
		 * XXX dummy head at end of fifo
		 */
		if (end_vol_no == tmp_vol_no &&
		    end_off < tmp_off &&
		    end_off + aligned_bytes + sizeof(*head) >= tmp_off) {
			*errorp = ENOSPC;
			goto done;
		}

		if ((int32_t)end_off & HAMMER_BUFMASK)
			head = hammer_bread(hmp, end_off, errorp, rec_bufferp);
		else
			head = hammer_bnew(hmp, end_off, errorp, rec_bufferp);
		if (*errorp)
			goto done;

		/*
		 * Load the buffer, retry if someone else squeaked in
		 * while we were blocked.
		 */
		if (ondisk->vol0_fifo_end != end_off)
			continue;

		/*
		 * Ok, we're gonna do something.  Modify the buffer
		 */
		hammer_modify_buffer(*rec_bufferp, NULL, 0);
		if (ondisk->vol0_fifo_end != end_off)
			continue;
		xoff = (int32_t)end_off & HAMMER_BUFMASK;

		/*
		 * The non-data portion of the fifo record cannot cross
		 * a buffer boundary.
		 *
		 * The entire record cannot cross a buffer boundary if
		 * can_cross is 0.
		 *
		 * The entire record cannot cover more than two whole buffers
		 * regardless.  Even if the data portion is 16K, this case
		 * can occur due to the addition of the fifo_tail.
		 *
		 * It is illegal for a record to cross a volume boundary.
		 *
		 * It is illegal for a record to cross a recovery boundary
		 * (this is so recovery code is guaranteed a record rather
		 * than data at certain points).
		 *
		 * Add a pad record and loop if it does.
		 */
		must_pad = 0;
		if (xoff + rec_len > HAMMER_BUFSIZE)
			must_pad = 1;
		if (can_cross == 0) {
			if (xoff + aligned_bytes > HAMMER_BUFSIZE)
				must_pad = 1;
		} else {
			if (xoff + aligned_bytes > HAMMER_BUFSIZE &&
			    (end_off + aligned_bytes) >=
			    (*rec_bufferp)->volume->maxbuf_off) {
				must_pad = 1;
			}
			if ((end_off ^ (end_off + aligned_bytes)) &
			    HAMMER_OFF_SHORT_REC_MASK) {
				must_pad = 1;
			}
			if (xoff + aligned_bytes - HAMMER_BUFSIZE >
			    HAMMER_BUFSIZE) {
				KKASSERT(xoff != 0);
				must_pad = 1;
			}
		}

		/*
		 * Pad to end of the buffer if necessary.  PADs can be
		 * squeezed into as little as 8 bytes (hence our alignment
		 * requirement).  The crc, reserved, and sequence number
		 * fields are not used, but initialize them anyway if there
		 * is enough room.
		 */
		if (must_pad) {
			xoff = HAMMER_BUFSIZE - xoff;
			head->hdr_signature = HAMMER_HEAD_SIGNATURE;
			head->hdr_type = HAMMER_HEAD_TYPE_PAD;
			head->hdr_size = xoff;
			if (xoff >= HAMMER_HEAD_ONDISK_SIZE +
				    HAMMER_TAIL_ONDISK_SIZE) {
				head->hdr_crc = 0;
				head->hdr_reserved02 = 0;
				head->hdr_seq = 0;
			}

			tail = (void *)((char *)head + xoff -
					HAMMER_TAIL_ONDISK_SIZE);
			if ((void *)head != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = xoff;
			}
			KKASSERT((xoff & HAMMER_HEAD_ALIGN_MASK) == 0);
			ondisk->vol0_fifo_end =
				hammer_advance_fifo((*rec_bufferp)->volume,
						    end_off, xoff);
			continue;
		}

		if (xoff + aligned_bytes > HAMMER_BUFSIZE) {
			xoff = xoff + aligned_bytes - HAMMER_BUFSIZE;

			KKASSERT(xoff <= HAMMER_BUFSIZE);
			tail = hammer_bnew(hmp, end_off + aligned_bytes -
						HAMMER_TAIL_ONDISK_SIZE,
					   errorp, data2_bufferp);
			hammer_modify_buffer(*data2_bufferp, NULL, 0);
			if (*errorp)
				goto done;

			/*
			 * Retry if someone else appended to the fifo while
			 * we were blocked.
			 */
			if (ondisk->vol0_fifo_end != end_off)
				continue;
		} else {
			tail = (void *)((char *)head + aligned_bytes -
					HAMMER_TAIL_ONDISK_SIZE);
		}

		bzero(head, rec_len);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = hdr_type;
		head->hdr_size = aligned_bytes;
		head->hdr_crc = 0;
		head->hdr_seq = root_volume->ondisk->vol0_next_seq++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = hdr_type;
		tail->tail_size = aligned_bytes;

		ondisk->vol0_fifo_end =
			hammer_advance_fifo((*rec_bufferp)->volume,
					    end_off, aligned_bytes);
done:
		hammer_rel_volume(root_volume, 0);
		break;
	}
	if (*errorp)
		end_off = 0;
	return(end_off);
}

/*
 * Mark a fifo record as having been freed.  XXX needs undo.
 */
void
hammer_free_fifo(hammer_mount_t hmp, hammer_off_t fifo_offset)
{
	hammer_buffer_t buffer = NULL;
	hammer_fifo_head_t head;
	int error;

	head = hammer_bread(hmp, fifo_offset, &error, &buffer);
	if (head) {
		hammer_modify_buffer(buffer, &head->hdr_type,
				     sizeof(head->hdr_type));
		head->hdr_type |= HAMMER_HEAD_FLAG_FREE;
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
}

/*
 * Attempt to rewind the FIFO.
 *
 * This routine is allowed to do nothing.
 */
void
hammer_unwind_fifo(hammer_mount_t hmp, hammer_off_t rec_offset)
{
}

/*
 * Advance the FIFO a certain number of bytes.
 */
static
hammer_off_t
hammer_advance_fifo(hammer_volume_t volume, hammer_off_t off, int32_t bytes)
{
	int32_t vol_no;

	off += bytes;
	KKASSERT(off <= volume->maxbuf_off);
	KKASSERT((off & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
	if (off == volume->maxbuf_off) {
		vol_no = volume->vol_no + 1;
		if (vol_no == volume->hmp->nvolumes)
			vol_no = 0;
		off = HAMMER_ENCODE_RAW_BUFFER(vol_no, 0);
	}
	return(off);
}
#endif

/*
 * Sync dirty buffers to the media
 */

static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

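/*
 * Sync the mount: flush dirty vnodes via vmntvnodescan(), then sync
 * each volume's dirty buffers.
 */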
int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}

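/*
 * Fast-pass scan: returning -1 skips vnodes with no dirty state
 * without acquiring them.
 */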
static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ip == NULL ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}

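/*
 * Slow-pass scan: fsync any vnode with a modified inode or dirty
 * buffers, recording the error if one occurs.
 */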
static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	error = VOP_FSYNC(vp, info->waitfor);
	if (error)
		info->error = error;
	return(0);
}

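/*
 * Sync a single volume: sync its buffers, then release it with flush.
 */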
int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	hammer_ref(&volume->io.lock);
	RB_SCAN(hammer_buf_rb_tree, &volume->rb_bufs_root, NULL,
		hammer_sync_buffer, info);
	hammer_rel_volume(volume, 1);
	return(0);
}

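/*
 * Sync a single buffer by acquiring and releasing a flushing reference.
 */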
int
hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_rel_buffer(buffer, 1);
	return(0);
}

#if 0
/*
 * Initialize a fifo record head.
 */
void
hammer_init_fifo(hammer_fifo_head_t head, u_int16_t type)
{
	head->hdr_signature = HAMMER_HEAD_SIGNATURE;
	head->hdr_type = type;
	head->hdr_size = 0;
	head->hdr_crc = 0;
	head->hdr_seq = 0;
}

#endif