/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.20 2008/06/20 05:38:26 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
	     hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
	if (res1->zone_offset < res2->zone_offset)
		return(-1);
	if (res1->zone_offset > res2->zone_offset)
		return(1);
	return(0);
}
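
/*
 * A reservation's zone_offset is the zone-2 (raw buffer) base offset of
 * the big-block it covers (see hammer_blockmap_reserve() below), so an
 * RB_LOOKUP() keyed on a big-block base address finds any in-flight
 * reservation for that big-block.
 */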

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
		      int bytes, int *errorp)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	hammer_reserve_t resv;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_buffer_t buffer3 = NULL;
	hammer_off_t tmp_offset;
	hammer_off_t next_offset;
	hammer_off_t result_offset;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	int loops = 0;

	hmp = trans->hmp;

	/*
	 * Deal with alignment and buffer-boundary issues.
	 *
	 * Be careful, certain primary alignments are used below to allocate
	 * new blockmap blocks.
	 */
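	/*
	 * (bytes + 15) & ~15 below rounds the request up to the next
	 * multiple of 16: for example 1 -> 16, 16 -> 16, 17 -> 32.
	 */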
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

	/*
	 * Setup
	 */
	root_volume = trans->rootvol;
	*errorp = 0;
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

	hammer_lock_ex(&hmp->blkmap_lock);
	next_offset = blockmap->next_offset;

again:
	/*
	 * Check for wrap
	 */
	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
		if (++loops == 2) {
			result_offset = 0;
			*errorp = ENOSPC;
			goto done;
		}
		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
	}

	/*
	 * The allocation request may not cross a buffer boundary.  Special
	 * large allocations must not cross a large-block boundary.
	 */
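	/*
	 * Illustration (assuming the usual 16K HAMMER_BUFSIZE): if
	 * next_offset ends in 0x3ff0 and bytes is 32, tmp_offset ends in
	 * 0x400f; the two offsets differ above HAMMER_BUFMASK64, so the
	 * request would straddle a buffer and next_offset is bumped to
	 * the start of the next buffer before retrying.
	 */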
	tmp_offset = next_offset + bytes - 1;
	if (bytes <= HAMMER_BUFSIZE) {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
			goto again;
		}
	} else {
		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
	}

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	KKASSERT(*errorp == 0);

	/*
	 * Check CRC.
	 */
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * If we are at a big-block boundary and layer1 indicates no
	 * free big-blocks, then we cannot allocate a new big-block in
	 * layer2; skip to the next layer1 entry.
	 */
	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
	    layer1->blocks_free == 0) {
		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
		goto again;
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	KKASSERT(*errorp == 0);

	/*
	 * Check CRC.
	 */
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	/*
	 * Complex junk follows.  The next_offset is an ephemeral pointer;
	 * it can point anywhere, so we have to check that we can actually
	 * allocate at this point.
	 *
	 * If we own the zone but just entered into it, the easiest thing to
	 * do is skip it.  We could adjust according to layer2->append_off
	 * but it isn't really worth doing.
	 *
	 * If someone else owns the zone we must skip it.
	 */
	if (layer2->zone == zone) {
		if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
			next_offset += HAMMER_LARGEBLOCK_SIZE;
			goto again;
		}
	} else if (layer2->zone) {
		next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
			      ~HAMMER_LARGEBLOCK_MASK64;
		goto again;
	}

	/*
	 * Now check to see if someone has reserved the big-block.  Again,
	 * if we are at the beginning of it then the reservation was not
	 * under our control and we must skip it.  Same if someone else owns
	 * the reservation.
	 */
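	/*
	 * The reservation tree is keyed on zone-2 big-block base offsets,
	 * so the lookup key is formed by masking off the intra-big-block
	 * bits and the zone bits of next_offset and substituting the
	 * zone-2 (raw buffer) zone id.
	 */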
	base_off = (next_offset &
		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
		    HAMMER_ZONE_RAW_BUFFER;
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
			next_offset += HAMMER_LARGEBLOCK_SIZE;
			goto again;
		}
		if (resv->zone != zone) {
			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
				      ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
	}

	/*
	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
	 * of the layer for real.  At this point we've validated any
	 * reservation that might exist and can just ignore resv.
	 */
	if (layer2->zone == 0) {
		/*
		 * Assign the bigblock to our zone
		 */
		hammer_modify_buffer(trans, buffer1,
				     layer1, sizeof(*layer1));
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1,
					   HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(buffer1);
		hammer_modify_buffer(trans, buffer2,
				     layer2, sizeof(*layer2));
		layer2->zone = zone;
		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
		KKASSERT(layer2->append_off == 0);
		hammer_modify_volume_field(trans, trans->rootvol,
					   vol0_stat_freebigblocks);
		--root_volume->ondisk->vol0_stat_freebigblocks;
		hmp->copy_stat_freebigblocks =
			root_volume->ondisk->vol0_stat_freebigblocks;
		hammer_modify_volume_done(trans->rootvol);
	} else {
		KKASSERT(layer2->append_off <=
			 ((int)next_offset & HAMMER_LARGEBLOCK_MASK));
		hammer_modify_buffer(trans, buffer2,
				     layer2, sizeof(*layer2));
	}
	KKASSERT(layer2->zone == zone);

	layer2->bytes_free -= bytes;
	layer2->append_off = ((int)next_offset & HAMMER_LARGEBLOCK_MASK) +
			     bytes;
	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	KKASSERT(layer2->bytes_free >= 0);

	/*
	 * If we are allocating from the base of a new buffer we can avoid
	 * a disk read by calling hammer_bnew().
	 */
	if ((next_offset & HAMMER_BUFMASK) == 0) {
		hammer_bnew_ext(trans->hmp, next_offset, bytes,
				errorp, &buffer3);
	}
	result_offset = next_offset;

	/*
	 * Process allocated result_offset
	 */
done:
	hammer_modify_volume(NULL, root_volume, NULL, 0);
	if (result_offset) {
		if (result_offset == next_offset) {
			blockmap->next_offset = next_offset + bytes;
		} else {
			blockmap->next_offset = next_offset;
		}
	} else {
		blockmap->next_offset = next_offset;
	}
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->blkmap_lock);

	/*
	 * Cleanup
	 */
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	if (buffer3)
		hammer_rel_buffer(buffer3, 0);

	return(result_offset);
}

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
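/*
 * Illustrative call pattern (a sketch; locking and error handling omitted):
 *
 *	resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *	... frontend issues direct write I/O against zone_off ...
 *	hammer_blockmap_finalize(trans, zone_off, bytes);   (backend, on commit)
 *	hammer_blockmap_reserve_complete(hmp, resv);        (drop the reservation)
 */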
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
			hammer_off_t *zone_offp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_buffer_t buffer3 = NULL;
	hammer_off_t tmp_offset;
	hammer_off_t next_offset;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv;
	hammer_reserve_t resx;
	int loops = 0;

	/*
	 * Setup
	 */
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(NULL);
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

	/*
	 * Deal with alignment and buffer-boundary issues.
	 *
	 * Be careful, certain primary alignments are used below to allocate
	 * new blockmap blocks.
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

	hammer_lock_ex(&hmp->blkmap_lock);
	next_offset = blockmap->next_offset;
again:
	resv = NULL;

	/*
	 * Check for wrap
	 */
	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
		if (++loops == 2) {
			*errorp = ENOSPC;
			goto done;
		}
		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
	}

	/*
	 * The allocation request may not cross a buffer boundary.  Special
	 * large allocations must not cross a large-block boundary.
	 */
	tmp_offset = next_offset + bytes - 1;
	if (bytes <= HAMMER_BUFSIZE) {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
			goto again;
		}
	} else {
		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
	}

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	KKASSERT(*errorp == 0);

	/*
	 * Check CRC.
	 */
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * If we are at a big-block boundary and layer1 indicates no
	 * free big-blocks, then we cannot allocate a new big-block in
	 * layer2; skip to the next layer1 entry.
	 */
	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
	    layer1->blocks_free == 0) {
		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
		goto again;
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	KKASSERT(*errorp == 0);

	/*
	 * Check CRC if not allocating into uninitialized space (which we
	 * aren't when reserving space).
	 */
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	/*
	 * Complex junk follows.  The next_offset is an ephemeral pointer;
	 * it can point anywhere, so we have to check that we can actually
	 * allocate at this point.
	 *
	 * If we own the zone but just entered into it, the easiest thing to
	 * do is skip it.  We could adjust according to layer2->append_off
	 * but it isn't really worth doing.
	 *
	 * If someone else owns the zone we must skip it.
	 */
	if (layer2->zone == zone) {
		if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
			next_offset += HAMMER_LARGEBLOCK_SIZE;
			goto again;
		}
	} else if (layer2->zone) {
		next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
			      ~HAMMER_LARGEBLOCK_MASK64;
		goto again;
	}

	/*
	 * Now check to see if someone has reserved the big-block.  Again,
	 * if we are at the beginning of it then the reservation was not
	 * under our control and we must skip it.  Same if someone else owns
	 * the reservation.
	 */
	base_off = (next_offset &
		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
		    HAMMER_ZONE_RAW_BUFFER;
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
			next_offset += HAMMER_LARGEBLOCK_SIZE;
			goto again;
		}
		if (resv->zone != zone) {
			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
				      ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
	}

	/*
	 * The reservation code does not modify layer2->bytes_free, it
	 * simply adjusts next_offset.
	 */
	KKASSERT(layer2->bytes_free >= 0);

	/*
	 * Make the zone-2 reservation.
	 */
	if (resv) {
		++resv->refs;
		KKASSERT(resv->zone == zone);
	} else {
		resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO);
		resv->refs = 1;
		resv->zone = zone;
		resv->zone_offset = base_off;
		resx = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
		KKASSERT(resx == NULL);
		++hammer_count_reservations;
	}

	/*
	 * If we are not reserving a whole buffer but are at the start of
	 * a new block, call hammer_bnew() to avoid a disk read.
	 *
	 * If we are reserving a whole buffer (or more), the caller will
	 * probably use a direct read, so do nothing.
	 */
	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
		hammer_bnew(hmp, next_offset, errorp, &buffer3);
	}


	/*
	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
	 * space beyond alloc_offset is uninitialized.  alloc_offset must
	 * be big-block aligned.
	 */
done:
	if (resv) {
		hammer_modify_volume(NULL, root_volume, NULL, 0);
		blockmap->next_offset = next_offset + bytes;
		hammer_modify_volume_done(root_volume);
	} else if (blockmap->next_offset != next_offset) {
		hammer_modify_volume(NULL, root_volume, NULL, 0);
		blockmap->next_offset = next_offset;
		hammer_modify_volume_done(root_volume);
	}

	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	if (buffer3)
		hammer_rel_buffer(buffer3, 0);
	hammer_rel_volume(root_volume, 0);
	hammer_unlock(&hmp->blkmap_lock);
	*zone_offp = next_offset;

	return(resv);
}

/*
 * A record with a storage reservation calls this function when it is
 * being freed.  The storage may or may not have actually been allocated.
 *
 * This function removes the lock that prevented other entities from
 * allocating out of the storage or removing the zone assignment.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
	KKASSERT(resv->refs > 0);
	if (--resv->refs == 0) {
		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
		kfree(resv, M_HAMMER);
		--hammer_count_reservations;
	}
}

/*
 * This ensures that no data reallocations will take place at the specified
 * zone2_offset (pointing to the base of a bigblock) for 2 flush cycles,
 * preventing deleted data space, which has no UNDO, from being reallocated
 * too quickly.
 */
void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
			hammer_off_t zone2_offset)
{
	if (resv == NULL) {
		resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO);
		resv->refs = 1;	/* ref for on-delay list */
		resv->zone_offset = zone2_offset;
		RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
		++hammer_count_reservations;
	} else if (resv->flags & HAMMER_RESF_ONDELAY) {
		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
		resv->flush_group = hmp->flusher.next + 1;
	} else {
		++resv->refs;	/* ref for on-delay list */
	}
	resv->flags |= HAMMER_RESF_ONDELAY;
	resv->flush_group = hmp->flusher.next + 1;
	TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
}

void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
	resv->flags &= ~HAMMER_RESF_ONDELAY;
	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
	hammer_blockmap_reserve_complete(hmp, resv);
}
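
/*
 * Lifecycle sketch: hammer_reserve_setdelay() places (or re-queues) a
 * reservation on hmp->delay_list with an extra reference and tags it with
 * an upcoming flush group; hammer_reserve_clrdelay() later removes it from
 * the list and drops that reference via hammer_blockmap_reserve_complete().
 */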

/*
 * Backend function - free (offset, bytes) in a zone.
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
		     hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_reserve_t resv;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	int error;
	int zone;

	if (bytes == 0)
		return;
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes <= HAMMER_XBUFSIZE);
	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
		  ~HAMMER_LARGEBLOCK_MASK64) == 0);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = trans->rootvol;
	error = 0;
	hammer_lock_ex(&hmp->blkmap_lock);

	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	KKASSERT(error == 0);
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	KKASSERT(error == 0);
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Freeing previously allocated space
	 */
	KKASSERT(layer2->zone == zone);
	layer2->bytes_free += bytes;
	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
		base_off = (zone_offset &
			    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
			    HAMMER_ZONE_RAW_BUFFER;
		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
				 base_off);
		if (resv) {
			/*
			 * Portions of this block have been reserved, do
			 * not free it.
			 *
			 * Make sure the reservation remains through
			 * the next flush cycle so potentially undoable
			 * data is not overwritten.
			 */
			KKASSERT(resv->zone == zone);
			hammer_reserve_setdelay(hmp, resv, base_off);
		} else if ((blockmap->next_offset ^ zone_offset) &
			    ~HAMMER_LARGEBLOCK_MASK64) {
			/*
			 * Our iterator is not in the now-free big-block
			 * and we can release it.
			 *
			 * Make sure the reservation remains through
			 * the next flush cycle so potentially undoable
			 * data is not overwritten.
			 */
			hammer_reserve_setdelay(hmp, resv, base_off);
			KKASSERT(layer2->zone == zone);
			hammer_del_buffers(hmp,
					   zone_offset &
						~HAMMER_LARGEBLOCK_MASK64,
					   base_off,
					   HAMMER_LARGEBLOCK_SIZE);
			layer2->zone = 0;
			layer2->append_off = 0;
			hammer_modify_buffer(trans, buffer1,
					     layer1, sizeof(*layer1));
			++layer1->blocks_free;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
			hammer_modify_buffer_done(buffer1);
			hammer_modify_volume_field(trans,
					trans->rootvol,
					vol0_stat_freebigblocks);
			++root_volume->ondisk->vol0_stat_freebigblocks;
			hmp->copy_stat_freebigblocks =
				root_volume->ondisk->vol0_stat_freebigblocks;
			hammer_modify_volume_done(trans->rootvol);
		}
	}

	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
void
hammer_blockmap_finalize(hammer_transaction_t trans,
			 hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int error;
	int zone;
	int append_off;

	if (bytes == 0)
		return;
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes <= HAMMER_XBUFSIZE);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = trans->rootvol;
	error = 0;
	hammer_lock_ex(&hmp->blkmap_lock);

	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	KKASSERT(error == 0);
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	KKASSERT(error == 0);
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Finalize some or all of the space covered by a current
	 * reservation.  An allocation in the same layer may have
	 * already assigned ownership.
	 */
	if (layer2->zone == 0) {
		hammer_modify_buffer(trans, buffer1,
				     layer1, sizeof(*layer1));
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1,
					   HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(buffer1);
		layer2->zone = zone;
		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
		KKASSERT(layer2->append_off == 0);
		hammer_modify_volume_field(trans,
				trans->rootvol,
				vol0_stat_freebigblocks);
		--root_volume->ondisk->vol0_stat_freebigblocks;
		hmp->copy_stat_freebigblocks =
			root_volume->ondisk->vol0_stat_freebigblocks;
		hammer_modify_volume_done(trans->rootvol);
	}
	if (layer2->zone != zone)
		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
	KKASSERT(layer2->zone == zone);
	layer2->bytes_free -= bytes;

	/*
	 * Finalizations can occur out of order, or combined with allocations.
	 * append_off must be set to the highest allocated offset.
	 */
	append_off = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
	if (layer2->append_off < append_off)
		layer2->append_off = append_off;

	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
}

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
			int *curp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int bytes;
	int zone;

	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp) {
		*curp = 0;
		return(0);
	}
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
	KKASSERT(*errorp == 0);
	KKASSERT(layer1->phys_offset);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
	KKASSERT(*errorp == 0);
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}
	KKASSERT(layer2->zone == zone);

	bytes = layer2->bytes_free;

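	/*
	 * *curp is set to 1 when the zone's allocation iterator currently
	 * points into this big-block (the test below compares next_offset
	 * and zone_offset with the intra-big-block bits masked off), and
	 * to 0 otherwise.
	 */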
	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
		*curp = 0;
	else
		*curp = 1;
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	hammer_rel_volume(root_volume, 0);
	if (hammer_debug_general & 0x0800) {
		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
			zone_offset, bytes);
	}
	return(bytes);
}


/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
		       int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t result_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv;
	int zone;

	/*
	 * Calculate the zone-2 offset.
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
			HAMMER_ZONE_RAW_BUFFER;

	/*
	 * We can actually stop here; normal blockmaps are now direct-mapped
	 * onto the freemap and so represent zone-2 addresses.
	 */
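	/*
	 * The conversion above simply replaces the zone id bits (those
	 * covered by HAMMER_OFF_ZONE_MASK) with HAMMER_ZONE_RAW_BUFFER
	 * (zone 2); the remaining volume and offset bits carry over
	 * unchanged.
	 */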
	if (hammer_verify_zone == 0) {
		*errorp = 0;
		return(result_offset);
	}

	/*
	 * Validate the allocation zone
	 */
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(0);
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(freemap->phys_offset != 0);

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
	KKASSERT(*errorp == 0);
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

	KKASSERT(*errorp == 0);
	if (layer2->zone == 0) {
		base_off = (zone_offset &
			    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
			    HAMMER_ZONE_RAW_BUFFER;
		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
				 base_off);
		KKASSERT(resv && resv->zone == zone);

	} else if (layer2->zone != zone) {
		panic("hammer_blockmap_lookup: bad zone %d/%d\n",
			layer2->zone, zone);
	}
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	hammer_rel_volume(root_volume, 0);
	if (hammer_debug_general & 0x0800) {
		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
			zone_offset, result_offset);
	}
	return(result_offset);
}


/*
 * Check space availability
 */
int
hammer_checkspace(hammer_mount_t hmp)
{
	const int in_size = sizeof(struct hammer_inode_data) +
			    sizeof(union hammer_btree_elm);
	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
	const int blkconv = HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE;
	const int limit_inodes = HAMMER_LARGEBLOCK_SIZE / in_size;
	const int limit_recs = HAMMER_LARGEBLOCK_SIZE / rec_size;
	int usedbigblocks;

	/*
	 * Quick and very dirty, not even using the right units (big-blocks
	 * vs 16K buffers), but this catches almost everything.
	 */
	if (hmp->copy_stat_freebigblocks >= hmp->rsv_databufs + 8 &&
	    hmp->rsv_inodes < limit_inodes &&
	    hmp->rsv_recs < limit_recs &&
	    hmp->rsv_databytes < HAMMER_LARGEBLOCK_SIZE) {
		return(0);
	}

	/*
	 * Do a more involved check
	 */
	usedbigblocks = (hmp->rsv_inodes * in_size / HAMMER_LARGEBLOCK_SIZE) +
			(hmp->rsv_recs * rec_size / HAMMER_LARGEBLOCK_SIZE) +
			hmp->rsv_databufs / blkconv + 6;
	if (hmp->copy_stat_freebigblocks >= usedbigblocks)
		return(0);
	return (ENOSPC);
}