HAMMER - Add live dedup sysctl and support
[dragonfly.git] / sys / vfs / hammer / hammer_blockmap.c
40043e7f
MD
1/*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
e469566b 34 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
40043e7f
MD
35 */
36
37/*
38 * HAMMER blockmap
39 */
40#include "hammer.h"
41
0832c9bb 42static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
362ec2dc 43static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
1ce12d35 44 hammer_off_t base_offset, int zone,
5e435c92 45 struct hammer_blockmap_layer2 *layer2);
362ec2dc 46static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
507df98a 47static int update_bytes_free(hammer_reserve_t resv, int bytes);
0832c9bb
MD
48
49/*
50 * Reserved big-blocks red-black tree support
51 */
52RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
53 hammer_res_rb_compare, hammer_off_t, zone_offset);
54
55static int
56hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57{
58 if (res1->zone_offset < res2->zone_offset)
59 return(-1);
60 if (res1->zone_offset > res2->zone_offset)
61 return(1);
62 return(0);
63}
bf686dbe 64
40043e7f
MD
65/*
66 * Allocate bytes from a zone
67 */
68hammer_off_t
df2ccbac
MD
69hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
70 hammer_off_t hint, int *errorp)
40043e7f 71{
0832c9bb 72 hammer_mount_t hmp;
40043e7f 73 hammer_volume_t root_volume;
cb51be26
MD
74 hammer_blockmap_t blockmap;
75 hammer_blockmap_t freemap;
0832c9bb 76 hammer_reserve_t resv;
c3be93f2
MD
77 struct hammer_blockmap_layer1 *layer1;
78 struct hammer_blockmap_layer2 *layer2;
f03c9cf4
MD
79 hammer_buffer_t buffer1 = NULL;
80 hammer_buffer_t buffer2 = NULL;
81 hammer_buffer_t buffer3 = NULL;
c3be93f2 82 hammer_off_t tmp_offset;
f03c9cf4 83 hammer_off_t next_offset;
0832c9bb 84 hammer_off_t result_offset;
c3be93f2
MD
85 hammer_off_t layer1_offset;
86 hammer_off_t layer2_offset;
cb51be26 87 hammer_off_t base_off;
f03c9cf4 88 int loops = 0;
df301614 89 int offset; /* offset within big-block */
df2ccbac 90 int use_hint;
40043e7f 91
0832c9bb 92 hmp = trans->hmp;
40043e7f
MD
93
94 /*
95 * Deal with alignment and buffer-boundary issues.
96 *
97 * Be careful, certain primary alignments are used below to allocate
98 * new blockmap blocks.
99 */
0832c9bb 100 bytes = (bytes + 15) & ~15;
4a2796f3 101 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
0832c9bb 102 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
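	/*
	 * Illustration only (not part of the original file): the rounding
	 * above, (bytes + 15) & ~15, bumps the request up to the next
	 * 16-byte boundary, e.g. 1 -> 16, 16 -> 16, 17 -> 32, 1000 -> 1008.
	 */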
bf686dbe
MD
103
104 /*
cb51be26 105 * Setup
bf686dbe 106 */
cb51be26
MD
107 root_volume = trans->rootvol;
108 *errorp = 0;
109 blockmap = &hmp->blockmap[zone];
110 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
111 KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
112
df2ccbac
MD
113 /*
114 * Use the hint if we have one.
115 */
116 if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
117 next_offset = (hint + 15) & ~(hammer_off_t)15;
118 use_hint = 1;
119 } else {
120 next_offset = blockmap->next_offset;
121 use_hint = 0;
122 }
cb51be26 123again:
df2ccbac
MD
124
125 /*
126 * use_hint is turned off if we leave the hinted big-block.
127 */
128 if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
129 next_offset = blockmap->next_offset;
130 use_hint = 0;
131 }
132
0832c9bb 133 /*
cb51be26 134 * Check for wrap
0832c9bb 135 */
4a2796f3 136 if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
cb51be26
MD
137 if (++loops == 2) {
138 result_offset = 0;
139 *errorp = ENOSPC;
df301614 140 goto failed;
cb51be26
MD
141 }
142 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
143 }
0832c9bb 144
f03c9cf4 145 /*
4a2796f3
MD
146 * The allocation request may not cross a buffer boundary. Special
147 * large allocations must not cross a large-block boundary.
f03c9cf4 148 */
bf686dbe 149 tmp_offset = next_offset + bytes - 1;
4a2796f3
MD
150 if (bytes <= HAMMER_BUFSIZE) {
151 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
152 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
153 goto again;
154 }
155 } else {
156 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
157 next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
158 goto again;
159 }
bf686dbe 160 }
df301614 161 offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
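	/*
	 * Illustration only (not part of the original file): the XOR tests
	 * above detect a boundary crossing.  Assuming 16KB buffers, a
	 * next_offset ending in 0x3ff0 with bytes = 64 yields a tmp_offset
	 * ending in 0x402f; the two differ in bits above HAMMER_BUFMASK64,
	 * so the request would straddle two buffers and next_offset is
	 * advanced to the start of the next buffer
	 * (tmp_offset & ~HAMMER_BUFMASK64).  The same idiom with
	 * HAMMER_LARGEBLOCK_MASK64 keeps oversized allocations inside one
	 * big-block, and with HAMMER_HINTBLOCK_MASK64 (above) detects
	 * leaving the hinted big-block.
	 */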
40043e7f
MD
162
163 /*
cb51be26 164 * Dive layer 1.
40043e7f 165 */
cb51be26 166 layer1_offset = freemap->phys_offset +
f03c9cf4 167 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
865c9609 168
0832c9bb 169 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
cdb6e4e6
MD
170 if (*errorp) {
171 result_offset = 0;
172 goto failed;
173 }
19619882
MD
174
175 /*
cb51be26 176 * Check CRC.
c3be93f2 177 */
cb51be26 178 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
db9f9d7f
MD
179 hammer_lock_ex(&hmp->blkmap_lock);
180 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
181 panic("CRC FAILED: LAYER1");
182 hammer_unlock(&hmp->blkmap_lock);
40043e7f 183 }
40043e7f
MD
184
185 /*
cb51be26
MD
186 * If we are at a big-block boundary and layer1 indicates no
 187 * free big-blocks, then we cannot allocate a new big-block in
 188 * layer2, so skip to the next layer1 entry.
40043e7f 189 */
df301614 190 if (offset == 0 && layer1->blocks_free == 0) {
cb51be26 191 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
f03c9cf4 192 ~HAMMER_BLOCKMAP_LAYER2_MASK;
f03c9cf4
MD
193 goto again;
194 }
cb51be26 195 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
40043e7f 196
c3be93f2 197 /*
c47d84e8
MN
198 * Skip this layer1 entry if it is pointing to a layer2 big-block
199 * on a volume that we are currently trying to remove from the
200 * file-system. This is used by the volume-del code together with
201 * the reblocker to free up a volume.
202 */
203 if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
204 hmp->volume_to_remove) {
205 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
206 ~HAMMER_BLOCKMAP_LAYER2_MASK;
207 goto again;
208 }
209
210 /*
f03c9cf4 211 * Dive layer 2, each entry represents a large-block.
c3be93f2 212 */
f03c9cf4
MD
213 layer2_offset = layer1->phys_offset +
214 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
0832c9bb 215 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
cdb6e4e6
MD
216 if (*errorp) {
217 result_offset = 0;
218 goto failed;
219 }
f03c9cf4 220
19619882 221 /*
db9f9d7f
MD
222 * Check CRC. This can race another thread holding the lock
223 * and in the middle of modifying layer2.
19619882 224 */
cb51be26 225 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
db9f9d7f
MD
226 hammer_lock_ex(&hmp->blkmap_lock);
227 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
228 panic("CRC FAILED: LAYER2");
229 hammer_unlock(&hmp->blkmap_lock);
19619882
MD
230 }
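	/*
	 * Illustration only (not part of the original file): the two breads
	 * above walk the freemap's two-level radix structure.  The layer1
	 * array hangs off freemap->phys_offset and each layer1 entry covers
	 * HAMMER_BLOCKMAP_LAYER2 bytes of address space; the entry's
	 * phys_offset in turn points at an array of layer2 entries, one per
	 * big-block, and HAMMER_BLOCKMAP_LAYER2_OFFSET() selects the entry
	 * for the big-block containing next_offset.  The other functions in
	 * this file repeat the same dive.
	 */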
231
cb51be26 232 /*
df301614 233 * Skip the layer if the zone is owned by someone other than us.
cb51be26 234 */
df301614
MD
235 if (layer2->zone && layer2->zone != zone) {
236 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
237 goto again;
238 }
239 if (offset < layer2->append_off) {
240 next_offset += layer2->append_off - offset;
4a2796f3
MD
241 goto again;
242 }
243
244 /*
df2ccbac
MD
245 * If operating in the current non-hint blockmap block, do not
246 * allow it to get over-full. Also drop any active hinting so
247 * blockmap->next_offset is updated at the end.
248 *
249 * We do this for B-Tree and meta-data allocations to provide
250 * localization for updates.
251 */
252 if ((zone == HAMMER_ZONE_BTREE_INDEX ||
253 zone == HAMMER_ZONE_META_INDEX) &&
254 offset >= HAMMER_LARGEBLOCK_OVERFILL &&
255 !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
256 ) {
257 if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
258 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
259 use_hint = 0;
260 goto again;
261 }
262 }
263
264 /*
df301614
MD
265 * We need the lock from this point on. We have to re-check zone
266 * ownership after acquiring the lock and also check for reservations.
267 */
268 hammer_lock_ex(&hmp->blkmap_lock);
269
270 if (layer2->zone && layer2->zone != zone) {
271 hammer_unlock(&hmp->blkmap_lock);
272 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
273 goto again;
274 }
275 if (offset < layer2->append_off) {
276 hammer_unlock(&hmp->blkmap_lock);
277 next_offset += layer2->append_off - offset;
278 goto again;
279 }
280
281 /*
 282 * The big-block might be reserved by another zone. If it is reserved
283 * by our zone we may have to move next_offset past the append_off.
4a2796f3
MD
284 */
285 base_off = (next_offset &
286 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
287 HAMMER_ZONE_RAW_BUFFER;
288 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
289 if (resv) {
4a2796f3 290 if (resv->zone != zone) {
df301614 291 hammer_unlock(&hmp->blkmap_lock);
cb51be26
MD
292 next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
293 ~HAMMER_LARGEBLOCK_MASK64;
f03c9cf4
MD
294 goto again;
295 }
df301614
MD
296 if (offset < resv->append_off) {
297 hammer_unlock(&hmp->blkmap_lock);
298 next_offset += resv->append_off - offset;
299 goto again;
300 }
1ce12d35 301 ++resv->refs;
cb51be26
MD
302 }
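	/*
	 * Illustration only (not part of the original file): base_off strips
	 * the intra-big-block bits and the zone field from next_offset and
	 * re-stamps it as a zone-2 raw-buffer address, so reservations are
	 * keyed by the physical big-block regardless of the allocating zone.
	 * Assuming 8MB big-blocks and the zone in the top 4 bits, a zone-8
	 * B-Tree offset 0x8000000001912340 and a zone-9 meta-data offset
	 * 0x9000000001880000 both yield base_off 0x2000000001800000 and
	 * therefore find the same hammer_reserve.
	 */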
303
4a2796f3
MD
304 /*
305 * Ok, we can allocate out of this layer2 big-block. Assume ownership
306 * of the layer for real. At this point we've validated any
307 * reservation that might exist and can just ignore resv.
308 */
cb51be26 309 if (layer2->zone == 0) {
f03c9cf4 310 /*
cb51be26 311 * Assign the bigblock to our zone
f03c9cf4 312 */
cb51be26
MD
313 hammer_modify_buffer(trans, buffer1,
314 layer1, sizeof(*layer1));
315 --layer1->blocks_free;
316 layer1->layer1_crc = crc32(layer1,
317 HAMMER_LAYER1_CRCSIZE);
318 hammer_modify_buffer_done(buffer1);
319 hammer_modify_buffer(trans, buffer2,
320 layer2, sizeof(*layer2));
321 layer2->zone = zone;
322 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
323 KKASSERT(layer2->append_off == 0);
324 hammer_modify_volume_field(trans, trans->rootvol,
325 vol0_stat_freebigblocks);
326 --root_volume->ondisk->vol0_stat_freebigblocks;
327 hmp->copy_stat_freebigblocks =
328 root_volume->ondisk->vol0_stat_freebigblocks;
329 hammer_modify_volume_done(trans->rootvol);
cb51be26
MD
330 } else {
331 hammer_modify_buffer(trans, buffer2,
332 layer2, sizeof(*layer2));
40043e7f 333 }
cb51be26 334 KKASSERT(layer2->zone == zone);
40043e7f 335
320a5c59
MD
336 /*
337 * NOTE: bytes_free can legally go negative due to de-dup.
338 */
c3be93f2 339 layer2->bytes_free -= bytes;
df301614
MD
340 KKASSERT(layer2->append_off <= offset);
341 layer2->append_off = offset + bytes;
19619882 342 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
10a5d1ba 343 hammer_modify_buffer_done(buffer2);
40043e7f 344
1ce12d35
MD
345 /*
346 * We hold the blockmap lock and should be the only ones
347 * capable of modifying resv->append_off. Track the allocation
348 * as appropriate.
349 */
350 KKASSERT(bytes != 0);
df301614
MD
351 if (resv) {
352 KKASSERT(resv->append_off <= offset);
353 resv->append_off = offset + bytes;
5e435c92 354 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1ce12d35 355 hammer_blockmap_reserve_complete(hmp, resv);
df301614
MD
356 }
357
40043e7f 358 /*
0832c9bb
MD
359 * If we are allocating from the base of a new buffer we can avoid
360 * a disk read by calling hammer_bnew().
40043e7f 361 */
f03c9cf4 362 if ((next_offset & HAMMER_BUFMASK) == 0) {
4a2796f3
MD
363 hammer_bnew_ext(trans->hmp, next_offset, bytes,
364 errorp, &buffer3);
40043e7f 365 }
0832c9bb 366 result_offset = next_offset;
40043e7f 367
c3be93f2 368 /*
df2ccbac
MD
369 * If we weren't supplied with a hint or could not use the hint
370 * then we wound up using blockmap->next_offset as the hint and
371 * need to save it.
c3be93f2 372 */
df2ccbac
MD
373 if (use_hint == 0) {
374 hammer_modify_volume(NULL, root_volume, NULL, 0);
375 blockmap->next_offset = next_offset + bytes;
376 hammer_modify_volume_done(root_volume);
377 }
d99d6bf5 378 hammer_unlock(&hmp->blkmap_lock);
df301614 379failed:
0832c9bb
MD
380
381 /*
382 * Cleanup
383 */
f03c9cf4
MD
384 if (buffer1)
385 hammer_rel_buffer(buffer1, 0);
386 if (buffer2)
387 hammer_rel_buffer(buffer2, 0);
388 if (buffer3)
389 hammer_rel_buffer(buffer3, 0);
0832c9bb
MD
390
391 return(result_offset);
40043e7f
MD
392}
393
394/*
4a2796f3 395 * Frontend function - Reserve bytes in a zone.
47637bff
MD
396 *
397 * This code reserves bytes out of a blockmap without committing to any
cb51be26
MD
398 * meta-data modifications, allowing the front-end to directly issue disk
 399 * write I/O for large blocks of data.
4a2796f3
MD
400 *
401 * The backend later finalizes the reservation with hammer_blockmap_finalize()
402 * upon committing the related record.
47637bff 403 */
0832c9bb
MD
404hammer_reserve_t
405hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
406 hammer_off_t *zone_offp, int *errorp)
47637bff
MD
407{
408 hammer_volume_t root_volume;
cb51be26
MD
409 hammer_blockmap_t blockmap;
410 hammer_blockmap_t freemap;
47637bff
MD
411 struct hammer_blockmap_layer1 *layer1;
412 struct hammer_blockmap_layer2 *layer2;
413 hammer_buffer_t buffer1 = NULL;
414 hammer_buffer_t buffer2 = NULL;
415 hammer_buffer_t buffer3 = NULL;
416 hammer_off_t tmp_offset;
417 hammer_off_t next_offset;
418 hammer_off_t layer1_offset;
419 hammer_off_t layer2_offset;
cb51be26 420 hammer_off_t base_off;
0832c9bb 421 hammer_reserve_t resv;
cb51be26 422 hammer_reserve_t resx;
47637bff 423 int loops = 0;
df301614 424 int offset;
47637bff 425
0832c9bb
MD
426 /*
427 * Setup
428 */
47637bff
MD
429 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
430 root_volume = hammer_get_root_volume(hmp, errorp);
431 if (*errorp)
0832c9bb 432 return(NULL);
cb51be26
MD
433 blockmap = &hmp->blockmap[zone];
434 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
435 KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
47637bff
MD
436
437 /*
438 * Deal with alignment and buffer-boundary issues.
439 *
440 * Be careful, certain primary alignments are used below to allocate
441 * new blockmap blocks.
442 */
0832c9bb 443 bytes = (bytes + 15) & ~15;
4a2796f3 444 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
47637bff 445
cb51be26 446 next_offset = blockmap->next_offset;
df301614 447again:
fce862c7 448 resv = NULL;
47637bff 449 /*
cb51be26 450 * Check for wrap
47637bff 451 */
4a2796f3 452 if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
cb51be26 453 if (++loops == 2) {
47637bff 454 *errorp = ENOSPC;
df301614 455 goto failed;
47637bff
MD
456 }
457 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
458 }
459
460 /*
4a2796f3
MD
461 * The allocation request may not cross a buffer boundary. Special
462 * large allocations must not cross a large-block boundary.
47637bff
MD
463 */
464 tmp_offset = next_offset + bytes - 1;
4a2796f3
MD
465 if (bytes <= HAMMER_BUFSIZE) {
466 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
467 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
468 goto again;
469 }
470 } else {
471 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
472 next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
473 goto again;
474 }
47637bff 475 }
df301614 476 offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
47637bff
MD
477
478 /*
479 * Dive layer 1.
480 */
cb51be26 481 layer1_offset = freemap->phys_offset +
47637bff
MD
482 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
483 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
cdb6e4e6
MD
484 if (*errorp)
485 goto failed;
47637bff
MD
486
487 /*
cb51be26 488 * Check CRC.
47637bff
MD
489 */
490 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
db9f9d7f
MD
491 hammer_lock_ex(&hmp->blkmap_lock);
492 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
493 panic("CRC FAILED: LAYER1");
494 hammer_unlock(&hmp->blkmap_lock);
47637bff 495 }
47637bff
MD
496
497 /*
cb51be26
MD
498 * If we are at a big-block boundary and layer1 indicates no
 499 * free big-blocks, then we cannot allocate a new big-block in
 500 * layer2, so skip to the next layer1 entry.
47637bff 501 */
cb51be26
MD
502 if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
503 layer1->blocks_free == 0) {
504 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
47637bff
MD
505 ~HAMMER_BLOCKMAP_LAYER2_MASK;
506 goto again;
507 }
cb51be26 508 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
47637bff
MD
509
510 /*
511 * Dive layer 2, each entry represents a large-block.
512 */
513 layer2_offset = layer1->phys_offset +
514 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
515 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
cdb6e4e6
MD
516 if (*errorp)
517 goto failed;
47637bff
MD
518
519 /*
0832c9bb
MD
520 * Check CRC if not allocating into uninitialized space (which we
521 * aren't when reserving space).
47637bff
MD
522 */
523 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
db9f9d7f
MD
524 hammer_lock_ex(&hmp->blkmap_lock);
525 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
526 panic("CRC FAILED: LAYER2");
527 hammer_unlock(&hmp->blkmap_lock);
47637bff
MD
528 }
529
0832c9bb 530 /*
df301614 531 * Skip the layer if the zone is owned by someone other than us.
0832c9bb 532 */
df301614
MD
533 if (layer2->zone && layer2->zone != zone) {
534 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
535 goto again;
536 }
537 if (offset < layer2->append_off) {
538 next_offset += layer2->append_off - offset;
4a2796f3
MD
539 goto again;
540 }
541
542 /*
df301614
MD
543 * We need the lock from this point on. We have to re-check zone
544 * ownership after acquiring the lock and also check for reservations.
545 */
546 hammer_lock_ex(&hmp->blkmap_lock);
547
548 if (layer2->zone && layer2->zone != zone) {
549 hammer_unlock(&hmp->blkmap_lock);
550 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
551 goto again;
552 }
553 if (offset < layer2->append_off) {
554 hammer_unlock(&hmp->blkmap_lock);
555 next_offset += layer2->append_off - offset;
556 goto again;
557 }
558
559 /*
 560 * The big-block might be reserved by another zone. If it is reserved
561 * by our zone we may have to move next_offset past the append_off.
4a2796f3
MD
562 */
563 base_off = (next_offset &
df301614 564 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
4a2796f3
MD
565 HAMMER_ZONE_RAW_BUFFER;
566 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
567 if (resv) {
4a2796f3 568 if (resv->zone != zone) {
df301614 569 hammer_unlock(&hmp->blkmap_lock);
4a2796f3
MD
570 next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
571 ~HAMMER_LARGEBLOCK_MASK64;
572 goto again;
573 }
df301614
MD
574 if (offset < resv->append_off) {
575 hammer_unlock(&hmp->blkmap_lock);
576 next_offset += resv->append_off - offset;
577 goto again;
578 }
cb51be26 579 ++resv->refs;
df301614 580 resx = NULL;
cb51be26 581 } else {
bac808fe 582 resx = kmalloc(sizeof(*resv), hmp->m_misc,
df301614
MD
583 M_WAITOK | M_ZERO | M_USE_RESERVE);
584 resx->refs = 1;
585 resx->zone = zone;
586 resx->zone_offset = base_off;
5e435c92
MD
587 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
588 resx->flags |= HAMMER_RESF_LAYER2FREE;
df301614
MD
589 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
590 KKASSERT(resv == NULL);
591 resv = resx;
a7e9bef1 592 ++hammer_count_reservations;
cb51be26 593 }
df301614 594 resv->append_off = offset + bytes;
cb51be26
MD
595
596 /*
0832c9bb
MD
597 * If we are not reserving a whole buffer but are at the start of
598 * a new block, call hammer_bnew() to avoid a disk read.
599 *
4a2796f3
MD
600 * If we are reserving a whole buffer (or more), the caller will
601 * probably use a direct read, so do nothing.
47637bff 602 */
0832c9bb
MD
603 if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
604 hammer_bnew(hmp, next_offset, errorp, &buffer3);
605 }
606
47637bff
MD
607 /*
608 * Adjust our iterator and alloc_offset. The layer1 and layer2
609 * space beyond alloc_offset is uninitialized. alloc_offset must
610 * be big-block aligned.
611 */
df301614
MD
612 blockmap->next_offset = next_offset + bytes;
613 hammer_unlock(&hmp->blkmap_lock);
0832c9bb 614
df301614 615failed:
47637bff
MD
616 if (buffer1)
617 hammer_rel_buffer(buffer1, 0);
618 if (buffer2)
619 hammer_rel_buffer(buffer2, 0);
620 if (buffer3)
621 hammer_rel_buffer(buffer3, 0);
622 hammer_rel_volume(root_volume, 0);
0832c9bb
MD
623 *zone_offp = next_offset;
624
625 return(resv);
626}
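
/*
 * Illustration only (not part of the original file): a minimal sketch of
 * how the frontend/backend split described above strings together, using
 * only functions declared in this file.  The direct data write and the
 * record plumbing are elided, error handling is abbreviated, and the use
 * of HAMMER_ZONE_LARGE_DATA_INDEX here is an assumption for the example.
 */
#if 0
static int
example_reserve_then_finalize(hammer_transaction_t trans, int nbytes)
{
	hammer_mount_t hmp = trans->hmp;
	hammer_reserve_t resv;
	hammer_off_t zone_off;
	int error;

	/* Frontend: reserve space; no meta-data is modified yet. */
	resv = hammer_blockmap_reserve(hmp, HAMMER_ZONE_LARGE_DATA_INDEX,
				       nbytes, &zone_off, &error);
	if (resv == NULL)
		return (error);

	/* ... frontend issues the direct write of nbytes to zone_off ... */

	/* Backend: commit the space when the related record is flushed. */
	error = hammer_blockmap_finalize(trans, resv, zone_off, nbytes);

	/* Drop the reservation once it is no longer needed. */
	hammer_blockmap_reserve_complete(hmp, resv);
	return (error);
}
#endif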
627
1b0ab2c3 628/*
507df98a
ID
629 * Frontend function - Dedup bytes in a zone.
630 *
631 * Dedup reservations work exactly the same as normal write reservations
 632 * except we only adjust the bytes_free field and don't touch the append offset.
 633 * The finalization mechanism for dedup reservations is also the same as for
634 * normal write ones - the backend finalizes the reservation with
635 * hammer_blockmap_finalize().
636 */
637hammer_reserve_t
638hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
639 hammer_off_t zone_offset, int *errorp)
640{
641 hammer_volume_t root_volume;
642 hammer_blockmap_t freemap;
643 struct hammer_blockmap_layer1 *layer1;
644 struct hammer_blockmap_layer2 *layer2;
645 hammer_buffer_t buffer1 = NULL;
646 hammer_buffer_t buffer2 = NULL;
647 hammer_off_t layer1_offset;
648 hammer_off_t layer2_offset;
649 hammer_off_t base_off;
650 hammer_reserve_t resv = NULL;
651 hammer_reserve_t resx = NULL;
652
653 /*
654 * Setup
655 */
656 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
657 root_volume = hammer_get_root_volume(hmp, errorp);
658 if (*errorp)
659 return (NULL);
660 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
661 KKASSERT(freemap->phys_offset != 0);
662
663 bytes = (bytes + 15) & ~15;
664 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
665
666 /*
667 * Dive layer 1.
668 */
669 layer1_offset = freemap->phys_offset +
670 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
671 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
672 if (*errorp)
673 goto failed;
674
675 /*
676 * Check CRC.
677 */
678 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
679 hammer_lock_ex(&hmp->blkmap_lock);
680 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
681 panic("CRC FAILED: LAYER1");
682 hammer_unlock(&hmp->blkmap_lock);
683 }
684 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
685
686 /*
687 * Dive layer 2, each entry represents a large-block.
688 */
689 layer2_offset = layer1->phys_offset +
690 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
691 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
692 if (*errorp)
693 goto failed;
694
695 /*
696 * Check CRC.
697 */
698 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
699 hammer_lock_ex(&hmp->blkmap_lock);
700 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
701 panic("CRC FAILED: LAYER2");
702 hammer_unlock(&hmp->blkmap_lock);
703 }
704
705 /*
706 * Fail if the zone is owned by someone other than us.
707 */
708 if (layer2->zone && layer2->zone != zone)
709 goto failed;
710
711 /*
712 * We need the lock from this point on. We have to re-check zone
713 * ownership after acquiring the lock and also check for reservations.
714 */
715 hammer_lock_ex(&hmp->blkmap_lock);
716
717 if (layer2->zone && layer2->zone != zone) {
718 hammer_unlock(&hmp->blkmap_lock);
719 goto failed;
720 }
721
722 base_off = (zone_offset &
723 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
724 HAMMER_ZONE_RAW_BUFFER;
725 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
726 if (resv) {
727 if (resv->zone != zone) {
728 hammer_unlock(&hmp->blkmap_lock);
729 resv = NULL;
730 goto failed;
731 }
732 /*
733 * Due to possible big block underflow we can't simply
734 * subtract bytes from bytes_free.
735 */
736 if (update_bytes_free(resv, bytes) == 0) {
737 hammer_unlock(&hmp->blkmap_lock);
738 resv = NULL;
739 goto failed;
740 }
741 ++resv->refs;
742 resx = NULL;
743 } else {
744 resx = kmalloc(sizeof(*resv), hmp->m_misc,
745 M_WAITOK | M_ZERO | M_USE_RESERVE);
746 resx->refs = 1;
747 resx->zone = zone;
748 resx->bytes_free = layer2->bytes_free;
749 /*
750 * Due to possible big block underflow we can't simply
751 * subtract bytes from bytes_free.
752 */
753 if (update_bytes_free(resx, bytes) == 0) {
754 hammer_unlock(&hmp->blkmap_lock);
755 kfree(resx, hmp->m_misc);
756 goto failed;
757 }
758 resx->zone_offset = base_off;
759 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
760 KKASSERT(resv == NULL);
761 resv = resx;
762 ++hammer_count_reservations;
763 }
764
765 hammer_unlock(&hmp->blkmap_lock);
766
767failed:
768 if (buffer1)
769 hammer_rel_buffer(buffer1, 0);
770 if (buffer2)
771 hammer_rel_buffer(buffer2, 0);
772 hammer_rel_volume(root_volume, 0);
773
774 return(resv);
775}
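
/*
 * Illustration only (not part of the original file): a dedup reservation
 * charges bytes_free for data that already exists at zone_offset and
 * leaves append_off alone.  This sketch only strings the declared entry
 * points together in the documented order; the real call sites live in
 * the dedup and flusher code and the error handling here is abbreviated.
 */
#if 0
static int
example_dedup_reserve(hammer_transaction_t trans,
		      hammer_off_t zone_offset, int nbytes)
{
	hammer_mount_t hmp = trans->hmp;
	hammer_reserve_t resv;
	int error = 0;

	/* Frontend: account an extra reference against the big-block. */
	resv = hammer_blockmap_reserve_dedup(hmp,
			HAMMER_ZONE_DECODE(zone_offset), nbytes,
			zone_offset, &error);
	if (resv == NULL)
		return (error ? error : ENOSPC);

	/* Backend: finalized the same way as a normal write reservation. */
	error = hammer_blockmap_finalize(trans, resv, zone_offset, nbytes);
	hammer_blockmap_reserve_complete(hmp, resv);
	return (error);
}
#endif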
776
777static int
778update_bytes_free(hammer_reserve_t resv, int bytes)
779{
780 int32_t temp;
781
782 /*
783 * Big-block underflow check
784 */
785 temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
786 cpu_ccfence(); /* XXX do we really need it ? */
787 if (temp > resv->bytes_free) {
788 kprintf("BIGBLOCK UNDERFLOW\n");
789 return (0);
790 }
791
792 resv->bytes_free -= bytes;
793 return (1);
794}
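
/*
 * Illustration only (not part of the original file): the subtraction in
 * update_bytes_free() detects an impending 32-bit wrap rather than a
 * shortage of space.  Assuming 8MB big-blocks, temp is bytes_free minus
 * 16MB; for any bytes_free above INT32_MIN + 16MB the result is smaller
 * than bytes_free and the dedup charge proceeds.  Only when bytes_free
 * has already been driven to within 16MB of INT32_MIN does the
 * subtraction wrap to a large positive value, the temp > bytes_free test
 * trips, and the caller refuses to take on more dedup references.
 */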
795
796/*
5e435c92
MD
797 * Dereference a reservation structure. Upon the final release the
798 * underlying big-block is checked and if it is entirely free we delete
799 * any related HAMMER buffers to avoid potential conflicts with future
800 * reuse of the big-block.
0832c9bb
MD
801 */
802void
803hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
804{
5e435c92 805 hammer_off_t base_offset;
362ec2dc 806 int error;
1b0ab2c3 807
0832c9bb 808 KKASSERT(resv->refs > 0);
5e435c92
MD
809 KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
810 HAMMER_ZONE_RAW_BUFFER);
811
812 /*
813 * Setting append_off to the max prevents any new allocations
 814 * from occurring while we are trying to dispose of the reservation,
815 * allowing us to safely delete any related HAMMER buffers.
362ec2dc
MD
816 *
817 * If we are unable to clean out all related HAMMER buffers we
818 * requeue the delay.
5e435c92
MD
819 */
820 if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
821 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
1ce12d35
MD
822 base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
823 base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
507df98a
ID
824 if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
825 hammer_dedup_cache_inval(hmp, base_offset);
362ec2dc
MD
826 error = hammer_del_buffers(hmp, base_offset,
827 resv->zone_offset,
828 HAMMER_LARGEBLOCK_SIZE,
f7d0505a
MD
829 1);
830 if (hammer_debug_general & 0x20000) {
831 kprintf("hammer: dellgblk %016jx error %d\n",
832 (intmax_t)base_offset, error);
833 }
362ec2dc
MD
834 if (error)
835 hammer_reserve_setdelay(hmp, resv);
5e435c92 836 }
0832c9bb 837 if (--resv->refs == 0) {
f7d0505a
MD
838 if (hammer_debug_general & 0x20000) {
839 kprintf("hammer: delresvr %016jx zone %02x\n",
840 (intmax_t)resv->zone_offset, resv->zone);
841 }
cb51be26 842 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
0832c9bb 843 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
bac808fe 844 kfree(resv, hmp->m_misc);
0832c9bb
MD
845 --hammer_count_reservations;
846 }
47637bff
MD
847}
848
849/*
5e435c92
MD
850 * Prevent a potentially free big-block from being reused until after
 851 * the related flushes have completely cycled; otherwise crash recovery
852 * could resurrect a data block that was already reused and overwritten.
853 *
1ce12d35
MD
854 * The caller might reset the underlying layer2 entry's append_off to 0, so
855 * our covering append_off must be set to max to prevent any reallocation
856 * until after the flush delays complete, not to mention proper invalidation
857 * of any underlying cached blocks.
cb51be26 858 */
5e435c92 859static void
362ec2dc 860hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
1ce12d35 861 int zone, struct hammer_blockmap_layer2 *layer2)
cb51be26 862{
5e435c92 863 hammer_reserve_t resv;
df301614 864
5e435c92
MD
865 /*
866 * Allocate the reservation if necessary.
1ce12d35
MD
867 *
868 * NOTE: need lock in future around resv lookup/allocation and
869 * the setdelay call, currently refs is not bumped until the call.
5e435c92
MD
870 */
871again:
872 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
cb51be26 873 if (resv == NULL) {
bac808fe 874 resv = kmalloc(sizeof(*resv), hmp->m_misc,
df301614 875 M_WAITOK | M_ZERO | M_USE_RESERVE);
1ce12d35 876 resv->zone = zone;
5e435c92
MD
877 resv->zone_offset = base_offset;
878 resv->refs = 0;
1ce12d35
MD
879 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
880
5e435c92
MD
881 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
882 resv->flags |= HAMMER_RESF_LAYER2FREE;
df301614 883 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
bac808fe 884 kfree(resv, hmp->m_misc);
5e435c92 885 goto again;
df301614 886 }
5e435c92 887 ++hammer_count_reservations;
1ce12d35
MD
888 } else {
889 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
890 resv->flags |= HAMMER_RESF_LAYER2FREE;
5e435c92 891 }
1ce12d35 892 hammer_reserve_setdelay(hmp, resv);
362ec2dc 893}
5e435c92 894
362ec2dc
MD
895/*
896 * Enter the reservation on the on-delay list, or move it if it
897 * is already on the list.
898 */
899static void
900hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
901{
5e435c92 902 if (resv->flags & HAMMER_RESF_ONDELAY) {
cb51be26
MD
903 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
904 resv->flush_group = hmp->flusher.next + 1;
5e435c92 905 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
cb51be26 906 } else {
5e435c92 907 ++resv->refs;
a7e9bef1 908 ++hmp->rsv_fromdelay;
df301614
MD
909 resv->flags |= HAMMER_RESF_ONDELAY;
910 resv->flush_group = hmp->flusher.next + 1;
911 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
912 }
cb51be26
MD
913}
914
f7d0505a
MD
915/*
916 * Reserve has reached its flush point, remove it from the delay list
917 * and finish it off. hammer_blockmap_reserve_complete() inherits
918 * the ondelay reference.
919 */
cb51be26
MD
920void
921hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
922{
923 KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
924 resv->flags &= ~HAMMER_RESF_ONDELAY;
925 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
a7e9bef1 926 --hmp->rsv_fromdelay;
cb51be26
MD
927 hammer_blockmap_reserve_complete(hmp, resv);
928}
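
/*
 * Illustration only (hypothetical, not part of the original file): the
 * delay-list life cycle.  hammer_reserve_setdelay() parks a reservation
 * with flush_group = flusher.next + 1; once the flusher has completely
 * cycled past that group it clears the entry, which may then invalidate
 * the underlying buffers via hammer_blockmap_reserve_complete().  The
 * loop below is only a sketch of the flusher-side sweep; the real loop
 * and its group comparison live in the flusher code.
 */
#if 0
static void
example_clear_delayed_reservations(hammer_mount_t hmp, int completed_group)
{
	hammer_reserve_t resv;

	while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL &&
	       resv->flush_group <= completed_group) {
		hammer_reserve_clrdelay(hmp, resv);
	}
}
#endif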
929
cb51be26 930/*
4a2796f3 931 * Backend function - free (offset, bytes) in a zone.
cdb6e4e6
MD
932 *
933 * XXX error return
40043e7f 934 */
c3be93f2 935void
36f82b23 936hammer_blockmap_free(hammer_transaction_t trans,
cb51be26 937 hammer_off_t zone_offset, int bytes)
40043e7f 938{
0832c9bb 939 hammer_mount_t hmp;
c3be93f2 940 hammer_volume_t root_volume;
cb51be26 941 hammer_blockmap_t freemap;
c3be93f2
MD
942 struct hammer_blockmap_layer1 *layer1;
943 struct hammer_blockmap_layer2 *layer2;
f03c9cf4
MD
944 hammer_buffer_t buffer1 = NULL;
945 hammer_buffer_t buffer2 = NULL;
c3be93f2
MD
946 hammer_off_t layer1_offset;
947 hammer_off_t layer2_offset;
cb51be26 948 hammer_off_t base_off;
c3be93f2
MD
949 int error;
950 int zone;
951
cb51be26
MD
952 if (bytes == 0)
953 return;
0832c9bb
MD
954 hmp = trans->hmp;
955
cb51be26
MD
956 /*
957 * Alignment
958 */
4a2796f3
MD
959 bytes = (bytes + 15) & ~15;
960 KKASSERT(bytes <= HAMMER_XBUFSIZE);
961 KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
962 ~HAMMER_LARGEBLOCK_MASK64) == 0);
f03c9cf4 963
cb51be26
MD
964 /*
965 * Basic zone validation & locking
966 */
967 zone = HAMMER_ZONE_DECODE(zone_offset);
968 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
969 root_volume = trans->rootvol;
970 error = 0;
f03c9cf4 971
cb51be26 972 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
c3be93f2
MD
973
974 /*
975 * Dive layer 1.
976 */
cb51be26
MD
977 layer1_offset = freemap->phys_offset +
978 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
0832c9bb 979 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
cdb6e4e6
MD
980 if (error)
981 goto failed;
cb51be26
MD
982 KKASSERT(layer1->phys_offset &&
983 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
19619882 984 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
db9f9d7f
MD
985 hammer_lock_ex(&hmp->blkmap_lock);
986 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
987 panic("CRC FAILED: LAYER1");
988 hammer_unlock(&hmp->blkmap_lock);
19619882 989 }
c3be93f2
MD
990
991 /*
992 * Dive layer 2, each entry represents a large-block.
993 */
994 layer2_offset = layer1->phys_offset +
cb51be26 995 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
0832c9bb 996 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
cdb6e4e6
MD
997 if (error)
998 goto failed;
19619882 999 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
db9f9d7f
MD
1000 hammer_lock_ex(&hmp->blkmap_lock);
1001 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1002 panic("CRC FAILED: LAYER2");
1003 hammer_unlock(&hmp->blkmap_lock);
19619882
MD
1004 }
1005
df301614
MD
1006 hammer_lock_ex(&hmp->blkmap_lock);
1007
36f82b23 1008 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
4a2796f3
MD
1009
1010 /*
5e435c92 1011 * Free space previously allocated via blockmap_alloc().
320a5c59
MD
1012 *
1013 * NOTE: bytes_free can be and remain negative due to de-dup ops
1014 * but can never become larger than HAMMER_LARGEBLOCK_SIZE.
4a2796f3
MD
1015 */
1016 KKASSERT(layer2->zone == zone);
1017 layer2->bytes_free += bytes;
1018 KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
5e435c92
MD
1019
1020 /*
1021 * If a big-block becomes entirely free we must create a covering
1022 * reservation to prevent premature reuse. Note, however, that
1023 * the big-block and/or reservation may still have an append_off
1024 * that allows further (non-reused) allocations.
1025 *
1026 * Once the reservation has been made we re-check layer2 and if
1027 * the big-block is still entirely free we reset the layer2 entry.
1028 * The reservation will prevent premature reuse.
1029 *
1030 * NOTE: hammer_buffer's are only invalidated when the reservation
1031 * is completed, if the layer2 entry is still completely free at
1032 * that time. Any allocations from the reservation that may have
 1033 * occurred in the meantime, or active references on the reservation
 1034 * from new pending allocations, will prevent the invalidation from
 1035 * occurring.
1036 */
4a2796f3
MD
1037 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1038 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
5e435c92 1039
1ce12d35 1040 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
5e435c92 1041 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
4a2796f3
MD
1042 layer2->zone = 0;
1043 layer2->append_off = 0;
36f82b23
MD
1044 hammer_modify_buffer(trans, buffer1,
1045 layer1, sizeof(*layer1));
4a2796f3 1046 ++layer1->blocks_free;
19619882
MD
1047 layer1->layer1_crc = crc32(layer1,
1048 HAMMER_LAYER1_CRCSIZE);
10a5d1ba 1049 hammer_modify_buffer_done(buffer1);
cb51be26
MD
1050 hammer_modify_volume_field(trans,
1051 trans->rootvol,
1052 vol0_stat_freebigblocks);
4a2796f3 1053 ++root_volume->ondisk->vol0_stat_freebigblocks;
cb51be26
MD
1054 hmp->copy_stat_freebigblocks =
1055 root_volume->ondisk->vol0_stat_freebigblocks;
1056 hammer_modify_volume_done(trans->rootvol);
c3be93f2
MD
1057 }
1058 }
4a2796f3
MD
1059 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1060 hammer_modify_buffer_done(buffer2);
1061 hammer_unlock(&hmp->blkmap_lock);
1062
cdb6e4e6 1063failed:
4a2796f3
MD
1064 if (buffer1)
1065 hammer_rel_buffer(buffer1, 0);
1066 if (buffer2)
1067 hammer_rel_buffer(buffer2, 0);
1068}
1069
bb29b5d8
MD
1070int
1071hammer_blockmap_dedup(hammer_transaction_t trans,
1072 hammer_off_t zone_offset, int bytes)
1073{
1074 hammer_mount_t hmp;
1075 hammer_volume_t root_volume;
bb29b5d8
MD
1076 hammer_blockmap_t freemap;
1077 struct hammer_blockmap_layer1 *layer1;
1078 struct hammer_blockmap_layer2 *layer2;
1079 hammer_buffer_t buffer1 = NULL;
1080 hammer_buffer_t buffer2 = NULL;
1081 hammer_off_t layer1_offset;
1082 hammer_off_t layer2_offset;
1083 int32_t temp;
1084 int error;
1085 int zone;
1086
1087 if (bytes == 0)
1088 return (0);
1089 hmp = trans->hmp;
1090
1091 /*
1092 * Alignment
1093 */
1094 bytes = (bytes + 15) & ~15;
1095 KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
1096 KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1097 ~HAMMER_LARGEBLOCK_MASK64) == 0);
1098
1099 /*
1100 * Basic zone validation & locking
1101 */
1102 zone = HAMMER_ZONE_DECODE(zone_offset);
1103 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1104 root_volume = trans->rootvol;
1105 error = 0;
1106
bb29b5d8
MD
1107 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1108
1109 /*
1110 * Dive layer 1.
1111 */
1112 layer1_offset = freemap->phys_offset +
1113 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1114 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1115 if (error)
1116 goto failed;
1117 KKASSERT(layer1->phys_offset &&
1118 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1119 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1120 hammer_lock_ex(&hmp->blkmap_lock);
1121 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1122 panic("CRC FAILED: LAYER1");
1123 hammer_unlock(&hmp->blkmap_lock);
1124 }
1125
1126 /*
1127 * Dive layer 2, each entry represents a large-block.
1128 */
1129 layer2_offset = layer1->phys_offset +
1130 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1131 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1132 if (error)
1133 goto failed;
1134 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1135 hammer_lock_ex(&hmp->blkmap_lock);
1136 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1137 panic("CRC FAILED: LAYER2");
1138 hammer_unlock(&hmp->blkmap_lock);
1139 }
1140
1141 hammer_lock_ex(&hmp->blkmap_lock);
1142
1143 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1144
1145 /*
1146 * Free space previously allocated via blockmap_alloc().
1147 *
1148 * NOTE: bytes_free can be and remain negative due to de-dup ops
1149 * but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1150 */
1151 KKASSERT(layer2->zone == zone);
1152 temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
1153 cpu_ccfence(); /* prevent gcc from optimizing temp out */
1154 if (temp > layer2->bytes_free) {
1155 error = ERANGE;
1156 goto underflow;
1157 }
1158 layer2->bytes_free -= bytes;
1159
1160 KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1161
1162 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1163underflow:
1164 hammer_modify_buffer_done(buffer2);
1165 hammer_unlock(&hmp->blkmap_lock);
1166
1167failed:
1168 if (buffer1)
1169 hammer_rel_buffer(buffer1, 0);
1170 if (buffer2)
1171 hammer_rel_buffer(buffer2, 0);
1172 return (error);
1173}
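
/*
 * Illustration only (not part of the original file): why bytes_free can
 * legitimately stay negative with live dedup.  Assuming 8MB big-blocks,
 * storing one 64KB data block leaves bytes_free at 8MB - 64KB.  Each
 * additional dedup reference made through hammer_blockmap_dedup()
 * subtracts another 64KB, and each deletion later goes back through
 * hammer_blockmap_free() and adds 64KB, so the counter returns to 8MB
 * only after the last reference is gone.  With enough references to the
 * blocks inside one big-block the running total drops below zero, which
 * is why only the ERANGE underflow guard above, and not a zero floor,
 * bounds it.
 */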
1174
4a2796f3
MD
1175/*
1176 * Backend function - finalize (offset, bytes) in a zone.
1177 *
1178 * Allocate space that was previously reserved by the frontend.
1179 */
cdb6e4e6 1180int
4a2796f3 1181hammer_blockmap_finalize(hammer_transaction_t trans,
5e435c92 1182 hammer_reserve_t resv,
4a2796f3
MD
1183 hammer_off_t zone_offset, int bytes)
1184{
1185 hammer_mount_t hmp;
1186 hammer_volume_t root_volume;
4a2796f3
MD
1187 hammer_blockmap_t freemap;
1188 struct hammer_blockmap_layer1 *layer1;
1189 struct hammer_blockmap_layer2 *layer2;
1190 hammer_buffer_t buffer1 = NULL;
1191 hammer_buffer_t buffer2 = NULL;
1192 hammer_off_t layer1_offset;
1193 hammer_off_t layer2_offset;
1194 int error;
1195 int zone;
df301614 1196 int offset;
4a2796f3
MD
1197
1198 if (bytes == 0)
cdb6e4e6 1199 return(0);
4a2796f3
MD
1200 hmp = trans->hmp;
1201
1202 /*
1203 * Alignment
1204 */
1205 bytes = (bytes + 15) & ~15;
1206 KKASSERT(bytes <= HAMMER_XBUFSIZE);
1207
1208 /*
1209 * Basic zone validation & locking
1210 */
1211 zone = HAMMER_ZONE_DECODE(zone_offset);
1212 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1213 root_volume = trans->rootvol;
1214 error = 0;
4a2796f3 1215
4a2796f3
MD
1216 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1217
1218 /*
1219 * Dive layer 1.
1220 */
1221 layer1_offset = freemap->phys_offset +
1222 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1223 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
cdb6e4e6
MD
1224 if (error)
1225 goto failed;
4a2796f3
MD
1226 KKASSERT(layer1->phys_offset &&
1227 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1228 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
db9f9d7f
MD
1229 hammer_lock_ex(&hmp->blkmap_lock);
1230 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1231 panic("CRC FAILED: LAYER1");
1232 hammer_unlock(&hmp->blkmap_lock);
4a2796f3
MD
1233 }
1234
1235 /*
1236 * Dive layer 2, each entry represents a large-block.
1237 */
1238 layer2_offset = layer1->phys_offset +
1239 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1240 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
cdb6e4e6
MD
1241 if (error)
1242 goto failed;
4a2796f3 1243 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
db9f9d7f
MD
1244 hammer_lock_ex(&hmp->blkmap_lock);
1245 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1246 panic("CRC FAILED: LAYER2");
1247 hammer_unlock(&hmp->blkmap_lock);
4a2796f3
MD
1248 }
1249
df301614
MD
1250 hammer_lock_ex(&hmp->blkmap_lock);
1251
4a2796f3
MD
1252 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1253
1254 /*
1255 * Finalize some or all of the space covered by a current
1256 * reservation. An allocation in the same layer may have
1257 * already assigned ownership.
1258 */
1259 if (layer2->zone == 0) {
1260 hammer_modify_buffer(trans, buffer1,
1261 layer1, sizeof(*layer1));
1262 --layer1->blocks_free;
1263 layer1->layer1_crc = crc32(layer1,
1264 HAMMER_LAYER1_CRCSIZE);
1265 hammer_modify_buffer_done(buffer1);
1266 layer2->zone = zone;
1267 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1268 KKASSERT(layer2->append_off == 0);
1269 hammer_modify_volume_field(trans,
1270 trans->rootvol,
1271 vol0_stat_freebigblocks);
1272 --root_volume->ondisk->vol0_stat_freebigblocks;
1273 hmp->copy_stat_freebigblocks =
1274 root_volume->ondisk->vol0_stat_freebigblocks;
1275 hammer_modify_volume_done(trans->rootvol);
1276 }
1277 if (layer2->zone != zone)
1278 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1279 KKASSERT(layer2->zone == zone);
1ce12d35 1280 KKASSERT(bytes != 0);
4a2796f3 1281 layer2->bytes_free -= bytes;
507df98a
ID
1282
1283 if (resv) {
5e435c92 1284 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
507df98a 1285 }
4a2796f3
MD
1286
1287 /*
1288 * Finalizations can occur out of order, or combined with allocations.
1289 * append_off must be set to the highest allocated offset.
1290 */
df301614
MD
1291 offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1292 if (layer2->append_off < offset)
1293 layer2->append_off = offset;
4a2796f3 1294
19619882 1295 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
10a5d1ba 1296 hammer_modify_buffer_done(buffer2);
d99d6bf5 1297 hammer_unlock(&hmp->blkmap_lock);
f03c9cf4 1298
cdb6e4e6 1299failed:
f03c9cf4
MD
1300 if (buffer1)
1301 hammer_rel_buffer(buffer1, 0);
1302 if (buffer2)
1303 hammer_rel_buffer(buffer2, 0);
cdb6e4e6 1304 return(error);
40043e7f
MD
1305}
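
/*
 * Illustration only (not part of the original file): why append_off only
 * ratchets upward in hammer_blockmap_finalize().  If two reservations in
 * the same big-block cover [0,64KB) and [64KB,128KB) and the second one
 * is finalized first, append_off becomes 128KB; when the first one is
 * finalized later its end offset (64KB) is below append_off, so the value
 * stays at 128KB and no space is handed out twice.
 */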
1306
1307/*
320a5c59
MD
1308 * Return the approximate number of free bytes in the big-block
1309 * containing the specified blockmap offset.
1310 *
1311 * WARNING: A negative number can be returned if data de-dup exists,
 1312 * and the result will also not represent the actual number
1313 * of free bytes in this case.
1314 *
1315 * This code is used only by the reblocker.
bf686dbe
MD
1316 */
1317int
cb51be26 1318hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
bf686dbe
MD
1319 int *curp, int *errorp)
1320{
1321 hammer_volume_t root_volume;
cb51be26
MD
1322 hammer_blockmap_t blockmap;
1323 hammer_blockmap_t freemap;
bf686dbe
MD
1324 struct hammer_blockmap_layer1 *layer1;
1325 struct hammer_blockmap_layer2 *layer2;
1326 hammer_buffer_t buffer = NULL;
1327 hammer_off_t layer1_offset;
1328 hammer_off_t layer2_offset;
320a5c59 1329 int32_t bytes;
bf686dbe
MD
1330 int zone;
1331
cb51be26 1332 zone = HAMMER_ZONE_DECODE(zone_offset);
bf686dbe
MD
1333 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1334 root_volume = hammer_get_root_volume(hmp, errorp);
1335 if (*errorp) {
1336 *curp = 0;
1337 return(0);
1338 }
cb51be26
MD
1339 blockmap = &hmp->blockmap[zone];
1340 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
bf686dbe
MD
1341
1342 /*
1343 * Dive layer 1.
1344 */
cb51be26
MD
1345 layer1_offset = freemap->phys_offset +
1346 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
bf686dbe 1347 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
cdb6e4e6
MD
1348 if (*errorp) {
1349 bytes = 0;
1350 goto failed;
1351 }
bf686dbe 1352 KKASSERT(layer1->phys_offset);
19619882 1353 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
db9f9d7f
MD
1354 hammer_lock_ex(&hmp->blkmap_lock);
1355 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1356 panic("CRC FAILED: LAYER1");
1357 hammer_unlock(&hmp->blkmap_lock);
19619882 1358 }
bf686dbe
MD
1359
1360 /*
1361 * Dive layer 2, each entry represents a large-block.
cdb6e4e6
MD
1362 *
1363 * (reuse buffer, layer1 pointer becomes invalid)
bf686dbe
MD
1364 */
1365 layer2_offset = layer1->phys_offset +
cb51be26 1366 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
bf686dbe 1367 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
cdb6e4e6
MD
1368 if (*errorp) {
1369 bytes = 0;
1370 goto failed;
1371 }
19619882 1372 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
db9f9d7f
MD
1373 hammer_lock_ex(&hmp->blkmap_lock);
1374 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1375 panic("CRC FAILED: LAYER2");
1376 hammer_unlock(&hmp->blkmap_lock);
19619882 1377 }
cb51be26 1378 KKASSERT(layer2->zone == zone);
bf686dbe
MD
1379
1380 bytes = layer2->bytes_free;
1381
cb51be26 1382 if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
bf686dbe
MD
1383 *curp = 0;
1384 else
1385 *curp = 1;
cdb6e4e6 1386failed:
bf686dbe
MD
1387 if (buffer)
1388 hammer_rel_buffer(buffer, 0);
1389 hammer_rel_volume(root_volume, 0);
1390 if (hammer_debug_general & 0x0800) {
1391 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
973c11b9 1392 (long long)zone_offset, bytes);
bf686dbe
MD
1393 }
1394 return(bytes);
1395}
1396
1397
1398/*
40043e7f
MD
1399 * Lookup a blockmap offset.
1400 */
1401hammer_off_t
cb51be26
MD
1402hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1403 int *errorp)
40043e7f
MD
1404{
1405 hammer_volume_t root_volume;
cb51be26 1406 hammer_blockmap_t freemap;
c3be93f2
MD
1407 struct hammer_blockmap_layer1 *layer1;
1408 struct hammer_blockmap_layer2 *layer2;
40043e7f 1409 hammer_buffer_t buffer = NULL;
c3be93f2
MD
1410 hammer_off_t layer1_offset;
1411 hammer_off_t layer2_offset;
40043e7f 1412 hammer_off_t result_offset;
cb51be26
MD
1413 hammer_off_t base_off;
1414 hammer_reserve_t resv;
40043e7f 1415 int zone;
40043e7f 1416
cb51be26
MD
1417 /*
1418 * Calculate the zone-2 offset.
1419 */
1420 zone = HAMMER_ZONE_DECODE(zone_offset);
40043e7f 1421 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
cb51be26
MD
1422
1423 result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1424 HAMMER_ZONE_RAW_BUFFER;
1425
1426 /*
1427 * We can actually stop here, normal blockmaps are now direct-mapped
1428 * onto the freemap and so represent zone-2 addresses.
1429 */
1430 if (hammer_verify_zone == 0) {
1431 *errorp = 0;
1432 return(result_offset);
1433 }
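	/*
	 * Illustration only (not part of the original file): the translation
	 * above only swaps the zone field.  Assuming the conventional
	 * encoding with the zone in the top 4 bits of a hammer_off_t, a
	 * zone-10 (large-data) offset such as 0xA000000001812340 becomes
	 * the zone-2 raw-buffer offset 0x2000000001812340; the volume and
	 * byte-offset bits are untouched.
	 */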
1434
1435 /*
1436 * Validate the allocation zone
1437 */
40043e7f
MD
1438 root_volume = hammer_get_root_volume(hmp, errorp);
1439 if (*errorp)
1440 return(0);
cb51be26
MD
1441 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1442 KKASSERT(freemap->phys_offset != 0);
40043e7f
MD
1443
1444 /*
c3be93f2 1445 * Dive layer 1.
40043e7f 1446 */
cb51be26
MD
1447 layer1_offset = freemap->phys_offset +
1448 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
c3be93f2 1449 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
cdb6e4e6
MD
1450 if (*errorp)
1451 goto failed;
cb51be26 1452 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
19619882 1453 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
db9f9d7f
MD
1454 hammer_lock_ex(&hmp->blkmap_lock);
1455 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1456 panic("CRC FAILED: LAYER1");
1457 hammer_unlock(&hmp->blkmap_lock);
19619882 1458 }
40043e7f
MD
1459
1460 /*
c3be93f2 1461 * Dive layer 2, each entry represents a large-block.
40043e7f 1462 */
c3be93f2 1463 layer2_offset = layer1->phys_offset +
cb51be26 1464 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
c3be93f2 1465 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
40043e7f 1466
cdb6e4e6
MD
1467 if (*errorp)
1468 goto failed;
cb51be26
MD
1469 if (layer2->zone == 0) {
1470 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1471 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1472 base_off);
1473 KKASSERT(resv && resv->zone == zone);
1474
1475 } else if (layer2->zone != zone) {
1476 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1477 layer2->zone, zone);
1478 }
19619882 1479 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
db9f9d7f
MD
1480 hammer_lock_ex(&hmp->blkmap_lock);
1481 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1482 panic("CRC FAILED: LAYER2");
1483 hammer_unlock(&hmp->blkmap_lock);
19619882 1484 }
c3be93f2 1485
cdb6e4e6 1486failed:
40043e7f
MD
1487 if (buffer)
1488 hammer_rel_buffer(buffer, 0);
1489 hammer_rel_volume(root_volume, 0);
1490 if (hammer_debug_general & 0x0800) {
1491 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
973c11b9 1492 (long long)zone_offset, (long long)result_offset);
40043e7f
MD
1493 }
1494 return(result_offset);
1495}
1496
bf686dbe
MD
1497
1498/*
cb51be26 1499 * Check space availability
b0aab9b9
MD
1500 *
1501 * MPSAFE - does not require fs_token
bf686dbe 1502 */
cb51be26 1503int
0f65be10 1504_hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
bf686dbe 1505{
cb51be26
MD
1506 const int in_size = sizeof(struct hammer_inode_data) +
1507 sizeof(union hammer_btree_elm);
1508 const int rec_size = (sizeof(union hammer_btree_elm) * 2);
a7e9bef1 1509 int64_t usedbytes;
cb51be26 1510
a7e9bef1
MD
1511 usedbytes = hmp->rsv_inodes * in_size +
1512 hmp->rsv_recs * rec_size +
1513 hmp->rsv_databytes +
7b6ccb11
MD
1514 ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1515 ((int64_t)hidirtybufspace << 2) +
1516 (slop << HAMMER_LARGEBLOCK_BITS);
a7e9bef1 1517
7b6ccb11 1518 hammer_count_extra_space_used = usedbytes; /* debugging */
0f65be10
MD
1519 if (resp)
1520 *resp = usedbytes;
a7e9bef1 1521
7b6ccb11
MD
1522 if (hmp->copy_stat_freebigblocks >=
1523 (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
cb51be26 1524 return(0);
7b6ccb11 1525 }
cb51be26 1526 return (ENOSPC);
6f97fce3
MD
1527}
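
/*
 * Illustration only (not part of the original file): the final test above
 * converts the byte estimate into big-blocks.  Assuming
 * HAMMER_LARGEBLOCK_BITS is 23 (8MB big-blocks), a usedbytes estimate of
 * roughly 200MB requires copy_stat_freebigblocks to be at least
 * 200MB >> 23 = 25 big-blocks, otherwise ENOSPC is returned; the slop
 * argument simply adds slop more big-blocks of headroom.
 */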
1528