/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER blockmap
 */
#include <vm/vm_page2.h>

#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
                                    hammer_off_t base_offset, int zone,
                                    hammer_blockmap_layer2_t layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
static int update_bytes_free(hammer_reserve_t resv, int bytes);
static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
static void hammer_skip_volume(hammer_off_t *offsetp);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}
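
/*
 * Illustrative note (not in the original source): because the comparator
 * orders reservations strictly by zone_offset, RB_GENERATE2's extra key
 * argument lets callers look a reservation up directly by the big-block
 * base offset, e.g.:
 *
 *	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
 *
 * which is exactly how the allocators below probe for a covering
 * reservation.
 */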

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
                      hammer_off_t hint, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        hammer_blockmap_layer1_t layer1;
        hammer_blockmap_layer2_t layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
        int use_hint;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = HAMMER_DATA_DOALIGN(bytes);
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(hammer_is_index_record(zone));

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Use the hint if we have one.
         */
        if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
                next_offset = HAMMER_DATA_DOALIGN_WITH(hammer_off_t, hint);
                use_hint = 1;
        } else {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }
again:

        /*
         * use_hint is turned off if we leave the hinted big-block.
         */
        if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }
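
        /*
         * Illustrative note (not in the original source): the xor-and-mask
         * idiom above is how this file tests whether two offsets fall in
         * the same aligned block.  (a ^ b) clears every bit where a and b
         * agree, so ((a ^ b) & ~mask) is non-zero exactly when a and b
         * differ somewhere above the block's offset bits.  For example,
         * with an imaginary 0xFFF mask, 0x1234 and 0x1FFF share block
         * 0x1000 (their xor fits inside the mask), while 0x1234 and
         * 0x2000 do not.
         */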

        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        hmkprintf(hmp, "No space left for zone %d "
                                "allocation\n", zone);
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
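
        /*
         * Illustrative example (not in the original source), assuming the
         * usual 16KB HAMMER_BUFSIZE: a 512-byte request starting at
         * ....3F00 would span ....3F00-....40FF, so next_offset and
         * tmp_offset differ above HAMMER_BUFMASK64 and the request is
         * bumped to the next buffer boundary (....4000) and retried.
         */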
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
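
        /*
         * Illustrative note (not in the original source): the freemap is a
         * two-level radix structure.  Each layer1 entry points at a layer2
         * big-block whose entries each describe one 8MB big-block, so a
         * single layer1 entry covers on the order of 4TB of zone address
         * space.  The LAYER1/LAYER2 offset macros simply extract the
         * corresponding index bits from next_offset.
         */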

        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.
         */
        if (!hammer_crc_test_layer1(layer1)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer1(layer1))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
                if (hammer_check_volume(hmp, &next_offset)) {
                        result_offset = 0;
                        goto failed;
                }
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Skip the whole volume if it is pointing to a layer2 big-block
         * on a volume that we are currently trying to remove from the
         * file-system. This is used by the volume-del code together with
         * the reblocker to free up a volume.
         */
        if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
                hammer_skip_volume(&next_offset);
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  This can race another thread holding the lock
         * and in the middle of modifying layer2.
         */
        if (!hammer_crc_test_layer2(layer2)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer2(layer2))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

#if 0
        /*
         * If operating in the current non-hint blockmap block, do not
         * allow it to get over-full.  Also drop any active hinting so
         * blockmap->next_offset is updated at the end.
         *
         * We do this for B-Tree and meta-data allocations to provide
         * localization for updates.
         */
        if ((zone == HAMMER_ZONE_BTREE_INDEX ||
             zone == HAMMER_ZONE_META_INDEX) &&
            offset >= HAMMER_BIGBLOCK_OVERFILL &&
            !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
                if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
                        next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                        use_hint = 0;
                        goto again;
                }
        }
#endif

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset);
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the big-block to our zone
                 */
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                --layer1->blocks_free;
                hammer_crc_set_layer1(layer1);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        /*
         * NOTE: bytes_free can legally go negative due to de-dup.
         */
        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        hammer_crc_set_layer2(layer2);
        hammer_modify_buffer_done(buffer2);

        /*
         * We hold the blockmap lock and should be the only ones
         * capable of modifying resv->append_off.  Track the allocation
         * as appropriate.
         */
        KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew_ext().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
                if (*errorp) {
                        result_offset = 0;
                        goto failed;
                }
        }
        result_offset = next_offset;

        /*
         * If we weren't supplied with a hint or could not use the hint
         * then we wound up using blockmap->next_offset as the hint and
         * need to save it.
         */
        if (use_hint == 0) {
                hammer_modify_volume_noundo(NULL, root_volume);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        }
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
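
/*
 * Illustrative usage sketch (not in the original source): a backend
 * caller allocating a new B-Tree node might look roughly like this,
 * assuming an active transaction and an optional locality hint:
 *
 *	hammer_off_t node_offset;
 *	int error;
 *
 *	node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *					    sizeof(struct hammer_node_ondisk),
 *					    hint, &error);
 *	if (node_offset == 0)
 *		return(error);		/* typically ENOSPC */
 *
 * The returned offset is a zone-X blockmap address, not a raw disk
 * offset; it is translated through the freemap layers on access.
 */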

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for big-blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_blockmap_layer1_t layer1;
        hammer_blockmap_layer2_t layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx = NULL;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(hammer_is_index_record(zone));
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = HAMMER_DATA_DOALIGN(bytes);
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        hmkprintf(hmp, "No space left for zone %d "
                                "reservation\n", zone);
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (!hammer_crc_test_layer1(layer1)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer1(layer1))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
                if (hammer_check_volume(hmp, &next_offset))
                        goto failed;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (!hammer_crc_test_layer2(layer2)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer2(layer2))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset);
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resx->flags |= HAMMER_RESF_LAYER2FREE;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         *
         * If we do not have a whole lot of system memory we really can't
         * afford to block while holding the blkmap_lock!
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
                        hammer_bnew(hmp, next_offset, errorp, &buffer3);
                        if (*errorp)
                                goto failed;
                }
        }

        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}
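
/*
 * Illustrative lifecycle sketch (not in the original source): the
 * frontend/backend split works roughly like this, with error handling
 * elided:
 *
 *	// frontend: reserve space and write data directly to disk
 *	resv = hammer_blockmap_reserve(hmp, HAMMER_ZONE_LARGE_DATA_INDEX,
 *				       bytes, &zone_offset, &error);
 *	... issue direct write I/O against zone_offset ...
 *
 *	// backend: commit the meta-data when the record is flushed
 *	error = hammer_blockmap_finalize(trans, resv, zone_offset, bytes);
 *	hammer_blockmap_reserve_complete(hmp, resv);
 *
 * No layer1/layer2 meta-data is dirtied by the reservation itself; only
 * the finalize step performs transactional modifications.
 */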

/*
 * Frontend function - Dedup bytes in a zone.
 *
 * Dedup reservations work exactly the same as normal write reservations
 * except we only adjust the bytes_free field and don't touch the append
 * offset.  The finalization mechanism for dedup reservations is also the
 * same as for normal write ones - the backend finalizes the reservation
 * with hammer_blockmap_finalize().
 */
hammer_reserve_t
hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
                              hammer_off_t zone_offset, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        hammer_blockmap_layer1_t layer1;
        hammer_blockmap_layer2_t layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv = NULL;
        hammer_reserve_t resx = NULL;

        /*
         * Setup
         */
        KKASSERT(hammer_is_index_record(zone));
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return (NULL);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        bytes = HAMMER_DATA_DOALIGN(bytes);
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (!hammer_crc_test_layer1(layer1)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer1(layer1))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (!hammer_crc_test_layer2(layer2)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer2(layer2))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Fail if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone)
                goto failed;

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                goto failed;
        }

        base_off = hammer_xlate_to_zone2(zone_offset & ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resv, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                ++resv->refs;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->bytes_free = layer2->bytes_free;
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resx, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        kfree(resx, hmp->m_misc);
                        goto failed;
                }
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }

        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);

        return(resv);
}

static int
update_bytes_free(hammer_reserve_t resv, int bytes)
{
        int32_t temp;

        /*
         * Big-block underflow check
         */
        temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* XXX do we really need it? */
        if (temp > resv->bytes_free) {
                hdkprintf("BIGBLOCK UNDERFLOW\n");
                return (0);
        }

        resv->bytes_free -= bytes;
        return (1);
}
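
/*
 * Illustrative note (not in the original source): the guard above relies
 * on 32-bit wrap-around.  bytes_free may legitimately dip below zero due
 * to dedup, but should stay far from INT32_MIN.  With the usual 8MB
 * big-blocks, a sane bytes_free such as 1000000 gives
 * temp = 1000000 - 16777216, which is still less than bytes_free; only a
 * wildly out-of-range bytes_free (within 2 big-blocks of INT32_MIN) makes
 * the subtraction wrap to a large positive value and compare greater,
 * which is reported as an underflow.
 */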

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t base_offset;
        int error;

        KKASSERT(resv->refs > 0);
        KKASSERT(hammer_is_zone_raw_buffer(resv->zone_offset));

        /*
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the reservation,
         * allowing us to safely delete any related HAMMER buffers.
         *
         * If we are unable to clean out all related HAMMER buffers we
         * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_BIGBLOCK_SIZE;
                base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
                if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
                        hammer_dedup_cache_inval(hmp, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_BIGBLOCK_SIZE,
                                           1);
                if (hammer_debug_general & 0x20000) {
                        hkprintf("delbgblk %016jx error %d\n",
                                (intmax_t)base_offset, error);
                }
                if (error)
                        hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                if (hammer_debug_general & 0x20000) {
                        hkprintf("delresvr %016jx zone %02x\n",
                                (intmax_t)resv->zone_offset, resv->zone);
                }
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
                        int zone, hammer_blockmap_layer2_t layer2)
{
        hammer_reserve_t resv;

        /*
         * Allocate the reservation if necessary.
         *
         * NOTE: need lock in future around resv lookup/allocation and
         * the setdelay call, currently refs is not bumped until the call.
         */
again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
                resv->append_off = HAMMER_BIGBLOCK_SIZE;

                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
        } else {
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
        hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flg_no = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flg_no = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
}
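
/*
 * Illustrative note (not in the original source): flg_no ties the delayed
 * reservation to a flusher sequence number one beyond the current flush
 * group.  The flusher is expected to call hammer_reserve_clrdelay() once
 * that sequence has fully cycled, at which point the on-delay reference
 * taken above is handed to hammer_blockmap_reserve_complete() and the
 * big-block finally becomes reusable.
 */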

/*
 * Reserve has reached its flush point, remove it from the delay list
 * and finish it off.  hammer_blockmap_reserve_complete() inherits
 * the ondelay reference.
 */
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        hammer_blockmap_layer1_t layer1;
        hammer_blockmap_layer2_t layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = HAMMER_DATA_DOALIGN(bytes);
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(hammer_is_index_record(zone));
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (!hammer_crc_test_layer1(layer1)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer1(layer1))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (!hammer_crc_test_layer2(layer2)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer2(layer2))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        /*
         * If a big-block becomes entirely free we must create a covering
         * reservation to prevent premature reuse.  Note, however, that
         * the big-block and/or reservation may still have an append_off
         * that allows further (non-reused) allocations.
         *
         * Once the reservation has been made we re-check layer2 and if
         * the big-block is still entirely free we reset the layer2 entry.
         * The reservation will prevent premature reuse.
         *
         * NOTE: hammer_buffer's are only invalidated when the reservation
         * is completed, if the layer2 entry is still completely free at
         * that time.  Any allocations from the reservation that may have
         * occurred in the meantime, or active references on the reservation
         * from new pending allocations, will prevent the invalidation from
         * occurring.
         */
        if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);

                hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        hammer_crc_set_layer1(layer1);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }
        hammer_crc_set_layer2(layer2);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

int
hammer_blockmap_dedup(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_blockmap_t freemap;
        hammer_blockmap_layer1_t layer1;
        hammer_blockmap_layer2_t layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t temp;
        int error;
        int zone __debugvar;

        if (bytes == 0)
                return (0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = HAMMER_DATA_DOALIGN(bytes);
        KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(hammer_is_index_record(zone));
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (!hammer_crc_test_layer1(layer1)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer1(layer1))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (!hammer_crc_test_layer2(layer2)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer2(layer2))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Charge the de-dup'd bytes against the big-block as an extra
         * reference to space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* prevent gcc from optimizing temp out */
        if (temp > layer2->bytes_free) {
                error = ERANGE;
                goto underflow;
        }
        layer2->bytes_free -= bytes;

        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        hammer_crc_set_layer2(layer2);
underflow:
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return (error);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        hammer_blockmap_layer1_t layer1;
        hammer_blockmap_layer2_t layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = HAMMER_DATA_DOALIGN(bytes);
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(hammer_is_index_record(zone));
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (!hammer_crc_test_layer1(layer1)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer1(layer1))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (!hammer_crc_test_layer2(layer2)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer2(layer2))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                --layer1->blocks_free;
                hammer_crc_set_layer1(layer1);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;

        if (resv) {
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
        }

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        hammer_crc_set_layer2(layer2);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return(error);
}
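
/*
 * Illustrative example (not in the original source) of the out-of-order
 * append_off rule above: if two records in the same big-block are
 * finalized in reverse order, say one covering offsets 32768-49151 and
 * then one covering 0-16383, append_off first becomes 49152 and the
 * second finalization leaves it alone, since 16384 < 49152.  append_off
 * only ever ratchets up to the highest finalized end offset.
 */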

/*
 * Return the approximate number of free bytes in the big-block
 * containing the specified blockmap offset.
 *
 * WARNING: A negative number can be returned if data de-dup exists,
 *          and the result will also not represent the actual number
 *          of free bytes in this case.
 *
 *          This code is used only by the reblocker.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_blockmap_layer1_t layer1;
        hammer_blockmap_layer2_t layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(hammer_is_index_record(zone));
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp) {
                *curp = 0;
                bytes = 0;
                goto failed;
        }
        KKASSERT(layer1->phys_offset);
        if (!hammer_crc_test_layer1(layer1)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer1(layer1))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         *
         * (reuse buffer, layer1 pointer becomes invalid)
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        if (*errorp) {
                *curp = 0;
                bytes = 0;
                goto failed;
        }
        if (!hammer_crc_test_layer2(layer2)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer2(layer2))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

        /*
         * *curp is set to 1 only when there was no error and
         * blockmap->next_offset and zone_offset fall within the same
         * big-block.
         */
        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
                *curp = 0;  /* not same */
        else
                *curp = 1;
failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x4000) {
                hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset, bytes);
        }
        return(bytes);
}


/*
 * Lookup a blockmap offset and verify blockmap layers.
 */
hammer_off_t
hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        hammer_blockmap_layer1_t layer1;
        hammer_blockmap_layer2_t layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv __debugvar;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        result_offset = hammer_xlate_to_zone2(zone_offset);

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp)
                goto failed;
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (!hammer_crc_test_layer1(layer1)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer1(layer1))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        if (*errorp)
                goto failed;
        if (layer2->zone == 0) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);

        } else if (layer2->zone != zone) {
                hpanic("bad zone %d/%d", layer2->zone, zone);
        }
        if (!hammer_crc_test_layer2(layer2)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (!hammer_crc_test_layer2(layer2))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                hdkprintf("%016jx -> %016jx\n",
                        (intmax_t)zone_offset, (intmax_t)result_offset);
        }
        return(result_offset);
}


/*
 * Check space availability
 *
 * MPSAFE - does not require fs_token
 */
int
_hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        int64_t usedbytes;

        usedbytes = hmp->rsv_inodes * in_size +
                    hmp->rsv_recs * rec_size +
                    hmp->rsv_databytes +
                    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
                    ((int64_t)hammer_limit_dirtybufspace) +
                    (slop << HAMMER_BIGBLOCK_BITS);

        if (resp)
                *resp = usedbytes;

        if (hmp->copy_stat_freebigblocks >=
            (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
                return(0);
        }

        return (ENOSPC);
}
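
/*
 * Illustrative example (not in the original source), with the usual 8MB
 * big-blocks (HAMMER_BIGBLOCK_BITS == 23): a caller passing slop == 10
 * adds 80MB of headroom on top of the tracked inode, record, data and
 * delayed-reservation estimates plus the dirty-buffer limit.  The sum is
 * then compared against the cached free big-block count; e.g. usedbytes
 * of ~200MB requires at least 25 free big-blocks or ENOSPC is returned.
 */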

static int
hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
{
        hammer_blockmap_t freemap;
        hammer_blockmap_layer1_t layer1;
        hammer_buffer_t buffer1 = NULL;
        hammer_off_t layer1_offset;
        int error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto end;

        /*
         * No more physically available space in layer1s
         * of the current volume, go to the next volume.
         */
        if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
                hammer_skip_volume(offsetp);
end:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        return(error);
}

static void
hammer_skip_volume(hammer_off_t *offsetp)
{
        hammer_off_t offset;
        int zone, vol_no;

        offset = *offsetp;
        zone = HAMMER_ZONE_DECODE(offset);
        vol_no = HAMMER_VOL_DECODE(offset) + 1;
        KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);

        if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
                vol_no = 0;
                ++zone;
        }

        *offsetp = HAMMER_ENCODE(zone, vol_no, 0);
}
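
/*
 * Illustrative note (not in the original source): a zone-X offset encodes
 * the zone and volume number in its high bits, so skipping a volume is
 * just re-encoding with vol_no + 1 and a zero in-volume offset.  When the
 * last volume wraps, bumping the zone makes the offset compare equal to
 * HAMMER_ZONE_ENCODE(zone + 1, 0) in the allocators above, which is their
 * end-of-zone wrap condition.
 */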