sys/vfs/hammer: Add HAMMER_ZONE_LAYER1_NEXT_OFFSET()
[dragonfly.git] / sys / vfs / hammer / hammer_blockmap.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 /*
36  * HAMMER blockmap
37  */
38 #include <vm/vm_page2.h>
39
40 #include "hammer.h"
41
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44                                     hammer_off_t base_offset, int zone,
45                                     hammer_blockmap_layer2_t layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48 static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
49 static void hammer_skip_volume(hammer_off_t *offsetp);
50
51 /*
52  * Reserved big-blocks red-black tree support
53  */
54 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
55              hammer_res_rb_compare, hammer_off_t, zone_offset);
56
57 static int
58 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
59 {
60         if (res1->zone_offset < res2->zone_offset)
61                 return(-1);
62         if (res1->zone_offset > res2->zone_offset)
63                 return(1);
64         return(0);
65 }
66
67 /*
68  * Allocate bytes from a zone
69  */
70 hammer_off_t
71 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
72                       hammer_off_t hint, int *errorp)
73 {
74         hammer_mount_t hmp;
75         hammer_volume_t root_volume;
76         hammer_blockmap_t blockmap;
77         hammer_blockmap_t freemap;
78         hammer_reserve_t resv;
79         hammer_blockmap_layer1_t layer1;
80         hammer_blockmap_layer2_t layer2;
81         hammer_buffer_t buffer1 = NULL;
82         hammer_buffer_t buffer2 = NULL;
83         hammer_buffer_t buffer3 = NULL;
84         hammer_off_t tmp_offset;
85         hammer_off_t next_offset;
86         hammer_off_t result_offset;
87         hammer_off_t layer1_offset;
88         hammer_off_t layer2_offset;
89         hammer_off_t base_off;
90         int loops = 0;
91         int offset;             /* offset within big-block */
92         int use_hint;
93
94         hmp = trans->hmp;
95
96         /*
97          * Deal with alignment and buffer-boundary issues.
98          *
99          * Be careful, certain primary alignments are used below to allocate
100          * new blockmap blocks.
101          */
102         bytes = HAMMER_DATA_DOALIGN(bytes);
103         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
104         KKASSERT(hammer_is_zone2_mapped_index(zone));
105
106         /*
107          * Setup
108          */
109         root_volume = trans->rootvol;
110         *errorp = 0;
111         blockmap = &hmp->blockmap[zone];
112         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
113         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
114
115         /*
116          * Use the hint if we have one.
117          */
118         if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
119                 next_offset = HAMMER_DATA_DOALIGN_WITH(hammer_off_t, hint);
120                 use_hint = 1;
121         } else {
122                 next_offset = blockmap->next_offset;
123                 use_hint = 0;
124         }
125 again:
126
127         /*
128          * use_hint is turned off if we leave the hinted big-block.
129          */
130         if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
131                 next_offset = blockmap->next_offset;
132                 use_hint = 0;
133         }
134
135         /*
136          * Check for wrap
137          */
138         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
139                 if (++loops == 2) {
140                         hmkprintf(hmp, "No space left for zone %d "
141                                 "allocation\n", zone);
142                         result_offset = 0;
143                         *errorp = ENOSPC;
144                         goto failed;
145                 }
146                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
147         }
148
149         /*
150          * The allocation request may not cross a buffer boundary.  Special
151          * large allocations must not cross a big-block boundary.
152          */
153         tmp_offset = next_offset + bytes - 1;
154         if (bytes <= HAMMER_BUFSIZE) {
155                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
156                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
157                         goto again;
158                 }
159         } else {
160                 if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
161                         next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
162                         goto again;
163                 }
164         }
165         offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
166
167         /*
168          * Dive layer 1.
169          */
170         layer1_offset = freemap->phys_offset +
171                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
172
173         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
174         if (*errorp) {
175                 result_offset = 0;
176                 goto failed;
177         }
178
179         /*
180          * Check CRC.
181          */
182         if (!hammer_crc_test_layer1(layer1)) {
183                 hammer_lock_ex(&hmp->blkmap_lock);
184                 if (!hammer_crc_test_layer1(layer1))
185                         hpanic("CRC FAILED: LAYER1");
186                 hammer_unlock(&hmp->blkmap_lock);
187         }
188
189         /*
190          * If we are at a big-block boundary and layer1 indicates no
191          * free big-blocks, then we cannot allocate a new big-block in
192          * layer2, skip to the next layer1 entry.
193          */
194         if (offset == 0 && layer1->blocks_free == 0) {
195                 next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
196                 if (hammer_check_volume(hmp, &next_offset)) {
197                         result_offset = 0;
198                         goto failed;
199                 }
200                 goto again;
201         }
202         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
203
204         /*
205          * Skip the whole volume if it is pointing to a layer2 big-block
206          * on a volume that we are currently trying to remove from the
207          * file-system. This is used by the volume-del code together with
208          * the reblocker to free up a volume.
209          */
210         if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
211                 hammer_skip_volume(&next_offset);
212                 goto again;
213         }
214
215         /*
216          * Dive layer 2, each entry represents a big-block.
217          */
218         layer2_offset = layer1->phys_offset +
219                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
220         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
221         if (*errorp) {
222                 result_offset = 0;
223                 goto failed;
224         }
225
226         /*
227          * Check CRC.  This can race another thread holding the lock
228          * and in the middle of modifying layer2.
229          */
230         if (!hammer_crc_test_layer2(layer2)) {
231                 hammer_lock_ex(&hmp->blkmap_lock);
232                 if (!hammer_crc_test_layer2(layer2))
233                         hpanic("CRC FAILED: LAYER2");
234                 hammer_unlock(&hmp->blkmap_lock);
235         }
236
237         /*
238          * Skip the layer if the zone is owned by someone other then us.
239          */
240         if (layer2->zone && layer2->zone != zone) {
241                 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
242                 goto again;
243         }
244         if (offset < layer2->append_off) {
245                 next_offset += layer2->append_off - offset;
246                 goto again;
247         }
248
249 #if 0
250         /*
251          * If operating in the current non-hint blockmap block, do not
252          * allow it to get over-full.  Also drop any active hinting so
253          * blockmap->next_offset is updated at the end.
254          *
255          * We do this for B-Tree and meta-data allocations to provide
256          * localization for updates.
257          */
258         if ((zone == HAMMER_ZONE_BTREE_INDEX ||
259              zone == HAMMER_ZONE_META_INDEX) &&
260             offset >= HAMMER_BIGBLOCK_OVERFILL &&
261             !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
262                 if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
263                         next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
264                         use_hint = 0;
265                         goto again;
266                 }
267         }
268 #endif
269
270         /*
271          * We need the lock from this point on.  We have to re-check zone
272          * ownership after acquiring the lock and also check for reservations.
273          */
274         hammer_lock_ex(&hmp->blkmap_lock);
275
276         if (layer2->zone && layer2->zone != zone) {
277                 hammer_unlock(&hmp->blkmap_lock);
278                 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
279                 goto again;
280         }
281         if (offset < layer2->append_off) {
282                 hammer_unlock(&hmp->blkmap_lock);
283                 next_offset += layer2->append_off - offset;
284                 goto again;
285         }
286
287         /*
288          * The big-block might be reserved by another zone.  If it is reserved
289          * by our zone we may have to move next_offset past the append_off.
290          */
291         base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
292         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
293         if (resv) {
294                 if (resv->zone != zone) {
295                         hammer_unlock(&hmp->blkmap_lock);
296                         next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
297                                       ~HAMMER_BIGBLOCK_MASK64;
298                         goto again;
299                 }
300                 if (offset < resv->append_off) {
301                         hammer_unlock(&hmp->blkmap_lock);
302                         next_offset += resv->append_off - offset;
303                         goto again;
304                 }
305                 ++resv->refs;
306         }
307
308         /*
309          * Ok, we can allocate out of this layer2 big-block.  Assume ownership
310          * of the layer for real.  At this point we've validated any
311          * reservation that might exist and can just ignore resv.
312          */
313         if (layer2->zone == 0) {
314                 /*
315                  * Assign the big-block to our zone
316                  */
317                 hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
318                 --layer1->blocks_free;
319                 hammer_crc_set_layer1(layer1);
320                 hammer_modify_buffer_done(buffer1);
321                 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
322                 layer2->zone = zone;
323                 KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
324                 KKASSERT(layer2->append_off == 0);
325                 hammer_modify_volume_field(trans, trans->rootvol,
326                                            vol0_stat_freebigblocks);
327                 --root_volume->ondisk->vol0_stat_freebigblocks;
328                 hmp->copy_stat_freebigblocks =
329                         root_volume->ondisk->vol0_stat_freebigblocks;
330                 hammer_modify_volume_done(trans->rootvol);
331         } else {
332                 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
333         }
334         KKASSERT(layer2->zone == zone);
335
336         /*
337          * NOTE: bytes_free can legally go negative due to de-dup.
338          */
339         layer2->bytes_free -= bytes;
340         KKASSERT(layer2->append_off <= offset);
341         layer2->append_off = offset + bytes;
342         hammer_crc_set_layer2(layer2);
343         hammer_modify_buffer_done(buffer2);
344
345         /*
346          * We hold the blockmap lock and should be the only ones
347          * capable of modifying resv->append_off.  Track the allocation
348          * as appropriate.
349          */
350         KKASSERT(bytes != 0);
351         if (resv) {
352                 KKASSERT(resv->append_off <= offset);
353                 resv->append_off = offset + bytes;
354                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
355                 hammer_blockmap_reserve_complete(hmp, resv);
356         }
357
358         /*
359          * If we are allocating from the base of a new buffer we can avoid
360          * a disk read by calling hammer_bnew_ext().
361          */
362         if ((next_offset & HAMMER_BUFMASK) == 0) {
363                 hammer_bnew_ext(trans->hmp, next_offset, bytes,
364                                 errorp, &buffer3);
365                 if (*errorp) {
366                         result_offset = 0;
367                         goto failed;
368                 }
369         }
370         result_offset = next_offset;
371
372         /*
373          * If we weren't supplied with a hint or could not use the hint
374          * then we wound up using blockmap->next_offset as the hint and
375          * need to save it.
376          */
377         if (use_hint == 0) {
378                 hammer_modify_volume_noundo(NULL, root_volume);
379                 blockmap->next_offset = next_offset + bytes;
380                 hammer_modify_volume_done(root_volume);
381         }
382         hammer_unlock(&hmp->blkmap_lock);
383 failed:
384
385         /*
386          * Cleanup
387          */
388         if (buffer1)
389                 hammer_rel_buffer(buffer1, 0);
390         if (buffer2)
391                 hammer_rel_buffer(buffer2, 0);
392         if (buffer3)
393                 hammer_rel_buffer(buffer3, 0);
394
395         return(result_offset);
396 }
397
398 /*
399  * Frontend function - Reserve bytes in a zone.
400  *
401  * This code reserves bytes out of a blockmap without committing to any
402  * meta-data modifications, allowing the front-end to directly issue disk
403  * write I/O for big-blocks of data
404  *
405  * The backend later finalizes the reservation with hammer_blockmap_finalize()
406  * upon committing the related record.
407  */
408 hammer_reserve_t
409 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
410                         hammer_off_t *zone_offp, int *errorp)
411 {
412         hammer_volume_t root_volume;
413         hammer_blockmap_t blockmap;
414         hammer_blockmap_t freemap;
415         hammer_blockmap_layer1_t layer1;
416         hammer_blockmap_layer2_t layer2;
417         hammer_buffer_t buffer1 = NULL;
418         hammer_buffer_t buffer2 = NULL;
419         hammer_buffer_t buffer3 = NULL;
420         hammer_off_t tmp_offset;
421         hammer_off_t next_offset;
422         hammer_off_t layer1_offset;
423         hammer_off_t layer2_offset;
424         hammer_off_t base_off;
425         hammer_reserve_t resv;
426         hammer_reserve_t resx = NULL;
427         int loops = 0;
428         int offset;
429
430         /*
431          * Setup
432          */
433         KKASSERT(hammer_is_zone2_mapped_index(zone));
434         root_volume = hammer_get_root_volume(hmp, errorp);
435         if (*errorp)
436                 return(NULL);
437         blockmap = &hmp->blockmap[zone];
438         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
439         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
440
441         /*
442          * Deal with alignment and buffer-boundary issues.
443          *
444          * Be careful, certain primary alignments are used below to allocate
445          * new blockmap blocks.
446          */
447         bytes = HAMMER_DATA_DOALIGN(bytes);
448         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
449
450         next_offset = blockmap->next_offset;
451 again:
452         resv = NULL;
453         /*
454          * Check for wrap
455          */
456         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
457                 if (++loops == 2) {
458                         hmkprintf(hmp, "No space left for zone %d "
459                                 "reservation\n", zone);
460                         *errorp = ENOSPC;
461                         goto failed;
462                 }
463                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
464         }
465
466         /*
467          * The allocation request may not cross a buffer boundary.  Special
468          * large allocations must not cross a big-block boundary.
469          */
470         tmp_offset = next_offset + bytes - 1;
471         if (bytes <= HAMMER_BUFSIZE) {
472                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
473                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
474                         goto again;
475                 }
476         } else {
477                 if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
478                         next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
479                         goto again;
480                 }
481         }
482         offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
483
484         /*
485          * Dive layer 1.
486          */
487         layer1_offset = freemap->phys_offset +
488                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
489         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
490         if (*errorp)
491                 goto failed;
492
493         /*
494          * Check CRC.
495          */
496         if (!hammer_crc_test_layer1(layer1)) {
497                 hammer_lock_ex(&hmp->blkmap_lock);
498                 if (!hammer_crc_test_layer1(layer1))
499                         hpanic("CRC FAILED: LAYER1");
500                 hammer_unlock(&hmp->blkmap_lock);
501         }
502
503         /*
504          * If we are at a big-block boundary and layer1 indicates no
505          * free big-blocks, then we cannot allocate a new big-block in
506          * layer2, skip to the next layer1 entry.
507          */
508         if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
509             layer1->blocks_free == 0) {
510                 next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
511                 if (hammer_check_volume(hmp, &next_offset))
512                         goto failed;
513                 goto again;
514         }
515         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
516
517         /*
518          * Dive layer 2, each entry represents a big-block.
519          */
520         layer2_offset = layer1->phys_offset +
521                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
522         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
523         if (*errorp)
524                 goto failed;
525
526         /*
527          * Check CRC if not allocating into uninitialized space (which we
528          * aren't when reserving space).
529          */
530         if (!hammer_crc_test_layer2(layer2)) {
531                 hammer_lock_ex(&hmp->blkmap_lock);
532                 if (!hammer_crc_test_layer2(layer2))
533                         hpanic("CRC FAILED: LAYER2");
534                 hammer_unlock(&hmp->blkmap_lock);
535         }
536
537         /*
538          * Skip the layer if the zone is owned by someone other then us.
539          */
540         if (layer2->zone && layer2->zone != zone) {
541                 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
542                 goto again;
543         }
544         if (offset < layer2->append_off) {
545                 next_offset += layer2->append_off - offset;
546                 goto again;
547         }
548
549         /*
550          * We need the lock from this point on.  We have to re-check zone
551          * ownership after acquiring the lock and also check for reservations.
552          */
553         hammer_lock_ex(&hmp->blkmap_lock);
554
555         if (layer2->zone && layer2->zone != zone) {
556                 hammer_unlock(&hmp->blkmap_lock);
557                 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
558                 goto again;
559         }
560         if (offset < layer2->append_off) {
561                 hammer_unlock(&hmp->blkmap_lock);
562                 next_offset += layer2->append_off - offset;
563                 goto again;
564         }
565
566         /*
567          * The big-block might be reserved by another zone.  If it is reserved
568          * by our zone we may have to move next_offset past the append_off.
569          */
570         base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
571         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
572         if (resv) {
573                 if (resv->zone != zone) {
574                         hammer_unlock(&hmp->blkmap_lock);
575                         next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
576                                       ~HAMMER_BIGBLOCK_MASK64;
577                         goto again;
578                 }
579                 if (offset < resv->append_off) {
580                         hammer_unlock(&hmp->blkmap_lock);
581                         next_offset += resv->append_off - offset;
582                         goto again;
583                 }
584                 ++resv->refs;
585         } else {
586                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
587                                M_WAITOK | M_ZERO | M_USE_RESERVE);
588                 resx->refs = 1;
589                 resx->zone = zone;
590                 resx->zone_offset = base_off;
591                 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
592                         resx->flags |= HAMMER_RESF_LAYER2FREE;
593                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
594                 KKASSERT(resv == NULL);
595                 resv = resx;
596                 ++hammer_count_reservations;
597         }
598         resv->append_off = offset + bytes;
599
600         /*
601          * If we are not reserving a whole buffer but are at the start of
602          * a new block, call hammer_bnew() to avoid a disk read.
603          *
604          * If we are reserving a whole buffer (or more), the caller will
605          * probably use a direct read, so do nothing.
606          *
607          * If we do not have a whole lot of system memory we really can't
608          * afford to block while holding the blkmap_lock!
609          */
610         if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
611                 if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
612                         hammer_bnew(hmp, next_offset, errorp, &buffer3);
613                         if (*errorp)
614                                 goto failed;
615                 }
616         }
617
618         /*
619          * Adjust our iterator and alloc_offset.  The layer1 and layer2
620          * space beyond alloc_offset is uninitialized.  alloc_offset must
621          * be big-block aligned.
622          */
623         blockmap->next_offset = next_offset + bytes;
624         hammer_unlock(&hmp->blkmap_lock);
625
626 failed:
627         if (buffer1)
628                 hammer_rel_buffer(buffer1, 0);
629         if (buffer2)
630                 hammer_rel_buffer(buffer2, 0);
631         if (buffer3)
632                 hammer_rel_buffer(buffer3, 0);
633         hammer_rel_volume(root_volume, 0);
634         *zone_offp = next_offset;
635
636         return(resv);
637 }
638
639 /*
640  * Frontend function - Dedup bytes in a zone.
641  *
642  * Dedup reservations work exactly the same as normal write reservations
643  * except we only adjust bytes_free field and don't touch append offset.
644  * Finalization mechanic for dedup reservations is also the same as for
645  * normal write ones - the backend finalizes the reservation with
646  * hammer_blockmap_finalize().
647  */
648 hammer_reserve_t
649 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
650                               hammer_off_t zone_offset, int *errorp)
651 {
652         hammer_volume_t root_volume;
653         hammer_blockmap_t freemap;
654         hammer_blockmap_layer1_t layer1;
655         hammer_blockmap_layer2_t layer2;
656         hammer_buffer_t buffer1 = NULL;
657         hammer_buffer_t buffer2 = NULL;
658         hammer_off_t layer1_offset;
659         hammer_off_t layer2_offset;
660         hammer_off_t base_off;
661         hammer_reserve_t resv = NULL;
662         hammer_reserve_t resx = NULL;
663
664         /*
665          * Setup
666          */
667         KKASSERT(hammer_is_zone2_mapped_index(zone));
668         root_volume = hammer_get_root_volume(hmp, errorp);
669         if (*errorp)
670                 return (NULL);
671         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
672         KKASSERT(freemap->phys_offset != 0);
673
674         bytes = HAMMER_DATA_DOALIGN(bytes);
675         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
676
677         /*
678          * Dive layer 1.
679          */
680         layer1_offset = freemap->phys_offset +
681                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
682         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
683         if (*errorp)
684                 goto failed;
685
686         /*
687          * Check CRC.
688          */
689         if (!hammer_crc_test_layer1(layer1)) {
690                 hammer_lock_ex(&hmp->blkmap_lock);
691                 if (!hammer_crc_test_layer1(layer1))
692                         hpanic("CRC FAILED: LAYER1");
693                 hammer_unlock(&hmp->blkmap_lock);
694         }
695         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
696
697         /*
698          * Dive layer 2, each entry represents a big-block.
699          */
700         layer2_offset = layer1->phys_offset +
701                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
702         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
703         if (*errorp)
704                 goto failed;
705
706         /*
707          * Check CRC.
708          */
709         if (!hammer_crc_test_layer2(layer2)) {
710                 hammer_lock_ex(&hmp->blkmap_lock);
711                 if (!hammer_crc_test_layer2(layer2))
712                         hpanic("CRC FAILED: LAYER2");
713                 hammer_unlock(&hmp->blkmap_lock);
714         }
715
716         /*
717          * Fail if the zone is owned by someone other than us.
718          */
719         if (layer2->zone && layer2->zone != zone)
720                 goto failed;
721
722         /*
723          * We need the lock from this point on.  We have to re-check zone
724          * ownership after acquiring the lock and also check for reservations.
725          */
726         hammer_lock_ex(&hmp->blkmap_lock);
727
728         if (layer2->zone && layer2->zone != zone) {
729                 hammer_unlock(&hmp->blkmap_lock);
730                 goto failed;
731         }
732
733         base_off = hammer_xlate_to_zone2(zone_offset & ~HAMMER_BIGBLOCK_MASK64);
734         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
735         if (resv) {
736                 if (resv->zone != zone) {
737                         hammer_unlock(&hmp->blkmap_lock);
738                         resv = NULL;
739                         goto failed;
740                 }
741                 /*
742                  * Due to possible big-block underflow we can't simply
743                  * subtract bytes from bytes_free.
744                  */
745                 if (update_bytes_free(resv, bytes) == 0) {
746                         hammer_unlock(&hmp->blkmap_lock);
747                         resv = NULL;
748                         goto failed;
749                 }
750                 ++resv->refs;
751         } else {
752                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
753                                M_WAITOK | M_ZERO | M_USE_RESERVE);
754                 resx->refs = 1;
755                 resx->zone = zone;
756                 resx->bytes_free = layer2->bytes_free;
757                 /*
758                  * Due to possible big-block underflow we can't simply
759                  * subtract bytes from bytes_free.
760                  */
761                 if (update_bytes_free(resx, bytes) == 0) {
762                         hammer_unlock(&hmp->blkmap_lock);
763                         kfree(resx, hmp->m_misc);
764                         goto failed;
765                 }
766                 resx->zone_offset = base_off;
767                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
768                 KKASSERT(resv == NULL);
769                 resv = resx;
770                 ++hammer_count_reservations;
771         }
772
773         hammer_unlock(&hmp->blkmap_lock);
774
775 failed:
776         if (buffer1)
777                 hammer_rel_buffer(buffer1, 0);
778         if (buffer2)
779                 hammer_rel_buffer(buffer2, 0);
780         hammer_rel_volume(root_volume, 0);
781
782         return(resv);
783 }
784
785 static int
786 update_bytes_free(hammer_reserve_t resv, int bytes)
787 {
788         int32_t temp;
789
790         /*
791          * Big-block underflow check
792          */
793         temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
794         cpu_ccfence(); /* XXX do we really need it ? */
795         if (temp > resv->bytes_free) {
796                 hdkprintf("BIGBLOCK UNDERFLOW\n");
797                 return (0);
798         }
799
800         resv->bytes_free -= bytes;
801         return (1);
802 }
803
804 /*
805  * Dereference a reservation structure.  Upon the final release the
806  * underlying big-block is checked and if it is entirely free we delete
807  * any related HAMMER buffers to avoid potential conflicts with future
808  * reuse of the big-block.
809  */
810 void
811 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
812 {
813         hammer_off_t base_offset;
814         int error;
815
816         KKASSERT(resv->refs > 0);
817         KKASSERT(hammer_is_zone_raw_buffer(resv->zone_offset));
818
819         /*
820          * Setting append_off to the max prevents any new allocations
821          * from occuring while we are trying to dispose of the reservation,
822          * allowing us to safely delete any related HAMMER buffers.
823          *
824          * If we are unable to clean out all related HAMMER buffers we
825          * requeue the delay.
826          */
827         if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
828                 resv->append_off = HAMMER_BIGBLOCK_SIZE;
829                 base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
830                 if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
831                         hammer_dedup_cache_inval(hmp, base_offset);
832                 error = hammer_del_buffers(hmp, base_offset,
833                                            resv->zone_offset,
834                                            HAMMER_BIGBLOCK_SIZE,
835                                            1);
836                 if (hammer_debug_general & 0x20000) {
837                         hkprintf("delbgblk %016jx error %d\n",
838                                 (intmax_t)base_offset, error);
839                 }
840                 if (error)
841                         hammer_reserve_setdelay(hmp, resv);
842         }
843         if (--resv->refs == 0) {
844                 if (hammer_debug_general & 0x20000) {
845                         hkprintf("delresvr %016jx zone %02x\n",
846                                 (intmax_t)resv->zone_offset, resv->zone);
847                 }
848                 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
849                 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
850                 kfree(resv, hmp->m_misc);
851                 --hammer_count_reservations;
852         }
853 }
854
855 /*
856  * Prevent a potentially free big-block from being reused until after
857  * the related flushes have completely cycled, otherwise crash recovery
858  * could resurrect a data block that was already reused and overwritten.
859  *
860  * The caller might reset the underlying layer2 entry's append_off to 0, so
861  * our covering append_off must be set to max to prevent any reallocation
862  * until after the flush delays complete, not to mention proper invalidation
863  * of any underlying cached blocks.
864  */
865 static void
866 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
867                         int zone, hammer_blockmap_layer2_t layer2)
868 {
869         hammer_reserve_t resv;
870
871         /*
872          * Allocate the reservation if necessary.
873          *
874          * NOTE: need lock in future around resv lookup/allocation and
875          * the setdelay call, currently refs is not bumped until the call.
876          */
877 again:
878         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
879         if (resv == NULL) {
880                 resv = kmalloc(sizeof(*resv), hmp->m_misc,
881                                M_WAITOK | M_ZERO | M_USE_RESERVE);
882                 resv->zone = zone;
883                 resv->zone_offset = base_offset;
884                 resv->refs = 0;
885                 resv->append_off = HAMMER_BIGBLOCK_SIZE;
886
887                 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
888                         resv->flags |= HAMMER_RESF_LAYER2FREE;
889                 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
890                         kfree(resv, hmp->m_misc);
891                         goto again;
892                 }
893                 ++hammer_count_reservations;
894         } else {
895                 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
896                         resv->flags |= HAMMER_RESF_LAYER2FREE;
897         }
898         hammer_reserve_setdelay(hmp, resv);
899 }
900
901 /*
902  * Enter the reservation on the on-delay list, or move it if it
903  * is already on the list.
904  */
905 static void
906 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
907 {
908         if (resv->flags & HAMMER_RESF_ONDELAY) {
909                 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
910                 resv->flg_no = hmp->flusher.next + 1;
911                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
912         } else {
913                 ++resv->refs;
914                 ++hmp->rsv_fromdelay;
915                 resv->flags |= HAMMER_RESF_ONDELAY;
916                 resv->flg_no = hmp->flusher.next + 1;
917                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
918         }
919 }
920
921 /*
922  * Reserve has reached its flush point, remove it from the delay list
923  * and finish it off.  hammer_blockmap_reserve_complete() inherits
924  * the ondelay reference.
925  */
926 void
927 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
928 {
929         KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
930         resv->flags &= ~HAMMER_RESF_ONDELAY;
931         TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
932         --hmp->rsv_fromdelay;
933         hammer_blockmap_reserve_complete(hmp, resv);
934 }
935
936 /*
937  * Backend function - free (offset, bytes) in a zone.
938  *
939  * XXX error return
940  */
941 void
942 hammer_blockmap_free(hammer_transaction_t trans,
943                      hammer_off_t zone_offset, int bytes)
944 {
945         hammer_mount_t hmp;
946         hammer_volume_t root_volume;
947         hammer_blockmap_t freemap;
948         hammer_blockmap_layer1_t layer1;
949         hammer_blockmap_layer2_t layer2;
950         hammer_buffer_t buffer1 = NULL;
951         hammer_buffer_t buffer2 = NULL;
952         hammer_off_t layer1_offset;
953         hammer_off_t layer2_offset;
954         hammer_off_t base_off;
955         int error;
956         int zone;
957
958         if (bytes == 0)
959                 return;
960         hmp = trans->hmp;
961
962         /*
963          * Alignment
964          */
965         bytes = HAMMER_DATA_DOALIGN(bytes);
966         KKASSERT(bytes <= HAMMER_XBUFSIZE);
967         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
968                   ~HAMMER_BIGBLOCK_MASK64) == 0);
969
970         /*
971          * Basic zone validation & locking
972          */
973         zone = HAMMER_ZONE_DECODE(zone_offset);
974         KKASSERT(hammer_is_zone2_mapped_index(zone));
975         root_volume = trans->rootvol;
976         error = 0;
977
978         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
979
980         /*
981          * Dive layer 1.
982          */
983         layer1_offset = freemap->phys_offset +
984                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
985         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
986         if (error)
987                 goto failed;
988         KKASSERT(layer1->phys_offset &&
989                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
990         if (!hammer_crc_test_layer1(layer1)) {
991                 hammer_lock_ex(&hmp->blkmap_lock);
992                 if (!hammer_crc_test_layer1(layer1))
993                         hpanic("CRC FAILED: LAYER1");
994                 hammer_unlock(&hmp->blkmap_lock);
995         }
996
997         /*
998          * Dive layer 2, each entry represents a big-block.
999          */
1000         layer2_offset = layer1->phys_offset +
1001                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1002         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1003         if (error)
1004                 goto failed;
1005         if (!hammer_crc_test_layer2(layer2)) {
1006                 hammer_lock_ex(&hmp->blkmap_lock);
1007                 if (!hammer_crc_test_layer2(layer2))
1008                         hpanic("CRC FAILED: LAYER2");
1009                 hammer_unlock(&hmp->blkmap_lock);
1010         }
1011
1012         hammer_lock_ex(&hmp->blkmap_lock);
1013
1014         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1015
1016         /*
1017          * Free space previously allocated via blockmap_alloc().
1018          *
1019          * NOTE: bytes_free can be and remain negative due to de-dup ops
1020          *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
1021          */
1022         KKASSERT(layer2->zone == zone);
1023         layer2->bytes_free += bytes;
1024         KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1025
1026         /*
1027          * If a big-block becomes entirely free we must create a covering
1028          * reservation to prevent premature reuse.  Note, however, that
1029          * the big-block and/or reservation may still have an append_off
1030          * that allows further (non-reused) allocations.
1031          *
1032          * Once the reservation has been made we re-check layer2 and if
1033          * the big-block is still entirely free we reset the layer2 entry.
1034          * The reservation will prevent premature reuse.
1035          *
1036          * NOTE: hammer_buffer's are only invalidated when the reservation
1037          * is completed, if the layer2 entry is still completely free at
1038          * that time.  Any allocations from the reservation that may have
1039          * occured in the mean time, or active references on the reservation
1040          * from new pending allocations, will prevent the invalidation from
1041          * occuring.
1042          */
1043         if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1044                 base_off = hammer_xlate_to_zone2(zone_offset &
1045                                                 ~HAMMER_BIGBLOCK_MASK64);
1046
1047                 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1048                 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1049                         layer2->zone = 0;
1050                         layer2->append_off = 0;
1051                         hammer_modify_buffer(trans, buffer1,
1052                                              layer1, sizeof(*layer1));
1053                         ++layer1->blocks_free;
1054                         hammer_crc_set_layer1(layer1);
1055                         hammer_modify_buffer_done(buffer1);
1056                         hammer_modify_volume_field(trans,
1057                                         trans->rootvol,
1058                                         vol0_stat_freebigblocks);
1059                         ++root_volume->ondisk->vol0_stat_freebigblocks;
1060                         hmp->copy_stat_freebigblocks =
1061                            root_volume->ondisk->vol0_stat_freebigblocks;
1062                         hammer_modify_volume_done(trans->rootvol);
1063                 }
1064         }
1065         hammer_crc_set_layer2(layer2);
1066         hammer_modify_buffer_done(buffer2);
1067         hammer_unlock(&hmp->blkmap_lock);
1068
1069 failed:
1070         if (buffer1)
1071                 hammer_rel_buffer(buffer1, 0);
1072         if (buffer2)
1073                 hammer_rel_buffer(buffer2, 0);
1074 }
1075
1076 int
1077 hammer_blockmap_dedup(hammer_transaction_t trans,
1078                      hammer_off_t zone_offset, int bytes)
1079 {
1080         hammer_mount_t hmp;
1081         hammer_blockmap_t freemap;
1082         hammer_blockmap_layer1_t layer1;
1083         hammer_blockmap_layer2_t layer2;
1084         hammer_buffer_t buffer1 = NULL;
1085         hammer_buffer_t buffer2 = NULL;
1086         hammer_off_t layer1_offset;
1087         hammer_off_t layer2_offset;
1088         int32_t temp;
1089         int error;
1090         int zone __debugvar;
1091
1092         if (bytes == 0)
1093                 return (0);
1094         hmp = trans->hmp;
1095
1096         /*
1097          * Alignment
1098          */
1099         bytes = HAMMER_DATA_DOALIGN(bytes);
1100         KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
1101         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1102                   ~HAMMER_BIGBLOCK_MASK64) == 0);
1103
1104         /*
1105          * Basic zone validation & locking
1106          */
1107         zone = HAMMER_ZONE_DECODE(zone_offset);
1108         KKASSERT(hammer_is_zone2_mapped_index(zone));
1109         error = 0;
1110
1111         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1112
1113         /*
1114          * Dive layer 1.
1115          */
1116         layer1_offset = freemap->phys_offset +
1117                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1118         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1119         if (error)
1120                 goto failed;
1121         KKASSERT(layer1->phys_offset &&
1122                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1123         if (!hammer_crc_test_layer1(layer1)) {
1124                 hammer_lock_ex(&hmp->blkmap_lock);
1125                 if (!hammer_crc_test_layer1(layer1))
1126                         hpanic("CRC FAILED: LAYER1");
1127                 hammer_unlock(&hmp->blkmap_lock);
1128         }
1129
1130         /*
1131          * Dive layer 2, each entry represents a big-block.
1132          */
1133         layer2_offset = layer1->phys_offset +
1134                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1135         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1136         if (error)
1137                 goto failed;
1138         if (!hammer_crc_test_layer2(layer2)) {
1139                 hammer_lock_ex(&hmp->blkmap_lock);
1140                 if (!hammer_crc_test_layer2(layer2))
1141                         hpanic("CRC FAILED: LAYER2");
1142                 hammer_unlock(&hmp->blkmap_lock);
1143         }
1144
1145         hammer_lock_ex(&hmp->blkmap_lock);
1146
1147         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1148
1149         /*
1150          * Free space previously allocated via blockmap_alloc().
1151          *
1152          * NOTE: bytes_free can be and remain negative due to de-dup ops
1153          *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
1154          */
1155         KKASSERT(layer2->zone == zone);
1156         temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
1157         cpu_ccfence(); /* prevent gcc from optimizing temp out */
1158         if (temp > layer2->bytes_free) {
1159                 error = ERANGE;
1160                 goto underflow;
1161         }
1162         layer2->bytes_free -= bytes;
1163
1164         KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1165
1166         hammer_crc_set_layer2(layer2);
1167 underflow:
1168         hammer_modify_buffer_done(buffer2);
1169         hammer_unlock(&hmp->blkmap_lock);
1170
1171 failed:
1172         if (buffer1)
1173                 hammer_rel_buffer(buffer1, 0);
1174         if (buffer2)
1175                 hammer_rel_buffer(buffer2, 0);
1176         return (error);
1177 }
1178
1179 /*
1180  * Backend function - finalize (offset, bytes) in a zone.
1181  *
1182  * Allocate space that was previously reserved by the frontend.
1183  */
1184 int
1185 hammer_blockmap_finalize(hammer_transaction_t trans,
1186                          hammer_reserve_t resv,
1187                          hammer_off_t zone_offset, int bytes)
1188 {
1189         hammer_mount_t hmp;
1190         hammer_volume_t root_volume;
1191         hammer_blockmap_t freemap;
1192         hammer_blockmap_layer1_t layer1;
1193         hammer_blockmap_layer2_t layer2;
1194         hammer_buffer_t buffer1 = NULL;
1195         hammer_buffer_t buffer2 = NULL;
1196         hammer_off_t layer1_offset;
1197         hammer_off_t layer2_offset;
1198         int error;
1199         int zone;
1200         int offset;
1201
1202         if (bytes == 0)
1203                 return(0);
1204         hmp = trans->hmp;
1205
1206         /*
1207          * Alignment
1208          */
1209         bytes = HAMMER_DATA_DOALIGN(bytes);
1210         KKASSERT(bytes <= HAMMER_XBUFSIZE);
1211
1212         /*
1213          * Basic zone validation & locking
1214          */
1215         zone = HAMMER_ZONE_DECODE(zone_offset);
1216         KKASSERT(hammer_is_zone2_mapped_index(zone));
1217         root_volume = trans->rootvol;
1218         error = 0;
1219
1220         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1221
1222         /*
1223          * Dive layer 1.
1224          */
1225         layer1_offset = freemap->phys_offset +
1226                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1227         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1228         if (error)
1229                 goto failed;
1230         KKASSERT(layer1->phys_offset &&
1231                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1232         if (!hammer_crc_test_layer1(layer1)) {
1233                 hammer_lock_ex(&hmp->blkmap_lock);
1234                 if (!hammer_crc_test_layer1(layer1))
1235                         hpanic("CRC FAILED: LAYER1");
1236                 hammer_unlock(&hmp->blkmap_lock);
1237         }
1238
1239         /*
1240          * Dive layer 2, each entry represents a big-block.
1241          */
1242         layer2_offset = layer1->phys_offset +
1243                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1244         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1245         if (error)
1246                 goto failed;
1247         if (!hammer_crc_test_layer2(layer2)) {
1248                 hammer_lock_ex(&hmp->blkmap_lock);
1249                 if (!hammer_crc_test_layer2(layer2))
1250                         hpanic("CRC FAILED: LAYER2");
1251                 hammer_unlock(&hmp->blkmap_lock);
1252         }
1253
1254         hammer_lock_ex(&hmp->blkmap_lock);
1255
1256         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1257
1258         /*
1259          * Finalize some or all of the space covered by a current
1260          * reservation.  An allocation in the same layer may have
1261          * already assigned ownership.
1262          */
1263         if (layer2->zone == 0) {
1264                 hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
1265                 --layer1->blocks_free;
1266                 hammer_crc_set_layer1(layer1);
1267                 hammer_modify_buffer_done(buffer1);
1268                 layer2->zone = zone;
1269                 KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1270                 KKASSERT(layer2->append_off == 0);
1271                 hammer_modify_volume_field(trans,
1272                                 trans->rootvol,
1273                                 vol0_stat_freebigblocks);
1274                 --root_volume->ondisk->vol0_stat_freebigblocks;
1275                 hmp->copy_stat_freebigblocks =
1276                    root_volume->ondisk->vol0_stat_freebigblocks;
1277                 hammer_modify_volume_done(trans->rootvol);
1278         }
1279         if (layer2->zone != zone)
1280                 hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1281         KKASSERT(layer2->zone == zone);
1282         KKASSERT(bytes != 0);
1283         layer2->bytes_free -= bytes;
1284
1285         if (resv) {
1286                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1287         }
1288
1289         /*
1290          * Finalizations can occur out of order, or combined with allocations.
1291          * append_off must be set to the highest allocated offset.
1292          */
1293         offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1294         if (layer2->append_off < offset)
1295                 layer2->append_off = offset;
1296
1297         hammer_crc_set_layer2(layer2);
1298         hammer_modify_buffer_done(buffer2);
1299         hammer_unlock(&hmp->blkmap_lock);
1300
1301 failed:
1302         if (buffer1)
1303                 hammer_rel_buffer(buffer1, 0);
1304         if (buffer2)
1305                 hammer_rel_buffer(buffer2, 0);
1306         return(error);
1307 }
1308
1309 /*
1310  * Return the approximate number of free bytes in the big-block
1311  * containing the specified blockmap offset.
1312  *
1313  * WARNING: A negative number can be returned if data de-dup exists,
1314  *          and the result will also not represent he actual number
1315  *          of free bytes in this case.
1316  *
1317  *          This code is used only by the reblocker.
1318  */
1319 int
1320 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1321                         int *curp, int *errorp)
1322 {
1323         hammer_volume_t root_volume;
1324         hammer_blockmap_t blockmap;
1325         hammer_blockmap_t freemap;
1326         hammer_blockmap_layer1_t layer1;
1327         hammer_blockmap_layer2_t layer2;
1328         hammer_buffer_t buffer = NULL;
1329         hammer_off_t layer1_offset;
1330         hammer_off_t layer2_offset;
1331         int32_t bytes;
1332         int zone;
1333
1334         zone = HAMMER_ZONE_DECODE(zone_offset);
1335         KKASSERT(hammer_is_zone2_mapped_index(zone));
1336         root_volume = hammer_get_root_volume(hmp, errorp);
1337         if (*errorp) {
1338                 *curp = 0;
1339                 return(0);
1340         }
1341         blockmap = &hmp->blockmap[zone];
1342         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1343
1344         /*
1345          * Dive layer 1.
1346          */
1347         layer1_offset = freemap->phys_offset +
1348                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1349         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1350         if (*errorp) {
1351                 *curp = 0;
1352                 bytes = 0;
1353                 goto failed;
1354         }
1355         KKASSERT(layer1->phys_offset);
1356         if (!hammer_crc_test_layer1(layer1)) {
1357                 hammer_lock_ex(&hmp->blkmap_lock);
1358                 if (!hammer_crc_test_layer1(layer1))
1359                         hpanic("CRC FAILED: LAYER1");
1360                 hammer_unlock(&hmp->blkmap_lock);
1361         }
1362
1363         /*
1364          * Dive layer 2, each entry represents a big-block.
1365          *
1366          * (reuse buffer, layer1 pointer becomes invalid)
1367          */
1368         layer2_offset = layer1->phys_offset +
1369                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1370         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1371         if (*errorp) {
1372                 *curp = 0;
1373                 bytes = 0;
1374                 goto failed;
1375         }
1376         if (!hammer_crc_test_layer2(layer2)) {
1377                 hammer_lock_ex(&hmp->blkmap_lock);
1378                 if (!hammer_crc_test_layer2(layer2))
1379                         hpanic("CRC FAILED: LAYER2");
1380                 hammer_unlock(&hmp->blkmap_lock);
1381         }
1382         KKASSERT(layer2->zone == zone);
1383
1384         bytes = layer2->bytes_free;
1385
1386         /*
1387          * *curp becomes 1 only when no error and,
1388          * next_offset and zone_offset are in the same big-block.
1389          */
1390         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1391                 *curp = 0;  /* not same */
1392         else
1393                 *curp = 1;
1394 failed:
1395         if (buffer)
1396                 hammer_rel_buffer(buffer, 0);
1397         hammer_rel_volume(root_volume, 0);
1398         if (hammer_debug_general & 0x4000) {
1399                 hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset, bytes);
1400         }
1401         return(bytes);
1402 }
1403
1404
1405 /*
1406  * Lookup a blockmap offset and verify blockmap layers.
1407  */
1408 hammer_off_t
1409 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1410                         int *errorp)
1411 {
1412         hammer_volume_t root_volume;
1413         hammer_blockmap_t freemap;
1414         hammer_blockmap_layer1_t layer1;
1415         hammer_blockmap_layer2_t layer2;
1416         hammer_buffer_t buffer = NULL;
1417         hammer_off_t layer1_offset;
1418         hammer_off_t layer2_offset;
1419         hammer_off_t result_offset;
1420         hammer_off_t base_off;
1421         hammer_reserve_t resv __debugvar;
1422         int zone;
1423
1424         /*
1425          * Calculate the zone-2 offset.
1426          */
1427         zone = HAMMER_ZONE_DECODE(zone_offset);
1428         result_offset = hammer_xlate_to_zone2(zone_offset);
1429
1430         /*
1431          * Validate the allocation zone
1432          */
1433         root_volume = hammer_get_root_volume(hmp, errorp);
1434         if (*errorp)
1435                 return(0);
1436         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1437         KKASSERT(freemap->phys_offset != 0);
1438
1439         /*
1440          * Dive layer 1.
1441          */
1442         layer1_offset = freemap->phys_offset +
1443                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1444         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1445         if (*errorp)
1446                 goto failed;
1447         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1448         if (!hammer_crc_test_layer1(layer1)) {
1449                 hammer_lock_ex(&hmp->blkmap_lock);
1450                 if (!hammer_crc_test_layer1(layer1))
1451                         hpanic("CRC FAILED: LAYER1");
1452                 hammer_unlock(&hmp->blkmap_lock);
1453         }
1454
1455         /*
1456          * Dive layer 2, each entry represents a big-block.
1457          */
1458         layer2_offset = layer1->phys_offset +
1459                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1460         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1461
1462         if (*errorp)
1463                 goto failed;
1464         if (layer2->zone == 0) {
1465                 base_off = hammer_xlate_to_zone2(zone_offset &
1466                                                 ~HAMMER_BIGBLOCK_MASK64);
1467                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1468                                  base_off);
1469                 KKASSERT(resv && resv->zone == zone);
1470
1471         } else if (layer2->zone != zone) {
1472                 hpanic("bad zone %d/%d", layer2->zone, zone);
1473         }
1474         if (!hammer_crc_test_layer2(layer2)) {
1475                 hammer_lock_ex(&hmp->blkmap_lock);
1476                 if (!hammer_crc_test_layer2(layer2))
1477                         hpanic("CRC FAILED: LAYER2");
1478                 hammer_unlock(&hmp->blkmap_lock);
1479         }
1480
1481 failed:
1482         if (buffer)
1483                 hammer_rel_buffer(buffer, 0);
1484         hammer_rel_volume(root_volume, 0);
1485         if (hammer_debug_general & 0x0800) {
1486                 hdkprintf("%016jx -> %016jx\n",
1487                         (intmax_t)zone_offset, (intmax_t)result_offset);
1488         }
1489         return(result_offset);
1490 }
1491
1492
1493 /*
1494  * Check space availability
1495  *
1496  * MPSAFE - does not require fs_token
1497  */
1498 int
1499 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1500 {
1501         const int in_size = sizeof(struct hammer_inode_data) +
1502                             sizeof(union hammer_btree_elm);
1503         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1504         int64_t usedbytes;
1505
1506         usedbytes = hmp->rsv_inodes * in_size +
1507                     hmp->rsv_recs * rec_size +
1508                     hmp->rsv_databytes +
1509                     ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1510                     ((int64_t)hammer_limit_dirtybufspace) +
1511                     (slop << HAMMER_BIGBLOCK_BITS);
1512
1513         if (resp)
1514                 *resp = usedbytes;
1515
1516         if (hmp->copy_stat_freebigblocks >=
1517             (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1518                 return(0);
1519         }
1520
1521         return (ENOSPC);
1522 }
1523
1524 static int
1525 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1526 {
1527         hammer_blockmap_t freemap;
1528         hammer_blockmap_layer1_t layer1;
1529         hammer_buffer_t buffer1 = NULL;
1530         hammer_off_t layer1_offset;
1531         int error = 0;
1532
1533         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1534
1535         layer1_offset = freemap->phys_offset +
1536                         HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
1537         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1538         if (error)
1539                 goto end;
1540
1541         /*
1542          * No more physically available space in layer1s
1543          * of the current volume, go to the next volume.
1544          */
1545         if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
1546                 hammer_skip_volume(offsetp);
1547 end:
1548         if (buffer1)
1549                 hammer_rel_buffer(buffer1, 0);
1550         return(error);
1551 }
1552
1553 static void
1554 hammer_skip_volume(hammer_off_t *offsetp)
1555 {
1556         hammer_off_t offset;
1557         int zone, vol_no;
1558
1559         offset = *offsetp;
1560         zone = HAMMER_ZONE_DECODE(offset);
1561         vol_no = HAMMER_VOL_DECODE(offset) + 1;
1562         KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1563
1564         if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
1565                 vol_no = 0;
1566                 ++zone;
1567         }
1568
1569         *offsetp = HAMMER_ENCODE(zone, vol_no, 0);
1570 }