/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER blockmap
 */
#include <vm/vm_page2.h>

#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
				    hammer_off_t base_offset, int zone,
				    hammer_blockmap_layer2_t layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
static int update_bytes_free(hammer_reserve_t resv, int bytes);
static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
static void hammer_skip_volume(hammer_off_t *offsetp);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
	     hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
	if (res1->zone_offset < res2->zone_offset)
		return(-1);
	if (res1->zone_offset > res2->zone_offset)
		return(1);
	return(0);
}
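
/*
 * NOTE: RB_GENERATE2 also emits a keyed RB_LOOKUP() that takes the scalar
 * key directly (here a hammer_off_t zone_offset), so the reservation
 * lookups below can search by big-block base offset without constructing
 * a dummy node.
 */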

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
		      hammer_off_t hint, int *errorp)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	hammer_reserve_t resv;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_buffer_t buffer3 = NULL;
	hammer_off_t tmp_offset;
	hammer_off_t next_offset;
	hammer_off_t result_offset;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	int loops = 0;
	int offset;		/* offset within big-block */
	int use_hint;

	hmp = trans->hmp;

	/*
	 * Deal with alignment and buffer-boundary issues.
	 *
	 * Be careful, certain primary alignments are used below to allocate
	 * new blockmap blocks.
	 */
	bytes = HAMMER_DATA_DOALIGN(bytes);
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
	KKASSERT(hammer_is_zone2_mapped_index(zone));

	/*
	 * Setup
	 */
	root_volume = trans->rootvol;
	*errorp = 0;
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

	/*
	 * Use the hint if we have one.
	 */
	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
		next_offset = HAMMER_DATA_DOALIGN_WITH(hammer_off_t, hint);
		use_hint = 1;
	} else {
		next_offset = blockmap->next_offset;
		use_hint = 0;
	}
again:

	/*
	 * use_hint is turned off if we leave the hinted big-block.
	 */
	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
		next_offset = blockmap->next_offset;
		use_hint = 0;
	}

	/*
	 * Check for wrap
	 */
	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
		if (++loops == 2) {
			hmkprintf(hmp, "No space left for zone %d "
				"allocation\n", zone);
			result_offset = 0;
			*errorp = ENOSPC;
			goto failed;
		}
		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
	}

	/*
	 * The allocation request may not cross a buffer boundary.  Special
	 * large allocations must not cross a big-block boundary.
	 */
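
	/*
	 * The boundary tests below use a common idiom: for an alignment
	 * mask M, (a ^ b) & ~M is zero if and only if a and b fall within
	 * the same M-aligned block.  For example, with a 16KB buffer mask,
	 * offsets 0x3fff and 0x4000 differ in bits above the mask and thus
	 * straddle a buffer boundary; the allocation is then bumped to the
	 * next aligned base and retried.
	 */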
	tmp_offset = next_offset + bytes - 1;
	if (bytes <= HAMMER_BUFSIZE) {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
			goto again;
		}
	} else {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
			goto again;
		}
	}
	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);

	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	if (*errorp) {
		result_offset = 0;
		goto failed;
	}

	/*
	 * Check CRC.
	 */
	if (!hammer_crc_test_layer1(layer1)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer1(layer1))
			hpanic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}
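
	/*
	 * NOTE: the CRC is deliberately tested twice.  The first test runs
	 * unlocked and can race a writer that is mid-update; only if it
	 * fails do we take blkmap_lock and retest.  A failure under the
	 * lock is authoritative and panics.  The same pattern is used for
	 * every layer1/layer2 CRC check in this file.
	 */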

	/*
	 * If we are at a big-block boundary and layer1 indicates no
	 * free big-blocks, then we cannot allocate a new big-block in
	 * layer2, so skip to the next layer1 entry.
	 */
	if (offset == 0 && layer1->blocks_free == 0) {
		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
		if (hammer_check_volume(hmp, &next_offset)) {
			result_offset = 0;
			goto failed;
		}
		goto again;
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Skip the whole volume if it is pointing to a layer2 big-block
	 * on a volume that we are currently trying to remove from the
	 * file-system. This is used by the volume-del code together with
	 * the reblocker to free up a volume.
	 */
	if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
		hammer_skip_volume(&next_offset);
		goto again;
	}

	/*
	 * Dive layer 2, each entry represents a big-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	if (*errorp) {
		result_offset = 0;
		goto failed;
	}

	/*
	 * Check CRC.  This can race another thread holding the lock
	 * and in the middle of modifying layer2.
	 */
	if (!hammer_crc_test_layer2(layer2)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer2(layer2))
			hpanic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}
	/*
	 * Skip the layer if the zone is owned by someone other than us.
	 */
	if (layer2->zone && layer2->zone != zone) {
		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		next_offset += layer2->append_off - offset;
		goto again;
	}

#if 0
	/*
	 * If operating in the current non-hint blockmap block, do not
	 * allow it to get over-full.  Also drop any active hinting so
	 * blockmap->next_offset is updated at the end.
	 *
	 * We do this for B-Tree and meta-data allocations to provide
	 * localization for updates.
	 */
	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
	     zone == HAMMER_ZONE_META_INDEX) &&
	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
			use_hint = 0;
			goto again;
		}
	}
#endif

	/*
	 * We need the lock from this point on.  We have to re-check zone
	 * ownership after acquiring the lock and also check for reservations.
	 */
	hammer_lock_ex(&hmp->blkmap_lock);

	if (layer2->zone && layer2->zone != zone) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * The big-block might be reserved by another zone.  If it is reserved
	 * by our zone we may have to move next_offset past the append_off.
	 */
	base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if (resv->zone != zone) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
				      ~HAMMER_BIGBLOCK_MASK64;
			goto again;
		}
		if (offset < resv->append_off) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset += resv->append_off - offset;
			goto again;
		}
		++resv->refs;
	}

	/*
	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
	 * of the layer for real.  At this point we've validated any
	 * reservation that might exist and can just ignore resv.
	 */
	if (layer2->zone == 0) {
		/*
		 * Assign the big-block to our zone
		 */
		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
		--layer1->blocks_free;
		hammer_crc_set_layer1(layer1);
		hammer_modify_buffer_done(buffer1);
		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
		layer2->zone = zone;
		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
		KKASSERT(layer2->append_off == 0);
		hammer_modify_volume_field(trans, trans->rootvol,
					   vol0_stat_freebigblocks);
		--root_volume->ondisk->vol0_stat_freebigblocks;
		hmp->copy_stat_freebigblocks =
			root_volume->ondisk->vol0_stat_freebigblocks;
		hammer_modify_volume_done(trans->rootvol);
	} else {
		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
	}
	KKASSERT(layer2->zone == zone);

	/*
	 * NOTE: bytes_free can legally go negative due to de-dup.
	 */
	layer2->bytes_free -= bytes;
	KKASSERT(layer2->append_off <= offset);
	layer2->append_off = offset + bytes;
	hammer_crc_set_layer2(layer2);
	hammer_modify_buffer_done(buffer2);

	/*
	 * We hold the blockmap lock and should be the only ones
	 * capable of modifying resv->append_off.  Track the allocation
	 * as appropriate.
	 */
	KKASSERT(bytes != 0);
	if (resv) {
		KKASSERT(resv->append_off <= offset);
		resv->append_off = offset + bytes;
		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
		hammer_blockmap_reserve_complete(hmp, resv);
	}

	/*
	 * If we are allocating from the base of a new buffer we can avoid
	 * a disk read by calling hammer_bnew_ext().
	 */
	if ((next_offset & HAMMER_BUFMASK) == 0) {
		hammer_bnew_ext(trans->hmp, next_offset, bytes,
				errorp, &buffer3);
		if (*errorp) {
			result_offset = 0;
			goto failed;
		}
	}
	result_offset = next_offset;

	/*
	 * If we weren't supplied with a hint or could not use the hint
	 * then we wound up using blockmap->next_offset as the hint and
	 * need to save it.
	 */
	if (use_hint == 0) {
		hammer_modify_volume_noundo(NULL, root_volume);
		blockmap->next_offset = next_offset + bytes;
		hammer_modify_volume_done(root_volume);
	}
	hammer_unlock(&hmp->blkmap_lock);
failed:

	/*
	 * Cleanup
	 */
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	if (buffer3)
		hammer_rel_buffer(buffer3, 0);

	return(result_offset);
}
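
/*
 * Usage sketch (hypothetical caller, error handling elided): a backend
 * B-Tree node allocation might look like the following, where
 * hint_offset is a nearby element's offset, or 0 for no hint:
 *
 *	hammer_off_t node_offset;
 *	int error;
 *
 *	node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *					    sizeof(struct hammer_node_ondisk),
 *					    hint_offset, &error);
 *	if (node_offset == 0)
 *		return (error);
 */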

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for big-blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
			hammer_off_t *zone_offp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_buffer_t buffer3 = NULL;
	hammer_off_t tmp_offset;
	hammer_off_t next_offset;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv;
	hammer_reserve_t resx = NULL;
	int loops = 0;
	int offset;

	/*
	 * Setup
	 */
	KKASSERT(hammer_is_zone2_mapped_index(zone));
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(NULL);
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

	/*
	 * Deal with alignment and buffer-boundary issues.
	 *
	 * Be careful, certain primary alignments are used below to allocate
	 * new blockmap blocks.
	 */
	bytes = HAMMER_DATA_DOALIGN(bytes);
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

	next_offset = blockmap->next_offset;
again:
	resv = NULL;
	/*
	 * Check for wrap
	 */
	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
		if (++loops == 2) {
			hmkprintf(hmp, "No space left for zone %d "
				"reservation\n", zone);
			*errorp = ENOSPC;
			goto failed;
		}
		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
	}

	/*
	 * The allocation request may not cross a buffer boundary.  Special
	 * large allocations must not cross a big-block boundary.
	 */
	tmp_offset = next_offset + bytes - 1;
	if (bytes <= HAMMER_BUFSIZE) {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
			goto again;
		}
	} else {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
			goto again;
		}
	}
	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	if (*errorp)
		goto failed;

	/*
	 * Check CRC.
	 */
	if (!hammer_crc_test_layer1(layer1)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer1(layer1))
			hpanic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * If we are at a big-block boundary and layer1 indicates no
	 * free big-blocks, then we cannot allocate a new big-block in
	 * layer2, so skip to the next layer1 entry.
	 */
	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
	    layer1->blocks_free == 0) {
		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
		if (hammer_check_volume(hmp, &next_offset))
			goto failed;
		goto again;
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Dive layer 2, each entry represents a big-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	if (*errorp)
		goto failed;

	/*
	 * Check CRC if not allocating into uninitialized space (which we
	 * aren't when reserving space).
	 */
	if (!hammer_crc_test_layer2(layer2)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer2(layer2))
			hpanic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Skip the layer if the zone is owned by someone other than us.
	 */
	if (layer2->zone && layer2->zone != zone) {
		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * We need the lock from this point on.  We have to re-check zone
	 * ownership after acquiring the lock and also check for reservations.
	 */
	hammer_lock_ex(&hmp->blkmap_lock);

	if (layer2->zone && layer2->zone != zone) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * The big-block might be reserved by another zone.  If it is reserved
	 * by our zone we may have to move next_offset past the append_off.
	 */
	base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if (resv->zone != zone) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
				      ~HAMMER_BIGBLOCK_MASK64;
			goto again;
		}
		if (offset < resv->append_off) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset += resv->append_off - offset;
			goto again;
		}
		++resv->refs;
	} else {
		resx = kmalloc(sizeof(*resv), hmp->m_misc,
			       M_WAITOK | M_ZERO | M_USE_RESERVE);
		resx->refs = 1;
		resx->zone = zone;
		resx->zone_offset = base_off;
		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
			resx->flags |= HAMMER_RESF_LAYER2FREE;
		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
		KKASSERT(resv == NULL);
		resv = resx;
		++hammer_count_reservations;
	}
	resv->append_off = offset + bytes;

	/*
	 * If we are not reserving a whole buffer but are at the start of
	 * a new block, call hammer_bnew() to avoid a disk read.
	 *
	 * If we are reserving a whole buffer (or more), the caller will
	 * probably use a direct read, so do nothing.
	 *
	 * If we do not have a whole lot of system memory we really can't
	 * afford to block while holding the blkmap_lock!
	 */
	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
			hammer_bnew(hmp, next_offset, errorp, &buffer3);
			if (*errorp)
				goto failed;
		}
	}

	/*
	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
	 * space beyond alloc_offset is uninitialized.  alloc_offset must
	 * be big-block aligned.
	 */
	blockmap->next_offset = next_offset + bytes;
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	if (buffer3)
		hammer_rel_buffer(buffer3, 0);
	hammer_rel_volume(root_volume, 0);
	*zone_offp = next_offset;

	return(resv);
}
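
/*
 * Usage sketch (hypothetical frontend caller): reserve space for a
 * direct data write, issue the I/O against the returned zone offset,
 * and let the backend pair the reservation with
 * hammer_blockmap_finalize() when the record commits:
 *
 *	hammer_reserve_t resv;
 *	hammer_off_t zone_off;
 *	int error;
 *
 *	resv = hammer_blockmap_reserve(hmp, HAMMER_ZONE_LARGE_DATA_INDEX,
 *				       bytes, &zone_off, &error);
 *	if (resv == NULL)
 *		return (error);
 *	... issue direct write I/O against zone_off ...
 *	hammer_blockmap_reserve_complete(hmp, resv);
 */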

/*
 * Frontend function - Dedup bytes in a zone.
 *
 * Dedup reservations work exactly the same as normal write reservations
 * except that we only adjust the bytes_free field and don't touch the
 * append offset.  The finalization mechanism for dedup reservations is
 * also the same as for normal write reservations - the backend finalizes
 * the reservation with hammer_blockmap_finalize().
 */
hammer_reserve_t
hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
			      hammer_off_t zone_offset, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t freemap;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv = NULL;
	hammer_reserve_t resx = NULL;

	/*
	 * Setup
	 */
	KKASSERT(hammer_is_zone2_mapped_index(zone));
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return (NULL);
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(freemap->phys_offset != 0);

	bytes = HAMMER_DATA_DOALIGN(bytes);
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	if (*errorp)
		goto failed;

	/*
	 * Check CRC.
	 */
	if (!hammer_crc_test_layer1(layer1)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer1(layer1))
			hpanic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Dive layer 2, each entry represents a big-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	if (*errorp)
		goto failed;

	/*
	 * Check CRC.
	 */
	if (!hammer_crc_test_layer2(layer2)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer2(layer2))
			hpanic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Fail if the zone is owned by someone other than us.
	 */
	if (layer2->zone && layer2->zone != zone)
		goto failed;

	/*
	 * We need the lock from this point on.  We have to re-check zone
	 * ownership after acquiring the lock and also check for reservations.
	 */
	hammer_lock_ex(&hmp->blkmap_lock);

	if (layer2->zone && layer2->zone != zone) {
		hammer_unlock(&hmp->blkmap_lock);
		goto failed;
	}

	base_off = hammer_xlate_to_zone2(zone_offset & ~HAMMER_BIGBLOCK_MASK64);
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if (resv->zone != zone) {
			hammer_unlock(&hmp->blkmap_lock);
			resv = NULL;
			goto failed;
		}
		/*
		 * Due to possible big-block underflow we can't simply
		 * subtract bytes from bytes_free.
		 */
		if (update_bytes_free(resv, bytes) == 0) {
			hammer_unlock(&hmp->blkmap_lock);
			resv = NULL;
			goto failed;
		}
		++resv->refs;
	} else {
		resx = kmalloc(sizeof(*resv), hmp->m_misc,
			       M_WAITOK | M_ZERO | M_USE_RESERVE);
		resx->refs = 1;
		resx->zone = zone;
		resx->bytes_free = layer2->bytes_free;
		/*
		 * Due to possible big-block underflow we can't simply
		 * subtract bytes from bytes_free.
		 */
		if (update_bytes_free(resx, bytes) == 0) {
			hammer_unlock(&hmp->blkmap_lock);
			kfree(resx, hmp->m_misc);
			goto failed;
		}
		resx->zone_offset = base_off;
		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
		KKASSERT(resv == NULL);
		resv = resx;
		++hammer_count_reservations;
	}

	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	hammer_rel_volume(root_volume, 0);

	return(resv);
}

static int
update_bytes_free(hammer_reserve_t resv, int bytes)
{
	int32_t temp;

	/*
	 * Big-block underflow check
	 */
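	/*
	 * The check below relies on 32-bit wraparound: subtracting
	 * 2 * HAMMER_BIGBLOCK_SIZE from bytes_free can only yield a value
	 * greater than bytes_free if the subtraction wrapped, i.e. if
	 * de-dup has already driven bytes_free to within two big-blocks
	 * of INT32_MIN.  In that case we refuse the reservation rather
	 * than let the counter wrap.  The cpu_ccfence() keeps the compiler
	 * from optimizing the comparison away on signed-overflow grounds.
	 */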
	temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
	cpu_ccfence(); /* XXX do we really need it ? */
	if (temp > resv->bytes_free) {
		hdkprintf("BIGBLOCK UNDERFLOW\n");
		return (0);
	}

	resv->bytes_free -= bytes;
	return (1);
}

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
	hammer_off_t base_offset;
	int error;

	KKASSERT(resv->refs > 0);
	KKASSERT(hammer_is_zone_raw_buffer(resv->zone_offset));

	/*
	 * Setting append_off to the max prevents any new allocations
	 * from occurring while we are trying to dispose of the reservation,
	 * allowing us to safely delete any related HAMMER buffers.
	 *
	 * If we are unable to clean out all related HAMMER buffers we
	 * requeue the delay.
	 */
	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
		resv->append_off = HAMMER_BIGBLOCK_SIZE;
		base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
			hammer_dedup_cache_inval(hmp, base_offset);
		error = hammer_del_buffers(hmp, base_offset,
					   resv->zone_offset,
					   HAMMER_BIGBLOCK_SIZE,
					   1);
		if (hammer_debug_general & 0x20000) {
			hkprintf("delbgblk %016jx error %d\n",
				(intmax_t)base_offset, error);
		}
		if (error)
			hammer_reserve_setdelay(hmp, resv);
	}
	if (--resv->refs == 0) {
		if (hammer_debug_general & 0x20000) {
			hkprintf("delresvr %016jx zone %02x\n",
				(intmax_t)resv->zone_offset, resv->zone);
		}
		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
		kfree(resv, hmp->m_misc);
		--hammer_count_reservations;
	}
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
			int zone, hammer_blockmap_layer2_t layer2)
{
	hammer_reserve_t resv;

	/*
	 * Allocate the reservation if necessary.
	 *
	 * NOTE: need lock in future around resv lookup/allocation and
	 * the setdelay call, currently refs is not bumped until the call.
	 */
again:
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
	if (resv == NULL) {
		resv = kmalloc(sizeof(*resv), hmp->m_misc,
			       M_WAITOK | M_ZERO | M_USE_RESERVE);
		resv->zone = zone;
		resv->zone_offset = base_offset;
		resv->refs = 0;
		resv->append_off = HAMMER_BIGBLOCK_SIZE;

		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
			resv->flags |= HAMMER_RESF_LAYER2FREE;
		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
			kfree(resv, hmp->m_misc);
			goto again;
		}
		++hammer_count_reservations;
	} else {
		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
			resv->flags |= HAMMER_RESF_LAYER2FREE;
	}
	hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
	if (resv->flags & HAMMER_RESF_ONDELAY) {
		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
		resv->flg_no = hmp->flusher.next + 1;
		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
	} else {
		++resv->refs;
		++hmp->rsv_fromdelay;
		resv->flags |= HAMMER_RESF_ONDELAY;
		resv->flg_no = hmp->flusher.next + 1;
		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
	}
}
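
/*
 * NOTE: flg_no is set to hmp->flusher.next + 1, which appears to park
 * the reservation until the flush group currently being built has fully
 * cycled.  When that flush point is reached the flusher calls
 * hammer_reserve_clrdelay() below, which inherits and drops the
 * reference taken here.
 */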

/*
 * Reserve has reached its flush point, remove it from the delay list
 * and finish it off.  hammer_blockmap_reserve_complete() inherits
 * the ondelay reference.
 */
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
	resv->flags &= ~HAMMER_RESF_ONDELAY;
	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
	--hmp->rsv_fromdelay;
	hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
		     hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t freemap;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	int error;
	int zone;

	if (bytes == 0)
		return;
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = HAMMER_DATA_DOALIGN(bytes);
	KKASSERT(bytes <= HAMMER_XBUFSIZE);
	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
		  ~HAMMER_BIGBLOCK_MASK64) == 0);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(hammer_is_zone2_mapped_index(zone));
	root_volume = trans->rootvol;
	error = 0;

	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	if (error)
		goto failed;
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (!hammer_crc_test_layer1(layer1)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer1(layer1))
			hpanic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a big-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	if (error)
		goto failed;
	if (!hammer_crc_test_layer2(layer2)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer2(layer2))
			hpanic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	hammer_lock_ex(&hmp->blkmap_lock);

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Free space previously allocated via blockmap_alloc().
	 *
	 * NOTE: bytes_free can be and remain negative due to de-dup ops
	 *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
	 */
	KKASSERT(layer2->zone == zone);
	layer2->bytes_free += bytes;
	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

	/*
	 * If a big-block becomes entirely free we must create a covering
	 * reservation to prevent premature reuse.  Note, however, that
	 * the big-block and/or reservation may still have an append_off
	 * that allows further (non-reused) allocations.
	 *
	 * Once the reservation has been made we re-check layer2 and if
	 * the big-block is still entirely free we reset the layer2 entry.
	 * The reservation will prevent premature reuse.
	 *
	 * NOTE: hammer_buffer's are only invalidated when the reservation
	 * is completed, if the layer2 entry is still completely free at
	 * that time.  Any allocations from the reservation that may have
	 * occurred in the meantime, or active references on the reservation
	 * from new pending allocations, will prevent the invalidation from
	 * occurring.
	 */
	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
		base_off = hammer_xlate_to_zone2(zone_offset &
						~HAMMER_BIGBLOCK_MASK64);

		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
			layer2->zone = 0;
			layer2->append_off = 0;
			hammer_modify_buffer(trans, buffer1,
					     layer1, sizeof(*layer1));
			++layer1->blocks_free;
			hammer_crc_set_layer1(layer1);
			hammer_modify_buffer_done(buffer1);
			hammer_modify_volume_field(trans,
					trans->rootvol,
					vol0_stat_freebigblocks);
			++root_volume->ondisk->vol0_stat_freebigblocks;
			hmp->copy_stat_freebigblocks =
			   root_volume->ondisk->vol0_stat_freebigblocks;
			hammer_modify_volume_done(trans->rootvol);
		}
	}
	hammer_crc_set_layer2(layer2);
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
}

int
hammer_blockmap_dedup(hammer_transaction_t trans,
		     hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_blockmap_t freemap;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int32_t temp;
	int error;
	int zone __debugvar;

	if (bytes == 0)
		return (0);
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = HAMMER_DATA_DOALIGN(bytes);
	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
		  ~HAMMER_BIGBLOCK_MASK64) == 0);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(hammer_is_zone2_mapped_index(zone));
	error = 0;

	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	if (error)
		goto failed;
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (!hammer_crc_test_layer1(layer1)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer1(layer1))
			hpanic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a big-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	if (error)
		goto failed;
	if (!hammer_crc_test_layer2(layer2)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer2(layer2))
			hpanic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	hammer_lock_ex(&hmp->blkmap_lock);

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Free space previously allocated via blockmap_alloc().
	 *
	 * NOTE: bytes_free can be and remain negative due to de-dup ops
	 *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
	 */
	KKASSERT(layer2->zone == zone);
	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
	cpu_ccfence(); /* prevent gcc from optimizing temp out */
	if (temp > layer2->bytes_free) {
		error = ERANGE;
		goto underflow;
	}
	layer2->bytes_free -= bytes;

	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

	hammer_crc_set_layer2(layer2);
underflow:
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	return (error);
}
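
/*
 * NOTE: the ERANGE path above is the same wraparound guard implemented
 * by update_bytes_free() for in-memory reservations, applied here to the
 * on-disk layer2 entry: if subtracting two big-blocks' worth from
 * bytes_free appears to increase it, the counter is about to wrap and
 * the dedup is refused.
 */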

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
			 hammer_reserve_t resv,
			 hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t freemap;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int error;
	int zone;
	int offset;

	if (bytes == 0)
		return(0);
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = HAMMER_DATA_DOALIGN(bytes);
	KKASSERT(bytes <= HAMMER_XBUFSIZE);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(hammer_is_zone2_mapped_index(zone));
	root_volume = trans->rootvol;
	error = 0;

	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	if (error)
		goto failed;
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (!hammer_crc_test_layer1(layer1)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer1(layer1))
			hpanic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a big-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	if (error)
		goto failed;
	if (!hammer_crc_test_layer2(layer2)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer2(layer2))
			hpanic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	hammer_lock_ex(&hmp->blkmap_lock);

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Finalize some or all of the space covered by a current
	 * reservation.  An allocation in the same layer may have
	 * already assigned ownership.
	 */
	if (layer2->zone == 0) {
		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
		--layer1->blocks_free;
		hammer_crc_set_layer1(layer1);
		hammer_modify_buffer_done(buffer1);
		layer2->zone = zone;
		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
		KKASSERT(layer2->append_off == 0);
		hammer_modify_volume_field(trans,
				trans->rootvol,
				vol0_stat_freebigblocks);
		--root_volume->ondisk->vol0_stat_freebigblocks;
		hmp->copy_stat_freebigblocks =
		   root_volume->ondisk->vol0_stat_freebigblocks;
		hammer_modify_volume_done(trans->rootvol);
	}
	if (layer2->zone != zone)
		hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
	KKASSERT(layer2->zone == zone);
	KKASSERT(bytes != 0);
	layer2->bytes_free -= bytes;

	if (resv) {
		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
	}

	/*
	 * Finalizations can occur out of order, or combined with allocations.
	 * append_off must be set to the highest allocated offset.
	 */
	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
	if (layer2->append_off < offset)
		layer2->append_off = offset;

	hammer_crc_set_layer2(layer2);
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	return(error);
}

/*
 * Return the approximate number of free bytes in the big-block
 * containing the specified blockmap offset.
 *
 * WARNING: A negative number can be returned if data de-dup exists,
 *          and the result will also not represent the actual number
 *          of free bytes in this case.
 *
 *          This code is used only by the reblocker.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
			int *curp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_buffer_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int32_t bytes;
	int zone;

	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(hammer_is_zone2_mapped_index(zone));
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp) {
		*curp = 0;
		return(0);
	}
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
	if (*errorp) {
		*curp = 0;
		bytes = 0;
		goto failed;
	}
	KKASSERT(layer1->phys_offset);
	if (!hammer_crc_test_layer1(layer1)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer1(layer1))
			hpanic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a big-block.
	 *
	 * (reuse buffer, layer1 pointer becomes invalid)
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
	if (*errorp) {
		*curp = 0;
		bytes = 0;
		goto failed;
	}
	if (!hammer_crc_test_layer2(layer2)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer2(layer2))
			hpanic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}
	KKASSERT(layer2->zone == zone);

	bytes = layer2->bytes_free;

	/*
	 * *curp is set to 1 only when there is no error and
	 * next_offset and zone_offset fall within the same big-block.
	 */
	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
		*curp = 0;	/* not same */
	else
		*curp = 1;
failed:
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	hammer_rel_volume(root_volume, 0);
	if (hammer_debug_general & 0x4000) {
		hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset, bytes);
	}
	return(bytes);
}


/*
 * Lookup a blockmap offset and verify blockmap layers.
 */
hammer_off_t
hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
			int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t freemap;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_buffer_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t result_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv __debugvar;
	int zone;

	/*
	 * Calculate the zone-2 offset.
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	result_offset = hammer_xlate_to_zone2(zone_offset);

	/*
	 * Validate the allocation zone
	 */
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(0);
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(freemap->phys_offset != 0);

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
	if (*errorp)
		goto failed;
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (!hammer_crc_test_layer1(layer1)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer1(layer1))
			hpanic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a big-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

	if (*errorp)
		goto failed;
	if (layer2->zone == 0) {
		base_off = hammer_xlate_to_zone2(zone_offset &
						~HAMMER_BIGBLOCK_MASK64);
		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
				 base_off);
		KKASSERT(resv && resv->zone == zone);

	} else if (layer2->zone != zone) {
		hpanic("bad zone %d/%d", layer2->zone, zone);
	}
	if (!hammer_crc_test_layer2(layer2)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (!hammer_crc_test_layer2(layer2))
			hpanic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

failed:
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	hammer_rel_volume(root_volume, 0);
	if (hammer_debug_general & 0x0800) {
		hdkprintf("%016jx -> %016jx\n",
			(intmax_t)zone_offset, (intmax_t)result_offset);
	}
	return(result_offset);
}


/*
 * Check space availability
 *
 * MPSAFE - does not require fs_token
 */
int
_hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
{
	const int in_size = sizeof(struct hammer_inode_data) +
			    sizeof(union hammer_btree_elm);
	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
	int64_t usedbytes;

	usedbytes = hmp->rsv_inodes * in_size +
		    hmp->rsv_recs * rec_size +
		    hmp->rsv_databytes +
		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
		    ((int64_t)hammer_limit_dirtybufspace) +
		    (slop << HAMMER_BIGBLOCK_BITS);

	if (resp)
		*resp = usedbytes;

	if (hmp->copy_stat_freebigblocks >=
	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
		return(0);
	}

	return (ENOSPC);
}
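
/*
 * Worked example of the check above (a sketch; constants depend on the
 * build): with 8MB big-blocks, usedbytes is a pessimistic byte estimate
 * of everything reserved or potentially dirty, and the right shift
 * converts it to whole big-blocks (truncating).  If usedbytes came to
 * 96MB, the mount would need copy_stat_freebigblocks >= 12 or ENOSPC
 * is returned.
 */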

static int
hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
{
	hammer_blockmap_t freemap;
	hammer_blockmap_layer1_t layer1;
	hammer_buffer_t buffer1 = NULL;
	hammer_off_t layer1_offset;
	int error = 0;

	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	if (error)
		goto end;

	/*
	 * No more physically available space in layer1s
	 * of the current volume, go to the next volume.
	 */
	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
		hammer_skip_volume(offsetp);
end:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	return(error);
}

static void
hammer_skip_volume(hammer_off_t *offsetp)
{
	hammer_off_t offset;
	int zone, vol_no;

	offset = *offsetp;
	zone = HAMMER_ZONE_DECODE(offset);
	vol_no = HAMMER_VOL_DECODE(offset) + 1;
	KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);

	if (vol_no == HAMMER_MAX_VOLUMES) {	/* wrap */
		vol_no = 0;
		++zone;
	}

	*offsetp = HAMMER_ENCODE(zone, vol_no, 0);
}