HAMMER - Add hammer dedup directive and support
dragonfly.git: sys/vfs/hammer/hammer_blockmap.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44                                     hammer_off_t base_offset, int zone,
45                                     struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47
48 /*
49  * Reserved big-blocks red-black tree support
50  */
51 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
52              hammer_res_rb_compare, hammer_off_t, zone_offset);
53
54 static int
55 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
56 {
57         if (res1->zone_offset < res2->zone_offset)
58                 return(-1);
59         if (res1->zone_offset > res2->zone_offset)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * Allocate bytes from a zone
66  */
67 hammer_off_t
68 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
69                       hammer_off_t hint, int *errorp)
70 {
71         hammer_mount_t hmp;
72         hammer_volume_t root_volume;
73         hammer_blockmap_t blockmap;
74         hammer_blockmap_t freemap;
75         hammer_reserve_t resv;
76         struct hammer_blockmap_layer1 *layer1;
77         struct hammer_blockmap_layer2 *layer2;
78         hammer_buffer_t buffer1 = NULL;
79         hammer_buffer_t buffer2 = NULL;
80         hammer_buffer_t buffer3 = NULL;
81         hammer_off_t tmp_offset;
82         hammer_off_t next_offset;
83         hammer_off_t result_offset;
84         hammer_off_t layer1_offset;
85         hammer_off_t layer2_offset;
86         hammer_off_t base_off;
87         int loops = 0;
88         int offset;             /* offset within big-block */
89         int use_hint;
90
91         hmp = trans->hmp;
92
93         /*
94          * Deal with alignment and buffer-boundary issues.
95          *
96          * Be careful, certain primary alignments are used below to allocate
97          * new blockmap blocks.
98          */
99         bytes = (bytes + 15) & ~15;
100         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
101         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
102
103         /*
104          * Setup
105          */
106         root_volume = trans->rootvol;
107         *errorp = 0;
108         blockmap = &hmp->blockmap[zone];
109         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
110         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
111
112         /*
113          * Use the hint if we have one.
114          */
115         if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
116                 next_offset = (hint + 15) & ~(hammer_off_t)15;
117                 use_hint = 1;
118         } else {
119                 next_offset = blockmap->next_offset;
120                 use_hint = 0;
121         }
122 again:
123
124         /*
125          * use_hint is turned off if we leave the hinted big-block.
126          */
127         if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
128                 next_offset = blockmap->next_offset;
129                 use_hint = 0;
130         }
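	/*
	 * (Illustration of the hint test above: XORing two offsets leaves
	 *  only the bits in which they differ, so masking off the low
	 *  in-hint-block bits yields non-zero exactly when the offsets lie
	 *  in different hint blocks.  With a hypothetical 4K hint block,
	 *  (0x1010 ^ 0x1FF0) & ~0xFFF == 0, same block, while
	 *  (0x1010 ^ 0x2010) & ~0xFFF == 0x3000, different blocks.)
	 */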
131
132         /*
133          * Check for wrap
134          */
135         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
136                 if (++loops == 2) {
137                         result_offset = 0;
138                         *errorp = ENOSPC;
139                         goto failed;
140                 }
141                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
142         }
143
144         /*
145          * The allocation request may not cross a buffer boundary.  Special
146          * large allocations must not cross a large-block boundary.
147          */
148         tmp_offset = next_offset + bytes - 1;
149         if (bytes <= HAMMER_BUFSIZE) {
150                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
151                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
152                         goto again;
153                 }
154         } else {
155                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
156                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
157                         goto again;
158                 }
159         }
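	/*
	 * (Worked example of the crossing test above, assuming the usual
	 *  16K HAMMER_BUFSIZE: next_offset 0x3F00 with bytes 0x200 gives
	 *  tmp_offset 0x40FF, and (0x3F00 ^ 0x40FF) & ~0x3FFF is non-zero,
	 *  so the request would straddle a buffer boundary and next_offset
	 *  is advanced to the next buffer base, 0x4000, before retrying.)
	 */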
160         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
161
162         /*
163          * Dive layer 1.
164          */
165         layer1_offset = freemap->phys_offset +
166                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
167
168         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
169         if (*errorp) {
170                 result_offset = 0;
171                 goto failed;
172         }
173
174         /*
175          * Check CRC.
176          */
177         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
178                 hammer_lock_ex(&hmp->blkmap_lock);
179                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
180                         panic("CRC FAILED: LAYER1");
181                 hammer_unlock(&hmp->blkmap_lock);
182         }
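	/*
	 * (The CRC is tested once without the blockmap lock and, on a
	 *  mismatch, re-tested while holding it; a racing updater may be
	 *  in the middle of modifying the entry, so only a mismatch that
	 *  persists under the lock is treated as real corruption.  The
	 *  same pattern is used for every layer1/layer2 CRC check below.)
	 */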
183
184         /*
185          * If we are at a big-block boundary and layer1 indicates no
186          * free big-blocks, then we cannot allocate a new big-block in
187          * layer2, so skip to the next layer1 entry.
188          */
189         if (offset == 0 && layer1->blocks_free == 0) {
190                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
191                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
192                 goto again;
193         }
194         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
195
196         /*
197          * Skip this layer1 entry if it is pointing to a layer2 big-block
198          * on a volume that we are currently trying to remove from the
199          * file-system. This is used by the volume-del code together with
200          * the reblocker to free up a volume.
201          */
202         if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
203             hmp->volume_to_remove) {
204                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
205                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
206                 goto again;
207         }
208
209         /*
210          * Dive layer 2, each entry represents a large-block.
211          */
212         layer2_offset = layer1->phys_offset +
213                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
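	/*
	 * (A zone offset resolves through a two-level radix: the layer1
	 *  entry selects a big-block full of layer2 entries and each
	 *  layer2 entry describes one big-block of storage.  With the
	 *  default 8MB big-blocks that is one layer2 entry per 8MB and
	 *  one layer1 entry per roughly 4TB of zone address space; the
	 *  HAMMER_BLOCKMAP_LAYER[12]_OFFSET() macros extract the byte
	 *  offset of the corresponding entry within each table.  Sizes
	 *  here are illustrative of the default layout.)
	 */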
214         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
215         if (*errorp) {
216                 result_offset = 0;
217                 goto failed;
218         }
219
220         /*
221          * Check CRC.  This can race another thread holding the lock
222          * and in the middle of modifying layer2.
223          */
224         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
225                 hammer_lock_ex(&hmp->blkmap_lock);
226                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
227                         panic("CRC FAILED: LAYER2");
228                 hammer_unlock(&hmp->blkmap_lock);
229         }
230
231         /*
232          * Skip the layer if the zone is owned by someone other than us.
233          */
234         if (layer2->zone && layer2->zone != zone) {
235                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
236                 goto again;
237         }
238         if (offset < layer2->append_off) {
239                 next_offset += layer2->append_off - offset;
240                 goto again;
241         }
242
243         /*
244          * If operating in the current non-hint blockmap block, do not
245          * allow it to get over-full.  Also drop any active hinting so
246          * blockmap->next_offset is updated at the end.
247          *
248          * We do this for B-Tree and meta-data allocations to provide
249          * localization for updates.
250          */
251         if ((zone == HAMMER_ZONE_BTREE_INDEX ||
252              zone == HAMMER_ZONE_META_INDEX) &&
253             offset >= HAMMER_LARGEBLOCK_OVERFILL &&
254             !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
255         ) {
256                 if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
257                         next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
258                         use_hint = 0;
259                         goto again;
260                 }
261         }
262
263         /*
264          * We need the lock from this point on.  We have to re-check zone
265          * ownership after acquiring the lock and also check for reservations.
266          */
267         hammer_lock_ex(&hmp->blkmap_lock);
268
269         if (layer2->zone && layer2->zone != zone) {
270                 hammer_unlock(&hmp->blkmap_lock);
271                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
272                 goto again;
273         }
274         if (offset < layer2->append_off) {
275                 hammer_unlock(&hmp->blkmap_lock);
276                 next_offset += layer2->append_off - offset;
277                 goto again;
278         }
279
280         /*
281          * The bigblock might be reserved by another zone.  If it is reserved
282          * by our zone we may have to move next_offset past the append_off.
283          */
284         base_off = (next_offset &
285                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | 
286                     HAMMER_ZONE_RAW_BUFFER;
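	/*
	 * (base_off is the zone-2 / raw-buffer address of the big-block
	 *  containing next_offset: the in-block offset bits and the zone
	 *  bits are stripped and the raw-buffer zone is encoded in their
	 *  place.  Reservations are always indexed by this zone-2 base,
	 *  regardless of which zone the allocation is for.)
	 */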
287         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
288         if (resv) {
289                 if (resv->zone != zone) {
290                         hammer_unlock(&hmp->blkmap_lock);
291                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
292                                       ~HAMMER_LARGEBLOCK_MASK64;
293                         goto again;
294                 }
295                 if (offset < resv->append_off) {
296                         hammer_unlock(&hmp->blkmap_lock);
297                         next_offset += resv->append_off - offset;
298                         goto again;
299                 }
300                 ++resv->refs;
301         }
302
303         /*
304          * Ok, we can allocate out of this layer2 big-block.  Assume ownership
305          * of the layer for real.  At this point we've validated any
306          * reservation that might exist and can just ignore resv.
307          */
308         if (layer2->zone == 0) {
309                 /*
310                  * Assign the bigblock to our zone
311                  */
312                 hammer_modify_buffer(trans, buffer1,
313                                      layer1, sizeof(*layer1));
314                 --layer1->blocks_free;
315                 layer1->layer1_crc = crc32(layer1,
316                                            HAMMER_LAYER1_CRCSIZE);
317                 hammer_modify_buffer_done(buffer1);
318                 hammer_modify_buffer(trans, buffer2,
319                                      layer2, sizeof(*layer2));
320                 layer2->zone = zone;
321                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
322                 KKASSERT(layer2->append_off == 0);
323                 hammer_modify_volume_field(trans, trans->rootvol,
324                                            vol0_stat_freebigblocks);
325                 --root_volume->ondisk->vol0_stat_freebigblocks;
326                 hmp->copy_stat_freebigblocks =
327                         root_volume->ondisk->vol0_stat_freebigblocks;
328                 hammer_modify_volume_done(trans->rootvol);
329         } else {
330                 hammer_modify_buffer(trans, buffer2,
331                                      layer2, sizeof(*layer2));
332         }
333         KKASSERT(layer2->zone == zone);
334
335         /*
336          * NOTE: bytes_free can legally go negative due to de-dup.
337          */
338         layer2->bytes_free -= bytes;
339         KKASSERT(layer2->append_off <= offset);
340         layer2->append_off = offset + bytes;
341         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
342         hammer_modify_buffer_done(buffer2);
343
344         /*
345          * We hold the blockmap lock and should be the only ones
346          * capable of modifying resv->append_off.  Track the allocation
347          * as appropriate.
348          */
349         KKASSERT(bytes != 0);
350         if (resv) {
351                 KKASSERT(resv->append_off <= offset);
352                 resv->append_off = offset + bytes;
353                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
354                 hammer_blockmap_reserve_complete(hmp, resv);
355         }
356
357         /*
358          * If we are allocating from the base of a new buffer we can avoid
359          * a disk read by calling hammer_bnew_ext().
360          */
361         if ((next_offset & HAMMER_BUFMASK) == 0) {
362                 hammer_bnew_ext(trans->hmp, next_offset, bytes,
363                                 errorp, &buffer3);
364         }
365         result_offset = next_offset;
366
367         /*
368          * If we weren't supplied with a hint or could not use the hint
369          * then we wound up using blockmap->next_offset as the hint and
370          * need to save it.
371          */
372         if (use_hint == 0) {
373                 hammer_modify_volume(NULL, root_volume, NULL, 0);
374                 blockmap->next_offset = next_offset + bytes;
375                 hammer_modify_volume_done(root_volume);
376         }
377         hammer_unlock(&hmp->blkmap_lock);
378 failed:
379
380         /*
381          * Cleanup
382          */
383         if (buffer1)
384                 hammer_rel_buffer(buffer1, 0);
385         if (buffer2)
386                 hammer_rel_buffer(buffer2, 0);
387         if (buffer3)
388                 hammer_rel_buffer(buffer3, 0);
389
390         return(result_offset);
391 }
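/*
 * Illustrative (hypothetical) backend use of hammer_blockmap_alloc();
 * the zone and lengths below are examples only:
 *
 *	int error;
 *	hammer_off_t data_off;
 *
 *	data_off = hammer_blockmap_alloc(trans, HAMMER_ZONE_LARGE_DATA_INDEX,
 *					 rec_len, hint_off, &error);
 *	if (error)
 *		return (error);			// typically ENOSPC
 *	// data_off is a zone-encoded offset, 16-byte aligned, guaranteed
 *	// not to cross a buffer (or, for large requests, big-block)
 *	// boundary.
 */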
392
393 /*
394  * Frontend function - Reserve bytes in a zone.
395  *
396  * This code reserves bytes out of a blockmap without committing to any
397  * meta-data modifications, allowing the front-end to directly issue disk
398  * write I/O for large blocks of data.
399  *
400  * The backend later finalizes the reservation with hammer_blockmap_finalize()
401  * upon committing the related record.
402  */
403 hammer_reserve_t
404 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
405                         hammer_off_t *zone_offp, int *errorp)
406 {
407         hammer_volume_t root_volume;
408         hammer_blockmap_t blockmap;
409         hammer_blockmap_t freemap;
410         struct hammer_blockmap_layer1 *layer1;
411         struct hammer_blockmap_layer2 *layer2;
412         hammer_buffer_t buffer1 = NULL;
413         hammer_buffer_t buffer2 = NULL;
414         hammer_buffer_t buffer3 = NULL;
415         hammer_off_t tmp_offset;
416         hammer_off_t next_offset;
417         hammer_off_t layer1_offset;
418         hammer_off_t layer2_offset;
419         hammer_off_t base_off;
420         hammer_reserve_t resv;
421         hammer_reserve_t resx;
422         int loops = 0;
423         int offset;
424
425         /*
426          * Setup
427          */
428         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
429         root_volume = hammer_get_root_volume(hmp, errorp);
430         if (*errorp)
431                 return(NULL);
432         blockmap = &hmp->blockmap[zone];
433         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
434         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
435
436         /*
437          * Deal with alignment and buffer-boundary issues.
438          *
439          * Be careful, certain primary alignments are used below to allocate
440          * new blockmap blocks.
441          */
442         bytes = (bytes + 15) & ~15;
443         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
444
445         next_offset = blockmap->next_offset;
446 again:
447         resv = NULL;
448         /*
449          * Check for wrap
450          */
451         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
452                 if (++loops == 2) {
453                         *errorp = ENOSPC;
454                         goto failed;
455                 }
456                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
457         }
458
459         /*
460          * The allocation request may not cross a buffer boundary.  Special
461          * large allocations must not cross a large-block boundary.
462          */
463         tmp_offset = next_offset + bytes - 1;
464         if (bytes <= HAMMER_BUFSIZE) {
465                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
466                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
467                         goto again;
468                 }
469         } else {
470                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
471                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
472                         goto again;
473                 }
474         }
475         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
476
477         /*
478          * Dive layer 1.
479          */
480         layer1_offset = freemap->phys_offset +
481                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
482         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
483         if (*errorp)
484                 goto failed;
485
486         /*
487          * Check CRC.
488          */
489         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
490                 hammer_lock_ex(&hmp->blkmap_lock);
491                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
492                         panic("CRC FAILED: LAYER1");
493                 hammer_unlock(&hmp->blkmap_lock);
494         }
495
496         /*
497          * If we are at a big-block boundary and layer1 indicates no
498          * free big-blocks, then we cannot allocate a new big-block in
499          * layer2, so skip to the next layer1 entry.
500          */
501         if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
502             layer1->blocks_free == 0) {
503                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
504                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
505                 goto again;
506         }
507         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
508
509         /*
510          * Dive layer 2, each entry represents a large-block.
511          */
512         layer2_offset = layer1->phys_offset +
513                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
514         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
515         if (*errorp)
516                 goto failed;
517
518         /*
519          * Check CRC if not allocating into uninitialized space (which we
520          * aren't when reserving space).
521          */
522         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
523                 hammer_lock_ex(&hmp->blkmap_lock);
524                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
525                         panic("CRC FAILED: LAYER2");
526                 hammer_unlock(&hmp->blkmap_lock);
527         }
528
529         /*
530          * Skip the layer if the zone is owned by someone other than us.
531          */
532         if (layer2->zone && layer2->zone != zone) {
533                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
534                 goto again;
535         }
536         if (offset < layer2->append_off) {
537                 next_offset += layer2->append_off - offset;
538                 goto again;
539         }
540
541         /*
542          * We need the lock from this point on.  We have to re-check zone
543          * ownership after acquiring the lock and also check for reservations.
544          */
545         hammer_lock_ex(&hmp->blkmap_lock);
546
547         if (layer2->zone && layer2->zone != zone) {
548                 hammer_unlock(&hmp->blkmap_lock);
549                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
550                 goto again;
551         }
552         if (offset < layer2->append_off) {
553                 hammer_unlock(&hmp->blkmap_lock);
554                 next_offset += layer2->append_off - offset;
555                 goto again;
556         }
557
558         /*
559          * The bigblock might be reserved by another zone.  If it is reserved
560          * by our zone we may have to move next_offset past the append_off.
561          */
562         base_off = (next_offset &
563                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
564                     HAMMER_ZONE_RAW_BUFFER;
565         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
566         if (resv) {
567                 if (resv->zone != zone) {
568                         hammer_unlock(&hmp->blkmap_lock);
569                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
570                                       ~HAMMER_LARGEBLOCK_MASK64;
571                         goto again;
572                 }
573                 if (offset < resv->append_off) {
574                         hammer_unlock(&hmp->blkmap_lock);
575                         next_offset += resv->append_off - offset;
576                         goto again;
577                 }
578                 ++resv->refs;
579                 resx = NULL;
580         } else {
581                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
582                                M_WAITOK | M_ZERO | M_USE_RESERVE);
583                 resx->refs = 1;
584                 resx->zone = zone;
585                 resx->zone_offset = base_off;
586                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
587                         resx->flags |= HAMMER_RESF_LAYER2FREE;
588                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
589                 KKASSERT(resv == NULL);
590                 resv = resx;
591                 ++hammer_count_reservations;
592         }
593         resv->append_off = offset + bytes;
594
595         /*
596          * If we are not reserving a whole buffer but are at the start of
597          * a new block, call hammer_bnew() to avoid a disk read.
598          *
599          * If we are reserving a whole buffer (or more), the caller will
600          * probably use a direct read, so do nothing.
601          */
602         if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
603                 hammer_bnew(hmp, next_offset, errorp, &buffer3);
604         }
605
606         /*
607          * Adjust our iterator (blockmap->next_offset), advancing it past
608          * the reserved space.  The layer1 and layer2 space beyond this
609          * point is uninitialized.
610          */
611         blockmap->next_offset = next_offset + bytes;
612         hammer_unlock(&hmp->blkmap_lock);
613
614 failed:
615         if (buffer1)
616                 hammer_rel_buffer(buffer1, 0);
617         if (buffer2)
618                 hammer_rel_buffer(buffer2, 0);
619         if (buffer3)
620                 hammer_rel_buffer(buffer3, 0);
621         hammer_rel_volume(root_volume, 0);
622         *zone_offp = next_offset;
623
624         return(resv);
625 }
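/*
 * Sketch of the frontend/backend split this function supports (flow
 * simplified for illustration):
 *
 *	// frontend: reserve space and issue the data write immediately
 *	resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *	... direct-write the data to zone_off ...
 *
 *	// backend: on commit, convert the reservation into a real
 *	// allocation and drop the reservation reference.
 *	hammer_blockmap_finalize(trans, resv, zone_off, bytes);
 *	hammer_blockmap_reserve_complete(hmp, resv);
 *
 * The reservation only advances in-memory next_offset/append_off state;
 * no layer1/layer2 media modifications occur until the finalize step.
 */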
626
627 /*
628  * Dereference a reservation structure.  Upon the final release the
629  * underlying big-block is checked and if it is entirely free we delete
630  * any related HAMMER buffers to avoid potential conflicts with future
631  * reuse of the big-block.
632  */
633 void
634 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
635 {
636         hammer_off_t base_offset;
637         int error;
638
639         KKASSERT(resv->refs > 0);
640         KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
641                  HAMMER_ZONE_RAW_BUFFER);
642
643         /*
644          * Setting append_off to the max prevents any new allocations
645          * from occurring while we are trying to dispose of the reservation,
646          * allowing us to safely delete any related HAMMER buffers.
647          *
648          * If we are unable to clean out all related HAMMER buffers we
649          * requeue the delay.
650          */
651         if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
652                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
653                 base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
654                 base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
655                 error = hammer_del_buffers(hmp, base_offset,
656                                            resv->zone_offset,
657                                            HAMMER_LARGEBLOCK_SIZE,
658                                            1);
659                 if (hammer_debug_general & 0x20000) {
660                         kprintf("hammer: dellgblk %016jx error %d\n",
661                                 (intmax_t)base_offset, error);
662                 }
663                 if (error)
664                         hammer_reserve_setdelay(hmp, resv);
665         }
666         if (--resv->refs == 0) {
667                 if (hammer_debug_general & 0x20000) {
668                         kprintf("hammer: delresvr %016jx zone %02x\n",
669                                 (intmax_t)resv->zone_offset, resv->zone);
670                 }
671                 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
672                 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
673                 kfree(resv, hmp->m_misc);
674                 --hammer_count_reservations;
675         }
676 }
677
678 /*
679  * Prevent a potentially free big-block from being reused until after
680  * the related flushes have completely cycled; otherwise crash recovery
681  * could resurrect a data block that was already reused and overwritten.
682  *
683  * The caller might reset the underlying layer2 entry's append_off to 0, so
684  * our covering append_off must be set to max to prevent any reallocation
685  * until after the flush delays complete and any underlying cached blocks
686  * have been properly invalidated.
687  */
688 static void
689 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
690                         int zone, struct hammer_blockmap_layer2 *layer2)
691 {
692         hammer_reserve_t resv;
693
694         /*
695          * Allocate the reservation if necessary.
696          *
697          * NOTE: need lock in future around resv lookup/allocation and
698          * the setdelay call, currently refs is not bumped until the call.
699          */
700 again:
701         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
702         if (resv == NULL) {
703                 resv = kmalloc(sizeof(*resv), hmp->m_misc,
704                                M_WAITOK | M_ZERO | M_USE_RESERVE);
705                 resv->zone = zone;
706                 resv->zone_offset = base_offset;
707                 resv->refs = 0;
708                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
709
710                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
711                         resv->flags |= HAMMER_RESF_LAYER2FREE;
712                 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
713                         kfree(resv, hmp->m_misc);
714                         goto again;
715                 }
716                 ++hammer_count_reservations;
717         } else {
718                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
719                         resv->flags |= HAMMER_RESF_LAYER2FREE;
720         }
721         hammer_reserve_setdelay(hmp, resv);
722 }
723
724 /*
725  * Enter the reservation on the on-delay list, or move it if it
726  * is already on the list.
727  */
728 static void
729 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
730 {
731         if (resv->flags & HAMMER_RESF_ONDELAY) {
732                 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
733                 resv->flush_group = hmp->flusher.next + 1;
734                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
735         } else {
736                 ++resv->refs;
737                 ++hmp->rsv_fromdelay;
738                 resv->flags |= HAMMER_RESF_ONDELAY;
739                 resv->flush_group = hmp->flusher.next + 1;
740                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
741         }
742 }
743
744 /*
745  * Reserve has reached its flush point, remove it from the delay list
746  * and finish it off.  hammer_blockmap_reserve_complete() inherits
747  * the ondelay reference.
748  */
749 void
750 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
751 {
752         KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
753         resv->flags &= ~HAMMER_RESF_ONDELAY;
754         TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
755         --hmp->rsv_fromdelay;
756         hammer_blockmap_reserve_complete(hmp, resv);
757 }
758
759 /*
760  * Backend function - free (offset, bytes) in a zone.
761  *
762  * XXX error return
763  */
764 void
765 hammer_blockmap_free(hammer_transaction_t trans,
766                      hammer_off_t zone_offset, int bytes)
767 {
768         hammer_mount_t hmp;
769         hammer_volume_t root_volume;
770         hammer_blockmap_t blockmap;
771         hammer_blockmap_t freemap;
772         struct hammer_blockmap_layer1 *layer1;
773         struct hammer_blockmap_layer2 *layer2;
774         hammer_buffer_t buffer1 = NULL;
775         hammer_buffer_t buffer2 = NULL;
776         hammer_off_t layer1_offset;
777         hammer_off_t layer2_offset;
778         hammer_off_t base_off;
779         int error;
780         int zone;
781
782         if (bytes == 0)
783                 return;
784         hmp = trans->hmp;
785
786         /*
787          * Alignment
788          */
789         bytes = (bytes + 15) & ~15;
790         KKASSERT(bytes <= HAMMER_XBUFSIZE);
791         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) & 
792                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
793
794         /*
795          * Basic zone validation & locking
796          */
797         zone = HAMMER_ZONE_DECODE(zone_offset);
798         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
799         root_volume = trans->rootvol;
800         error = 0;
801
802         blockmap = &hmp->blockmap[zone];
803         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
804
805         /*
806          * Dive layer 1.
807          */
808         layer1_offset = freemap->phys_offset +
809                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
810         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
811         if (error)
812                 goto failed;
813         KKASSERT(layer1->phys_offset &&
814                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
815         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
816                 hammer_lock_ex(&hmp->blkmap_lock);
817                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
818                         panic("CRC FAILED: LAYER1");
819                 hammer_unlock(&hmp->blkmap_lock);
820         }
821
822         /*
823          * Dive layer 2, each entry represents a large-block.
824          */
825         layer2_offset = layer1->phys_offset +
826                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
827         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
828         if (error)
829                 goto failed;
830         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
831                 hammer_lock_ex(&hmp->blkmap_lock);
832                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
833                         panic("CRC FAILED: LAYER2");
834                 hammer_unlock(&hmp->blkmap_lock);
835         }
836
837         hammer_lock_ex(&hmp->blkmap_lock);
838
839         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
840
841         /*
842          * Free space previously allocated via blockmap_alloc().
843          *
844          * NOTE: bytes_free can be and remain negative due to de-dup ops
845          *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
846          */
847         KKASSERT(layer2->zone == zone);
848         layer2->bytes_free += bytes;
849         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
850
851         /*
852          * If a big-block becomes entirely free we must create a covering
853          * reservation to prevent premature reuse.  Note, however, that
854          * the big-block and/or reservation may still have an append_off
855          * that allows further (non-reused) allocations.
856          *
857          * Once the reservation has been made we re-check layer2 and if
858          * the big-block is still entirely free we reset the layer2 entry.
859          * The reservation will prevent premature reuse.
860          *
861          * NOTE: hammer_buffer's are only invalidated when the reservation
862          * is completed, if the layer2 entry is still completely free at
863          * that time.  Any allocations from the reservation that may have
864          * occurred in the meantime, or active references on the reservation
865          * from new pending allocations, will prevent the invalidation from
866          * occurring.
867          */
868         if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
869                 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
870
871                 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
872                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
873                         layer2->zone = 0;
874                         layer2->append_off = 0;
875                         hammer_modify_buffer(trans, buffer1,
876                                              layer1, sizeof(*layer1));
877                         ++layer1->blocks_free;
878                         layer1->layer1_crc = crc32(layer1,
879                                                    HAMMER_LAYER1_CRCSIZE);
880                         hammer_modify_buffer_done(buffer1);
881                         hammer_modify_volume_field(trans,
882                                         trans->rootvol,
883                                         vol0_stat_freebigblocks);
884                         ++root_volume->ondisk->vol0_stat_freebigblocks;
885                         hmp->copy_stat_freebigblocks =
886                            root_volume->ondisk->vol0_stat_freebigblocks;
887                         hammer_modify_volume_done(trans->rootvol);
888                 }
889         }
890         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
891         hammer_modify_buffer_done(buffer2);
892         hammer_unlock(&hmp->blkmap_lock);
893
894 failed:
895         if (buffer1)
896                 hammer_rel_buffer(buffer1, 0);
897         if (buffer2)
898                 hammer_rel_buffer(buffer2, 0);
899 }
900
901 int
902 hammer_blockmap_dedup(hammer_transaction_t trans,
903                      hammer_off_t zone_offset, int bytes)
904 {
905         hammer_mount_t hmp;
906         hammer_volume_t root_volume;
907         hammer_blockmap_t blockmap;
908         hammer_blockmap_t freemap;
909         struct hammer_blockmap_layer1 *layer1;
910         struct hammer_blockmap_layer2 *layer2;
911         hammer_buffer_t buffer1 = NULL;
912         hammer_buffer_t buffer2 = NULL;
913         hammer_off_t layer1_offset;
914         hammer_off_t layer2_offset;
915         int32_t temp;
916         int error;
917         int zone;
918
919         if (bytes == 0)
920                 return (0);
921         hmp = trans->hmp;
922
923         /*
924          * Alignment
925          */
926         bytes = (bytes + 15) & ~15;
927         KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
928         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
929                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
930
931         /*
932          * Basic zone validation & locking
933          */
934         zone = HAMMER_ZONE_DECODE(zone_offset);
935         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
936         root_volume = trans->rootvol;
937         error = 0;
938
939         blockmap = &hmp->blockmap[zone];
940         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
941
942         /*
943          * Dive layer 1.
944          */
945         layer1_offset = freemap->phys_offset +
946                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
947         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
948         if (error)
949                 goto failed;
950         KKASSERT(layer1->phys_offset &&
951                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
952         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
953                 hammer_lock_ex(&hmp->blkmap_lock);
954                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
955                         panic("CRC FAILED: LAYER1");
956                 hammer_unlock(&hmp->blkmap_lock);
957         }
958
959         /*
960          * Dive layer 2, each entry represents a large-block.
961          */
962         layer2_offset = layer1->phys_offset +
963                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
964         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
965         if (error)
966                 goto failed;
967         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
968                 hammer_lock_ex(&hmp->blkmap_lock);
969                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
970                         panic("CRC FAILED: LAYER2");
971                 hammer_unlock(&hmp->blkmap_lock);
972         }
973
974         hammer_lock_ex(&hmp->blkmap_lock);
975
976         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
977
978         /*
979          * Account for space consumed by the de-duplicated reference.
980          *
981          * NOTE: bytes_free can be and remain negative due to de-dup ops
982          *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
983          */
984         KKASSERT(layer2->zone == zone);
985         temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
986         cpu_ccfence(); /* prevent gcc from optimizing temp out */
987         if (temp > layer2->bytes_free) {
988                 error = ERANGE;
989                 goto underflow;
990         }
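	/*
	 * (The test above is a signed-underflow guard: de-dup may drive
	 *  bytes_free negative, but if subtracting two big-block sizes
	 *  wraps past INT32_MIN the result becomes larger than the
	 *  original value, which the comparison detects.  For example,
	 *  with 8MB big-blocks, bytes_free = -0x7f000000 yields
	 *  temp = -0x80000000, still smaller, so it passes; one byte
	 *  lower and temp would wrap positive and ERANGE is returned.)
	 */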
991         layer2->bytes_free -= bytes;
992
993         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
994
995         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
996 underflow:
997         hammer_modify_buffer_done(buffer2);
998         hammer_unlock(&hmp->blkmap_lock);
999
1000 failed:
1001         if (buffer1)
1002                 hammer_rel_buffer(buffer1, 0);
1003         if (buffer2)
1004                 hammer_rel_buffer(buffer2, 0);
1005         return (error);
1006 }
1007
1008 /*
1009  * Backend function - finalize (offset, bytes) in a zone.
1010  *
1011  * Allocate space that was previously reserved by the frontend.
1012  */
1013 int
1014 hammer_blockmap_finalize(hammer_transaction_t trans,
1015                          hammer_reserve_t resv,
1016                          hammer_off_t zone_offset, int bytes)
1017 {
1018         hammer_mount_t hmp;
1019         hammer_volume_t root_volume;
1020         hammer_blockmap_t blockmap;
1021         hammer_blockmap_t freemap;
1022         struct hammer_blockmap_layer1 *layer1;
1023         struct hammer_blockmap_layer2 *layer2;
1024         hammer_buffer_t buffer1 = NULL;
1025         hammer_buffer_t buffer2 = NULL;
1026         hammer_off_t layer1_offset;
1027         hammer_off_t layer2_offset;
1028         int error;
1029         int zone;
1030         int offset;
1031
1032         if (bytes == 0)
1033                 return(0);
1034         hmp = trans->hmp;
1035
1036         /*
1037          * Alignment
1038          */
1039         bytes = (bytes + 15) & ~15;
1040         KKASSERT(bytes <= HAMMER_XBUFSIZE);
1041
1042         /*
1043          * Basic zone validation & locking
1044          */
1045         zone = HAMMER_ZONE_DECODE(zone_offset);
1046         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1047         root_volume = trans->rootvol;
1048         error = 0;
1049
1050         blockmap = &hmp->blockmap[zone];
1051         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1052
1053         /*
1054          * Dive layer 1.
1055          */
1056         layer1_offset = freemap->phys_offset +
1057                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1058         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1059         if (error)
1060                 goto failed;
1061         KKASSERT(layer1->phys_offset &&
1062                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1063         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1064                 hammer_lock_ex(&hmp->blkmap_lock);
1065                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1066                         panic("CRC FAILED: LAYER1");
1067                 hammer_unlock(&hmp->blkmap_lock);
1068         }
1069
1070         /*
1071          * Dive layer 2, each entry represents a large-block.
1072          */
1073         layer2_offset = layer1->phys_offset +
1074                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1075         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1076         if (error)
1077                 goto failed;
1078         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1079                 hammer_lock_ex(&hmp->blkmap_lock);
1080                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1081                         panic("CRC FAILED: LAYER2");
1082                 hammer_unlock(&hmp->blkmap_lock);
1083         }
1084
1085         hammer_lock_ex(&hmp->blkmap_lock);
1086
1087         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1088
1089         /*
1090          * Finalize some or all of the space covered by a current
1091          * reservation.  An allocation in the same layer may have
1092          * already assigned ownership.
1093          */
1094         if (layer2->zone == 0) {
1095                 hammer_modify_buffer(trans, buffer1,
1096                                      layer1, sizeof(*layer1));
1097                 --layer1->blocks_free;
1098                 layer1->layer1_crc = crc32(layer1,
1099                                            HAMMER_LAYER1_CRCSIZE);
1100                 hammer_modify_buffer_done(buffer1);
1101                 layer2->zone = zone;
1102                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1103                 KKASSERT(layer2->append_off == 0);
1104                 hammer_modify_volume_field(trans,
1105                                 trans->rootvol,
1106                                 vol0_stat_freebigblocks);
1107                 --root_volume->ondisk->vol0_stat_freebigblocks;
1108                 hmp->copy_stat_freebigblocks =
1109                    root_volume->ondisk->vol0_stat_freebigblocks;
1110                 hammer_modify_volume_done(trans->rootvol);
1111         }
1112         if (layer2->zone != zone)
1113                 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1114         KKASSERT(layer2->zone == zone);
1115         KKASSERT(bytes != 0);
1116         layer2->bytes_free -= bytes;
1117         if (resv)
1118                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1119
1120         /*
1121          * Finalizations can occur out of order, or combined with allocations.
1122          * append_off must be set to the highest allocated offset.
1123          */
1124         offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1125         if (layer2->append_off < offset)
1126                 layer2->append_off = offset;
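	/*
	 * (Example of the out-of-order case: if a 4K reservation at
	 *  in-block offset 0x8000 finalizes before an earlier 4K
	 *  reservation at 0x7000, append_off first becomes 0x9000 and the
	 *  later finalize, ending at 0x8000, must not pull it back down,
	 *  hence the max-style update above.)
	 */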
1127
1128         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1129         hammer_modify_buffer_done(buffer2);
1130         hammer_unlock(&hmp->blkmap_lock);
1131
1132 failed:
1133         if (buffer1)
1134                 hammer_rel_buffer(buffer1, 0);
1135         if (buffer2)
1136                 hammer_rel_buffer(buffer2, 0);
1137         return(error);
1138 }
1139
1140 /*
1141  * Return the approximate number of free bytes in the big-block
1142  * containing the specified blockmap offset.
1143  *
1144  * WARNING: A negative number can be returned if data de-dup exists,
1145  *          and the result will also not represent the actual number
1146  *          of free bytes in this case.
1147  *
1148  *          This code is used only by the reblocker.
1149  */
1150 int
1151 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1152                         int *curp, int *errorp)
1153 {
1154         hammer_volume_t root_volume;
1155         hammer_blockmap_t blockmap;
1156         hammer_blockmap_t freemap;
1157         struct hammer_blockmap_layer1 *layer1;
1158         struct hammer_blockmap_layer2 *layer2;
1159         hammer_buffer_t buffer = NULL;
1160         hammer_off_t layer1_offset;
1161         hammer_off_t layer2_offset;
1162         int32_t bytes;
1163         int zone;
1164
1165         zone = HAMMER_ZONE_DECODE(zone_offset);
1166         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1167         root_volume = hammer_get_root_volume(hmp, errorp);
1168         if (*errorp) {
1169                 *curp = 0;
1170                 return(0);
1171         }
1172         blockmap = &hmp->blockmap[zone];
1173         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1174
1175         /*
1176          * Dive layer 1.
1177          */
1178         layer1_offset = freemap->phys_offset +
1179                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1180         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1181         if (*errorp) {
1182                 bytes = 0;
1183                 goto failed;
1184         }
1185         KKASSERT(layer1->phys_offset);
1186         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1187                 hammer_lock_ex(&hmp->blkmap_lock);
1188                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1189                         panic("CRC FAILED: LAYER1");
1190                 hammer_unlock(&hmp->blkmap_lock);
1191         }
1192
1193         /*
1194          * Dive layer 2, each entry represents a large-block.
1195          *
1196          * (reuse buffer, layer1 pointer becomes invalid)
1197          */
1198         layer2_offset = layer1->phys_offset +
1199                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1200         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1201         if (*errorp) {
1202                 bytes = 0;
1203                 goto failed;
1204         }
1205         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1206                 hammer_lock_ex(&hmp->blkmap_lock);
1207                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1208                         panic("CRC FAILED: LAYER2");
1209                 hammer_unlock(&hmp->blkmap_lock);
1210         }
1211         KKASSERT(layer2->zone == zone);
1212
1213         bytes = layer2->bytes_free;
1214
1215         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1216                 *curp = 0;
1217         else
1218                 *curp = 1;
1219 failed:
1220         if (buffer)
1221                 hammer_rel_buffer(buffer, 0);
1222         hammer_rel_volume(root_volume, 0);
1223         if (hammer_debug_general & 0x0800) {
1224                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1225                         (long long)zone_offset, bytes);
1226         }
1227         return(bytes);
1228 }
1229
1230
1231 /*
1232  * Lookup a blockmap offset.
1233  */
1234 hammer_off_t
1235 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1236                        int *errorp)
1237 {
1238         hammer_volume_t root_volume;
1239         hammer_blockmap_t freemap;
1240         struct hammer_blockmap_layer1 *layer1;
1241         struct hammer_blockmap_layer2 *layer2;
1242         hammer_buffer_t buffer = NULL;
1243         hammer_off_t layer1_offset;
1244         hammer_off_t layer2_offset;
1245         hammer_off_t result_offset;
1246         hammer_off_t base_off;
1247         hammer_reserve_t resv;
1248         int zone;
1249
1250         /*
1251          * Calculate the zone-2 offset.
1252          */
1253         zone = HAMMER_ZONE_DECODE(zone_offset);
1254         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1255
1256         result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1257                         HAMMER_ZONE_RAW_BUFFER;
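	/*
	 * (Zone-X blockmap offsets are direct-mapped: the translation to a
	 *  zone-2 raw-buffer address just replaces the 4-bit zone field in
	 *  the top of the 64-bit offset, e.g. a zone-10 offset
	 *  0xA000000012345678 maps to 0x2000000012345678.)
	 */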
1258
1259         /*
1260          * We can actually stop here: normal blockmaps are now direct-mapped
1261          * onto the freemap and so represent zone-2 addresses.
1262          */
1263         if (hammer_verify_zone == 0) {
1264                 *errorp = 0;
1265                 return(result_offset);
1266         }
1267
1268         /*
1269          * Validate the allocation zone
1270          */
1271         root_volume = hammer_get_root_volume(hmp, errorp);
1272         if (*errorp)
1273                 return(0);
1274         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1275         KKASSERT(freemap->phys_offset != 0);
1276
1277         /*
1278          * Dive layer 1.
1279          */
1280         layer1_offset = freemap->phys_offset +
1281                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1282         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1283         if (*errorp)
1284                 goto failed;
1285         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1286         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1287                 hammer_lock_ex(&hmp->blkmap_lock);
1288                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1289                         panic("CRC FAILED: LAYER1");
1290                 hammer_unlock(&hmp->blkmap_lock);
1291         }
1292
1293         /*
1294          * Dive layer 2, each entry represents a large-block.
1295          */
1296         layer2_offset = layer1->phys_offset +
1297                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1298         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1299
1300         if (*errorp)
1301                 goto failed;
1302         if (layer2->zone == 0) {
1303                 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1304                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1305                                  base_off);
1306                 KKASSERT(resv && resv->zone == zone);
1307
1308         } else if (layer2->zone != zone) {
1309                 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1310                         layer2->zone, zone);
1311         }
1312         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1313                 hammer_lock_ex(&hmp->blkmap_lock);
1314                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1315                         panic("CRC FAILED: LAYER2");
1316                 hammer_unlock(&hmp->blkmap_lock);
1317         }
1318
1319 failed:
1320         if (buffer)
1321                 hammer_rel_buffer(buffer, 0);
1322         hammer_rel_volume(root_volume, 0);
1323         if (hammer_debug_general & 0x0800) {
1324                 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1325                         (long long)zone_offset, (long long)result_offset);
1326         }
1327         return(result_offset);
1328 }
1329
1330
1331 /*
1332  * Check space availability
1333  *
1334  * MPSAFE - does not require fs_token
1335  */
1336 int
1337 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1338 {
1339         const int in_size = sizeof(struct hammer_inode_data) +
1340                             sizeof(union hammer_btree_elm);
1341         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1342         int64_t usedbytes;
1343
1344         usedbytes = hmp->rsv_inodes * in_size +
1345                     hmp->rsv_recs * rec_size +
1346                     hmp->rsv_databytes +
1347                     ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1348                     ((int64_t)hidirtybufspace << 2) +
1349                     (slop << HAMMER_LARGEBLOCK_BITS);
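	/*
	 * (Rough worked example with illustrative numbers and 8MB
	 *  big-blocks: 1000 reserved inodes, 2000 reserved records, 16MB
	 *  of reserved data, 4 delayed big-blocks (32MB), 32MB of
	 *  hidirtybufspace (counted x4 = 128MB) and a slop of 8 (64MB)
	 *  give usedbytes = 1000*in_size + 2000*rec_size + 240MB, which
	 *  is then compared, in big-block units, against the cached free
	 *  big-block count below.)
	 */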
1350
1351         hammer_count_extra_space_used = usedbytes;      /* debugging */
1352         if (resp)
1353                 *resp = usedbytes;
1354
1355         if (hmp->copy_stat_freebigblocks >=
1356             (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1357                 return(0);
1358         }
1359         return (ENOSPC);
1360 }
1361