sys/vfs/hammer: Add error handling
sys/vfs/hammer/hammer_blockmap.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
                                    hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
static int update_bytes_free(hammer_reserve_t resv, int bytes);
static int hammer_check_volume(hammer_mount_t, hammer_off_t*);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
                      hammer_off_t hint, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
        int use_hint;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
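
        /*
         * The "(bytes + 15) & ~15" above rounds the request up to the
         * next 16-byte boundary, e.g. a 1-byte request becomes 16 and a
         * 17-byte request becomes 32; masking off the low four bits
         * truncates to a multiple of 16.
         */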

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Use the hint if we have one.
         */
        if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
                next_offset = (hint + 15) & ~(hammer_off_t)15;
                use_hint = 1;
        } else {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }
again:

        /*
         * use_hint is turned off if we leave the hinted big-block.
         */
        if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }

        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
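        /*
         * For two offsets a and b, "(a ^ b) & ~mask" is non-zero exactly
         * when a and b lie in different mask-sized blocks, so XOR-ing
         * next_offset with the last byte of the request detects a buffer
         * or big-block boundary crossing without any division.
         */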
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);

        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
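
        /*
         * The double test above is deliberate: the first CRC check runs
         * without the blockmap lock and can race a writer in the middle
         * of updating the layer, so the check is repeated under
         * blkmap_lock and only a mismatch that persists while the lock
         * is held is treated as real corruption.
         */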

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset)) {
                        result_offset = 0;
                        goto failed;
                }
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Skip this layer1 entry if it is pointing to a layer2 big-block
         * on a volume that we are currently trying to remove from the
         * file-system. This is used by the volume-del code together with
         * the reblocker to free up a volume.
         */
        if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
            hmp->volume_to_remove) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  This can race another thread holding the lock
         * and in the middle of modifying layer2.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

#if 0
        /*
         * If operating in the current non-hint blockmap block, do not
         * allow it to get over-full.  Also drop any active hinting so
         * blockmap->next_offset is updated at the end.
         *
         * We do this for B-Tree and meta-data allocations to provide
         * localization for updates.
         */
        if ((zone == HAMMER_ZONE_BTREE_INDEX ||
             zone == HAMMER_ZONE_META_INDEX) &&
            offset >= HAMMER_BIGBLOCK_OVERFILL &&
            !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)
        ) {
                if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
                        next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                        use_hint = 0;
                        goto again;
                }
        }
#endif

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
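        /*
         * Reservations are indexed by zone-2 (raw buffer) offsets, so
         * base_off above is the zone-2 address of the covering big-block
         * used to key the RB-tree lookup below.
         */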
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the big-block to our zone
                 */
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        /*
         * NOTE: bytes_free can legally go negative due to de-dup.
         */
        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);

        /*
         * We hold the blockmap lock and should be the only ones
         * capable of modifying resv->append_off.  Track the allocation
         * as appropriate.
         */
        KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew_ext().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
                if (*errorp) {
                        result_offset = 0;
                        goto failed;
                }
        }
        result_offset = next_offset;

        /*
         * If we weren't supplied with a hint or could not use the hint
         * then we wound up using blockmap->next_offset as the hint and
         * need to save it.
         */
        if (use_hint == 0) {
                hammer_modify_volume_noundo(NULL, root_volume);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        }
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
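
/*
 * Illustrative sketch (not part of the original source): a typical
 * backend caller allocates zone space roughly like this, treating a
 * zero return with *errorp set as failure:
 *
 *      int error;
 *      hammer_off_t noff;
 *
 *      noff = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *                                   sizeof(struct hammer_node_ondisk),
 *                                   hint, &error);
 *      if (noff == 0)
 *              return(error);          (ENOSPC or an I/O error)
 */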

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for big-blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset))
                        goto failed;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resx->flags |= HAMMER_RESF_LAYER2FREE;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
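                /*
                 * RB_INSERT() returns NULL on success and the colliding
                 * node on failure, so the assertion above verifies that
                 * no other thread inserted a reservation for this
                 * big-block while we were holding blkmap_lock.
                 */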
                resv = resx;
                ++hammer_count_reservations;
        }
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         *
         * If we do not have a whole lot of system memory we really can't
         * afford to block while holding the blkmap_lock!
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
                        hammer_bnew(hmp, next_offset, errorp, &buffer3);
                        if (*errorp)
                                goto failed;
                }
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}
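
/*
 * Illustrative sketch (not part of the original source): the intended
 * frontend/backend split.  The frontend reserves space and issues the
 * data write directly, then the backend makes the allocation permanent
 * when the related record is committed:
 *
 *      resv = hammer_blockmap_reserve(hmp, zone, bytes,
 *                                     &zone_off, &error);
 *      if (resv) {
 *              (direct write of the data to zone_off)
 *              hammer_blockmap_finalize(trans, resv, zone_off, bytes);
 *              hammer_blockmap_reserve_complete(hmp, resv);
 *      }
 */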

/*
 * Frontend function - Dedup bytes in a zone.
 *
 * Dedup reservations work exactly the same as normal write reservations
 * except that only the bytes_free field is adjusted and the append
 * offset is left untouched.  The finalization mechanism for dedup
 * reservations is also the same as for normal write ones - the backend
 * finalizes the reservation with hammer_blockmap_finalize().
 */
hammer_reserve_t
hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
                              hammer_off_t zone_offset, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv = NULL;
        hammer_reserve_t resx = NULL;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return (NULL);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Fail if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone)
                goto failed;

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                goto failed;
        }

        base_off = hammer_xlate_to_zone2(zone_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resv, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->bytes_free = layer2->bytes_free;
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resx, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        kfree(resx, hmp->m_misc);
                        goto failed;
                }
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }

        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);

        return(resv);
}

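/*
 * Adjust resv->bytes_free for a dedup reservation.  bytes_free may
 * legally go somewhat negative under de-dup, so the guard below detects
 * runaway underflow instead of forbidding negative values: subtracting
 * two big-blocks' worth can only compare greater than the original
 * value if the 32-bit subtraction wrapped, i.e. bytes_free was already
 * absurdly negative.  Returns 1 on success, 0 if the guard trips.
 */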
static int
update_bytes_free(hammer_reserve_t resv, int bytes)
{
        int32_t temp;

        /*
         * Big-block underflow check
         */
        temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* XXX do we really need it ? */
        if (temp > resv->bytes_free) {
                kprintf("BIGBLOCK UNDERFLOW\n");
                return (0);
        }

        resv->bytes_free -= bytes;
        return (1);
}

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t base_offset;
        int error;

        KKASSERT(resv->refs > 0);
        KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
                 HAMMER_ZONE_RAW_BUFFER);

        /*
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the reservation,
         * allowing us to safely delete any related HAMMER buffers.
         *
         * If we are unable to clean out all related HAMMER buffers we
         * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_BIGBLOCK_SIZE;
                base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
                if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
                        hammer_dedup_cache_inval(hmp, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_BIGBLOCK_SIZE,
                                           1);
                if (hammer_debug_general & 0x20000) {
                        kprintf("hammer: delbgblk %016jx error %d\n",
                                (intmax_t)base_offset, error);
                }
                if (error)
                        hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                if (hammer_debug_general & 0x20000) {
                        kprintf("hammer: delresvr %016jx zone %02x\n",
                                (intmax_t)resv->zone_offset, resv->zone);
                }
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
                        int zone, struct hammer_blockmap_layer2 *layer2)
{
        hammer_reserve_t resv;

        /*
         * Allocate the reservation if necessary.
         *
         * NOTE: need lock in future around resv lookup/allocation and
         * the setdelay call, currently refs is not bumped until the call.
         */
again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
                resv->append_off = HAMMER_BIGBLOCK_SIZE;

                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
        } else {
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
        hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
}
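
/*
 * (The flush_group tag above presumably keeps the reservation on the
 * delay list until the flusher has advanced past the group that was
 * pending when the free occurred, which is what guarantees crash
 * recovery cannot observe the big-block being reused too early.)
 */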

/*
 * Reserve has reached its flush point, remove it from the delay list
 * and finish it off.  hammer_blockmap_reserve_complete() inherits
 * the ondelay reference.
 */
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        /*
         * If a big-block becomes entirely free we must create a covering
         * reservation to prevent premature reuse.  Note, however, that
         * the big-block and/or reservation may still have an append_off
         * that allows further (non-reused) allocations.
         *
         * Once the reservation has been made we re-check layer2 and if
         * the big-block is still entirely free we reset the layer2 entry.
         * The reservation will prevent premature reuse.
         *
         * NOTE: hammer_buffer's are only invalidated when the reservation
         * is completed, if the layer2 entry is still completely free at
         * that time.  Any allocations from the reservation that may have
         * occurred in the meantime, or active references on the reservation
         * from new pending allocations, will prevent the invalidation from
         * occurring.
         */
        if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);

                hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

int
hammer_blockmap_dedup(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t temp;
        int error;
        int zone __debugvar;

        if (bytes == 0)
                return (0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* prevent gcc from optimizing temp out */
        if (temp > layer2->bytes_free) {
                error = ERANGE;
                goto underflow;
        }
        layer2->bytes_free -= bytes;

        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
underflow:
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return (error);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;

        if (resv) {
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
        }

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return(error);
}

/*
 * Return the approximate number of free bytes in the big-block
 * containing the specified blockmap offset.
 *
 * WARNING: A negative number can be returned if data de-dup exists,
 *          and the result will also not represent the actual number
1326  *          of free bytes in this case.
1327  *
1328  *          This code is used only by the reblocker.
1329  */
1330 int
1331 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1332                         int *curp, int *errorp)
1333 {
1334         hammer_volume_t root_volume;
1335         hammer_blockmap_t blockmap;
1336         hammer_blockmap_t freemap;
1337         struct hammer_blockmap_layer1 *layer1;
1338         struct hammer_blockmap_layer2 *layer2;
1339         hammer_buffer_t buffer = NULL;
1340         hammer_off_t layer1_offset;
1341         hammer_off_t layer2_offset;
1342         int32_t bytes;
1343         int zone;
1344
1345         zone = HAMMER_ZONE_DECODE(zone_offset);
1346         KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1347         root_volume = hammer_get_root_volume(hmp, errorp);
1348         if (*errorp) {
1349                 *curp = 0;
1350                 return(0);
1351         }
1352         blockmap = &hmp->blockmap[zone];
1353         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1354
1355         /*
1356          * Dive layer 1.
1357          */
1358         layer1_offset = freemap->phys_offset +
1359                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1360         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1361         if (*errorp) {
1362                 *curp = 0;
1363                 bytes = 0;
1364                 goto failed;
1365         }
1366         KKASSERT(layer1->phys_offset);
1367         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1368                 hammer_lock_ex(&hmp->blkmap_lock);
1369                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1370                         panic("CRC FAILED: LAYER1");
1371                 hammer_unlock(&hmp->blkmap_lock);
1372         }
1373
1374         /*
1375          * Dive layer 2, each entry represents a big-block.
1376          *
1377          * (reuse buffer, layer1 pointer becomes invalid)
1378          */
1379         layer2_offset = layer1->phys_offset +
1380                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1381         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1382         if (*errorp) {
1383                 *curp = 0;
1384                 bytes = 0;
1385                 goto failed;
1386         }
1387         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1388                 hammer_lock_ex(&hmp->blkmap_lock);
1389                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1390                         panic("CRC FAILED: LAYER2");
1391                 hammer_unlock(&hmp->blkmap_lock);
1392         }
1393         KKASSERT(layer2->zone == zone);
1394
1395         bytes = layer2->bytes_free;
1396
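	/*
	 * *curp is set when this big-block contains the zone's current
	 * append point (next_offset), i.e. it is still being appended to.
	 */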
1397         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1398                 *curp = 0;
1399         else
1400                 *curp = 1;
1401 failed:
1402         if (buffer)
1403                 hammer_rel_buffer(buffer, 0);
1404         hammer_rel_volume(root_volume, 0);
1405         if (hammer_debug_general & 0x0800) {
1406                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1407                         (long long)zone_offset, bytes);
1408         }
1409         return(bytes);
1410 }
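
/*
 * Illustrative caller sketch (not part of this file): one way the
 * reblocker side could consume hammer_blockmap_getfree().  The helper
 * name and the fill_level policy are hypothetical.
 */
#if 0
static int
example_bigblock_is_sparse(hammer_mount_t hmp, hammer_off_t zone_offset,
			   int fill_level)
{
	int cur;
	int error;
	int bytes;

	bytes = hammer_blockmap_getfree(hmp, zone_offset, &cur, &error);
	if (error || cur)	/* I/O error, or holds the append point */
		return(0);
	if (bytes < 0)		/* possible with de-dup, see WARNING above */
		return(0);
	/* sparse when at least fill_level percent of the big-block is free */
	return(bytes >= HAMMER_BIGBLOCK_SIZE / 100 * fill_level);
}
#endif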
1411
1412
1413 /*
1414  * Lookup a blockmap offset and verify blockmap layers.
1415  */
1416 hammer_off_t
1417 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1418                         int *errorp)
1419 {
1420         hammer_volume_t root_volume;
1421         hammer_blockmap_t freemap;
1422         struct hammer_blockmap_layer1 *layer1;
1423         struct hammer_blockmap_layer2 *layer2;
1424         hammer_buffer_t buffer = NULL;
1425         hammer_off_t layer1_offset;
1426         hammer_off_t layer2_offset;
1427         hammer_off_t result_offset;
1428         hammer_off_t base_off;
1429         hammer_reserve_t resv __debugvar;
1430         int zone;
1431
1432         /*
1433          * Calculate the zone-2 offset.
1434          */
1435         zone = HAMMER_ZONE_DECODE(zone_offset);
1436         result_offset = hammer_xlate_to_zone2(zone_offset);
1437
1438         /*
1439          * Validate the allocation zone
1440          */
1441         root_volume = hammer_get_root_volume(hmp, errorp);
1442         if (*errorp)
1443                 return(0);
1444         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1445         KKASSERT(freemap->phys_offset != 0);
1446
1447         /*
1448          * Dive layer 1.
1449          */
1450         layer1_offset = freemap->phys_offset +
1451                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1452         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1453         if (*errorp)
1454                 goto failed;
1455         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1456         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1457                 hammer_lock_ex(&hmp->blkmap_lock);
1458                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1459                         panic("CRC FAILED: LAYER1");
1460                 hammer_unlock(&hmp->blkmap_lock);
1461         }
1462
1463         /*
1464          * Dive layer 2, each entry represents a big-block.
1465          */
1466         layer2_offset = layer1->phys_offset +
1467                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1468         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1469
1470         if (*errorp)
1471                 goto failed;
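	/*
	 * A layer2 zone of 0 indicates a big-block which has been
	 * reserved but not yet finalized; it must still be covered by
	 * an in-memory reservation with a matching zone.
	 */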
1472         if (layer2->zone == 0) {
1473                 base_off = hammer_xlate_to_zone2(zone_offset &
1474                                                 ~HAMMER_BIGBLOCK_MASK64);
1475                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1476                                  base_off);
1477                 KKASSERT(resv && resv->zone == zone);
1478
1479         } else if (layer2->zone != zone) {
1480                 panic("hammer_blockmap_lookup_verify: bad zone %d/%d",
1481                         layer2->zone, zone);
1482         }
1483         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1484                 hammer_lock_ex(&hmp->blkmap_lock);
1485                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1486                         panic("CRC FAILED: LAYER2");
1487                 hammer_unlock(&hmp->blkmap_lock);
1488         }
1489
1490 failed:
1491         if (buffer)
1492                 hammer_rel_buffer(buffer, 0);
1493         hammer_rel_volume(root_volume, 0);
1494         if (hammer_debug_general & 0x0800) {
1495                 kprintf("hammer_blockmap_lookup_verify: %016llx -> %016llx\n",
1496                         (long long)zone_offset, (long long)result_offset);
1497         }
1498         return(result_offset);
1499 }
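
/*
 * Illustrative caller sketch (not part of this file): translating a
 * zone-encoded offset to its zone-2 equivalent while verifying the
 * blockmap layers along the way.
 */
#if 0
	hammer_off_t zone2_offset;
	int error;

	zone2_offset = hammer_blockmap_lookup_verify(hmp, zone_offset,
						     &error);
	if (error == 0) {
		/* zone2_offset addresses the same big-block storage */
	}
#endif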
1500
1501
1502 /*
1503  * Check space availability
1504  *
1505  * MPSAFE - does not require fs_token
1506  */
1507 int
1508 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1509 {
1510         const int in_size = sizeof(struct hammer_inode_data) +
1511                             sizeof(union hammer_btree_elm);
1512         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1513         int64_t usedbytes;
1514
1515         usedbytes = hmp->rsv_inodes * in_size +
1516                     hmp->rsv_recs * rec_size +
1517                     hmp->rsv_databytes +
1518                     ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1519                     ((int64_t)hammer_limit_dirtybufspace) +
1520                     (slop << HAMMER_BIGBLOCK_BITS);
1521
1522         hammer_count_extra_space_used = usedbytes;      /* debugging */
1523         if (resp)
1524                 *resp = usedbytes;
1525
1526         if (hmp->copy_stat_freebigblocks >=
1527             (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1528                 return(0);
1529         }
1530         return(ENOSPC);
1531 }
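
/*
 * Worked example (illustrative): with 8MB big-blocks
 * (HAMMER_BIGBLOCK_BITS == 23) a slop of 8 alone contributes
 * 8 << 23 = 64MB to usedbytes, i.e. the volume must retain at least
 * 8 free big-blocks beyond the other tracked reservations or ENOSPC
 * is returned.
 */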
1532
1533 static int
1534 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1535 {
1536         hammer_blockmap_t freemap;
1537         struct hammer_blockmap_layer1 *layer1;
1538         hammer_buffer_t buffer1 = NULL;
1539         hammer_off_t layer1_offset, offset;
1540         int zone, vol_no, error = 0;
1541
1542         offset = *offsetp;
1543         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1544
1545         layer1_offset = freemap->phys_offset +
1546                         HAMMER_BLOCKMAP_LAYER1_OFFSET(offset);
1547
1548         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1549         if (error)
1550                 goto end;
1551
1552         /*
1553          * No more space available in this volume's layer1s, skip to the next volume.
1554          */
1555         if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
1556                 zone = HAMMER_ZONE_DECODE(offset);
1557                 vol_no = HAMMER_VOL_DECODE(offset) + 1;
1558                 KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1559                 if (vol_no == HAMMER_MAX_VOLUMES) {
1560                         vol_no = 0;
1561                         ++zone;
1562                 }
1563                 offset &= HAMMER_BLOCKMAP_LAYER2_MASK;
1564                 *offsetp = HAMMER_ENCODE(zone, vol_no, offset);
1565         }
1566 end:
1567         if (buffer1)
1568                 hammer_rel_buffer(buffer1, 0);
1569         return(error);
1570 }
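
/*
 * Example (illustrative): if *offsetp decodes to volume 2 and that
 * volume's layer1 entry reads HAMMER_BLOCKMAP_UNAVAIL, the offset is
 * re-encoded at the same layer2-relative offset in volume 3.  Past
 * the last volume the volume number wraps to 0 and the zone is
 * incremented.
 */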