/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER blockmap
 */
#include <vm/vm_page2.h>

#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
                                    hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
static int update_bytes_free(hammer_reserve_t resv, int bytes);
static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
static void hammer_skip_volume(hammer_off_t *offsetp);

/*
 * Reserved big-blocks red-black tree support
 */
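/*
 * RB_GENERATE2 emits the tree operations, including the keyed RB_LOOKUP
 * (by zone_offset) used throughout this file to find reservations.
 */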
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
                      hammer_off_t hint, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
        int use_hint;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
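        /* round up to a 16-byte boundary, e.g. a 100-byte request becomes 112 */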
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(hammer_is_zone2_mapped_index(zone));

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Use the hint if we have one.
         */
        if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
                next_offset = (hint + 15) & ~(hammer_off_t)15;
                use_hint = 1;
        } else {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }
again:

        /*
         * use_hint is turned off if we leave the hinted big-block.
         */
        if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }

        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
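        /*
         * Note: ((x ^ y) & ~mask) is non-zero iff x and y fall in different
         * mask-aligned blocks, which is how the two boundary-crossing tests
         * below work.
         */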
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);

        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  The test is repeated under the blockmap lock
         * because we can race another thread that is in the middle of
         * modifying the entry; only a mismatch that persists while
         * holding the lock is fatal.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
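        /*
         * (Each layer1 entry covers HAMMER_BLOCKMAP_LAYER2 bytes of zone
         * address space, so the rounding below advances next_offset to the
         * next layer1 boundary.)
         */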
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset)) {
                        result_offset = 0;
                        goto failed;
                }
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Skip the whole volume if it is pointing to a layer2 big-block
         * on a volume that we are currently trying to remove from the
         * file-system. This is used by the volume-del code together with
         * the reblocker to free up a volume.
         */
        if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
                hammer_skip_volume(&next_offset);
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  This can race another thread holding the lock
         * and in the middle of modifying layer2.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

#if 0
        /*
         * If operating in the current non-hint blockmap block, do not
         * allow it to get over-full.  Also drop any active hinting so
         * blockmap->next_offset is updated at the end.
         *
         * We do this for B-Tree and meta-data allocations to provide
         * localization for updates.
         */
        if ((zone == HAMMER_ZONE_BTREE_INDEX ||
             zone == HAMMER_ZONE_META_INDEX) &&
            offset >= HAMMER_BIGBLOCK_OVERFILL &&
            !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
                if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
                        next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                        use_hint = 0;
                        goto again;
                }
        }
#endif

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
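                /* ref is dropped via hammer_blockmap_reserve_complete below */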
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the big-block to our zone
                 */
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        /*
         * NOTE: bytes_free can legally go negative due to de-dup.
         */
        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);

        /*
         * We hold the blockmap lock and should be the only ones
         * capable of modifying resv->append_off.  Track the allocation
         * as appropriate.
         */
        KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew_ext().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
                if (*errorp) {
                        result_offset = 0;
                        goto failed;
                }
        }
        result_offset = next_offset;

        /*
         * If we weren't supplied with a hint or could not use the hint
         * then we wound up using blockmap->next_offset as the hint and
         * need to save it.
         */
        if (use_hint == 0) {
                hammer_modify_volume_noundo(NULL, root_volume);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        }
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
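/*
 * Example (sketch, not verbatim from any caller): a backend allocation
 * of a B-Tree node might look like
 *
 *      node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *                                          sizeof(struct hammer_node_ondisk),
 *                                          hint, &error);
 *
 * where hint is a zone-X offset near related data (or 0 for none).
 */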

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for big-blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(hammer_is_zone2_mapped_index(zone));
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset))
                        goto failed;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resx->flags |= HAMMER_RESF_LAYER2FREE;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         *
         * If we do not have a whole lot of system memory we really can't
         * afford to block while holding the blkmap_lock!
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
                        hammer_bnew(hmp, next_offset, errorp, &buffer3);
                        if (*errorp)
                                goto failed;
                }
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}
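/*
 * Sketch of the intended life cycle (hypothetical caller, not verbatim
 * from the tree):
 *
 *      resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *      ... frontend issues direct write I/O against zone_off ...
 *      hammer_blockmap_finalize(trans, resv, zone_off, bytes);  (backend)
 *      hammer_blockmap_reserve_complete(hmp, resv);             (drop ref)
 */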

/*
 * Frontend function - Dedup bytes in a zone.
 *
 * Dedup reservations work exactly the same as normal write reservations
 * except we only adjust the bytes_free field and don't touch the append
 * offset.  The finalization mechanism for dedup reservations is also the
 * same as for normal write reservations - the backend finalizes the
 * reservation with hammer_blockmap_finalize().
 */
hammer_reserve_t
hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
                              hammer_off_t zone_offset, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv = NULL;
        hammer_reserve_t resx = NULL;

        /*
         * Setup
         */
        KKASSERT(hammer_is_zone2_mapped_index(zone));
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return (NULL);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Fail if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone)
                goto failed;

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                goto failed;
        }

        base_off = hammer_xlate_to_zone2(zone_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resv, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->bytes_free = layer2->bytes_free;
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resx, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        kfree(resx, hmp->m_misc);
                        goto failed;
                }
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }

        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);

        return(resv);
}

static int
update_bytes_free(hammer_reserve_t resv, int bytes)
{
        int32_t temp;

        /*
         * Big-block underflow check
         */
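        /*
         * Subtracting a positive constant can only leave temp larger than
         * the original value if the signed subtraction wrapped, i.e. if
         * bytes_free is already within two big-blocks of INT32_MIN, so
         * this detects a counter that has gone absurdly negative.
         */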
        temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* XXX do we really need it? */
        if (temp > resv->bytes_free) {
                hdkprintf("BIGBLOCK UNDERFLOW\n");
                return (0);
        }

        resv->bytes_free -= bytes;
        return (1);
}

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t base_offset;
        int error;

        KKASSERT(resv->refs > 0);
        KKASSERT(hammer_is_zone_raw_buffer(resv->zone_offset));

        /*
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the reservation,
         * allowing us to safely delete any related HAMMER buffers.
         *
         * If we are unable to clean out all related HAMMER buffers we
         * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_BIGBLOCK_SIZE;
                base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
                if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
                        hammer_dedup_cache_inval(hmp, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_BIGBLOCK_SIZE,
                                           1);
                if (hammer_debug_general & 0x20000) {
                        hkprintf("delbgblk %016jx error %d\n",
                                (intmax_t)base_offset, error);
                }
                if (error)
                        hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                if (hammer_debug_general & 0x20000) {
                        hkprintf("delresvr %016jx zone %02x\n",
                                (intmax_t)resv->zone_offset, resv->zone);
                }
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
                        int zone, struct hammer_blockmap_layer2 *layer2)
{
        hammer_reserve_t resv;

        /*
         * Allocate the reservation if necessary.
         *
         * NOTE: need lock in future around resv lookup/allocation and
         * the setdelay call, currently refs is not bumped until the call.
         */
again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
                resv->append_off = HAMMER_BIGBLOCK_SIZE;

                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
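                /*
                 * A racing thread may have inserted a reservation for the
                 * same base_offset; RB_INSERT then returns the collider,
                 * so free ours and retry the lookup.
                 */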
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
        } else {
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
        hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.  The reservation is tagged with the next
 * flusher sequence number (flg_no) and is not released until the
 * flusher has cycled past it.
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flg_no = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flg_no = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
}

/*
 * Reserve has reached its flush point, remove it from the delay list
 * and finish it off.  hammer_blockmap_reserve_complete() inherits
 * the ondelay reference.
 */
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(hammer_is_zone2_mapped_index(zone));
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        /*
         * If a big-block becomes entirely free we must create a covering
         * reservation to prevent premature reuse.  Note, however, that
         * the big-block and/or reservation may still have an append_off
         * that allows further (non-reused) allocations.
         *
         * Once the reservation has been made we re-check layer2 and if
         * the big-block is still entirely free we reset the layer2 entry.
         * The reservation will prevent premature reuse.
         *
         * NOTE: hammer_buffer's are only invalidated when the reservation
         * is completed, if the layer2 entry is still completely free at
         * that time.  Any allocations from the reservation that may have
         * occurred in the mean time, or active references on the reservation
         * from new pending allocations, will prevent the invalidation from
         * occurring.
         */
        if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);

                hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

int
hammer_blockmap_dedup(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t temp;
        int error;
        int zone __debugvar;

        if (bytes == 0)
                return (0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(hammer_is_zone2_mapped_index(zone));
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* prevent gcc from optimizing temp out */
        if (temp > layer2->bytes_free) {
                error = ERANGE;
                goto underflow;
        }
        layer2->bytes_free -= bytes;

        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
underflow:
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return (error);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(hammer_is_zone2_mapped_index(zone));
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;

        if (resv) {
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
        }

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return(error);
}

/*
 * Return the approximate number of free bytes in the big-block
 * containing the specified blockmap offset.
 *
 * WARNING: A negative number can be returned if data de-dup exists,
 *          and the result will also not represent the actual number
 *          of free bytes in this case.
 *
 *          This code is used only by the reblocker.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(hammer_is_zone2_mapped_index(zone));
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp) {
                *curp = 0;
                bytes = 0;
                goto failed;
        }
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         *
         * (reuse buffer, layer1 pointer becomes invalid)
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        if (*errorp) {
                *curp = 0;
                bytes = 0;
                goto failed;
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

        /*
         * *curp is set to 1 only when there is no error and next_offset
         * and zone_offset reside in the same big-block.
         */
        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
                *curp = 0;  /* not same */
        else
                *curp = 1;
failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x4000) {
                hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset, bytes);
        }
        return(bytes);
}


/*
 * Lookup a blockmap offset and verify blockmap layers.
 */
hammer_off_t
hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv __debugvar;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        result_offset = hammer_xlate_to_zone2(zone_offset);

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp)
                goto failed;
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        if (*errorp)
                goto failed;
        if (layer2->zone == 0) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);

        } else if (layer2->zone != zone) {
                hpanic("bad zone %d/%d", layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                hdkprintf("%016jx -> %016jx\n",
                        (intmax_t)zone_offset, (intmax_t)result_offset);
        }
        return(result_offset);
}


/*
 * Check space availability
 *
 * MPSAFE - does not require fs_token
 */
int
_hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        int64_t usedbytes;

        usedbytes = hmp->rsv_inodes * in_size +
                    hmp->rsv_recs * rec_size +
                    hmp->rsv_databytes +
                    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
                    ((int64_t)hammer_limit_dirtybufspace) +
                    (slop << HAMMER_BIGBLOCK_BITS);

        hammer_count_extra_space_used = usedbytes;      /* debugging */
        if (resp)
                *resp = usedbytes;

        if (hmp->copy_stat_freebigblocks >=
            (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
                return(0);
        }
        return (ENOSPC);
}
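/*
 * Example (sketch): a writer would typically guard new work with
 * something like
 *
 *      if (_hammer_checkspace(hmp, slop, NULL) == ENOSPC)
 *              return (ENOSPC);
 *
 * where slop is a caller-chosen safety margin expressed in big-blocks
 * (it is shifted by HAMMER_BIGBLOCK_BITS above).
 */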

static int
hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
{
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        hammer_buffer_t buffer1 = NULL;
        hammer_off_t layer1_offset;
        int error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto end;

        /*
         * No more physically available space in layer1s
         * of the current volume, go to the next volume.
         */
        if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
                hammer_skip_volume(offsetp);
end:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        return(error);
}

static void
hammer_skip_volume(hammer_off_t *offsetp)
{
        hammer_off_t offset;
        int zone, vol_no;

        offset = *offsetp;
        zone = HAMMER_ZONE_DECODE(offset);
        vol_no = HAMMER_VOL_DECODE(offset) + 1;
        KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);

        if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
                vol_no = 0;
                ++zone;
        }

        *offsetp = HAMMER_ENCODE(zone, vol_no, 0);
}