hammer: Fix unusual line break style
sys/vfs/hammer/hammer_blockmap.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
                                    hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
static int update_bytes_free(hammer_reserve_t resv, int bytes);
static int hammer_check_volume(hammer_mount_t, hammer_off_t*);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}
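
/*
 * Note: reservations are keyed by the zone-2 base offset of the
 * big-block they cover, so allocators can locate a covering
 * reservation with a single RB_LOOKUP on hmp->rb_resv_root (see
 * hammer_blockmap_alloc() below).
 */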

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
                      hammer_off_t hint, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
        int use_hint;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Use the hint if we have one.
         */
        if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
                next_offset = (hint + 15) & ~(hammer_off_t)15;
                use_hint = 1;
        } else {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }
again:

        /*
         * use_hint is turned off if we leave the hinted big-block.
         */
        if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }

        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
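
        /*
         * Boundary-check sketch: XORing the first and last byte offsets
         * of the request exposes any differing bit above the buffer (or
         * big-block) mask, i.e. a boundary crossing.  For example, with
         * 16KB buffers an 8KB request starting 12KB into a buffer would
         * cross, so next_offset is rounded up to the start of the next
         * buffer and the scan restarts from "again".
         */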
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);

        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
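
        /*
         * The re-test under blkmap_lock above is the standard pattern
         * used for every CRC check in this file: a transient mismatch
         * can be caused by another thread modifying the entry while
         * holding the lock, so only a mismatch that persists under the
         * lock is treated as real corruption.
         */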

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset)) {
                        result_offset = 0;
                        goto failed;
                }
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Skip this layer1 entry if it is pointing to a layer2 big-block
         * on a volume that we are currently trying to remove from the
         * file-system. This is used by the volume-del code together with
         * the reblocker to free up a volume.
         */
        if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
            hmp->volume_to_remove) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  This can race another thread holding the lock
         * and in the middle of modifying layer2.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

#if 0
        /*
         * If operating in the current non-hint blockmap block, do not
         * allow it to get over-full.  Also drop any active hinting so
         * blockmap->next_offset is updated at the end.
         *
         * We do this for B-Tree and meta-data allocations to provide
         * localization for updates.
         */
        if ((zone == HAMMER_ZONE_BTREE_INDEX ||
             zone == HAMMER_ZONE_META_INDEX) &&
            offset >= HAMMER_BIGBLOCK_OVERFILL &&
            !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
                if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
                        next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                        use_hint = 0;
                        goto again;
                }
        }
#endif

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the big-block to our zone
                 */
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        /*
         * NOTE: bytes_free can legally go negative due to de-dup.
         */
        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);

        /*
         * We hold the blockmap lock and should be the only ones
         * capable of modifying resv->append_off.  Track the allocation
         * as appropriate.
         */
        KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew_ext().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
                if (*errorp) {
                        result_offset = 0;
                        goto failed;
                }
        }
        result_offset = next_offset;

        /*
         * If we weren't supplied with a hint or could not use the hint
         * then we wound up using blockmap->next_offset as the hint and
         * need to save it.
         */
        if (use_hint == 0) {
                hammer_modify_volume_noundo(NULL, root_volume);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        }
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
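
/*
 * Usage sketch (hypothetical caller, error handling abbreviated).  The
 * return value is a zone-encoded, 16-byte aligned offset, or 0 with
 * *errorp set (e.g. ENOSPC) on failure:
 *
 *	hammer_off_t off;
 *	int error;
 *
 *	off = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *				    sizeof(struct hammer_node_ondisk),
 *				    hint_offset, &error);
 *	if (off == 0)
 *		return (error);
 */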

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for big-blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset))
                        goto failed;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resx->flags |= HAMMER_RESF_LAYER2FREE;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         *
         * If we do not have a whole lot of system memory we really can't
         * afford to block while holding the blkmap_lock!
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
                        hammer_bnew(hmp, next_offset, errorp, &buffer3);
                        if (*errorp)
                                goto failed;
                }
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}
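
/*
 * Reservation life cycle sketch (frontend/backend split; a simplified
 * view, not an exact call chain):
 *
 *  1. Frontend: hammer_blockmap_reserve() hands back a zone offset and
 *     a hammer_reserve_t; data is written to the media directly,
 *     without committing meta-data, while the reservation pins the
 *     big-block.
 *  2. Backend: hammer_blockmap_finalize() commits the meta-data when
 *     the related record is flushed.
 *  3. hammer_blockmap_reserve_complete() drops the reference when the
 *     reservation is no longer needed.
 */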

/*
 * Frontend function - Dedup bytes in a zone.
 *
 * Dedup reservations work exactly the same as normal write reservations
 * except we only adjust the bytes_free field and don't touch the append
 * offset.  The finalization mechanism for dedup reservations is also the
 * same as for normal write ones - the backend finalizes the reservation
 * with hammer_blockmap_finalize().
 */
hammer_reserve_t
hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
                              hammer_off_t zone_offset, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv = NULL;
        hammer_reserve_t resx = NULL;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return (NULL);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Fail if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone)
                goto failed;

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                goto failed;
        }

        base_off = hammer_xlate_to_zone2(zone_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resv, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->bytes_free = layer2->bytes_free;
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resx, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        kfree(resx, hmp->m_misc);
                        goto failed;
                }
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }

        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);

        return(resv);
}

static int
update_bytes_free(hammer_reserve_t resv, int bytes)
{
        int32_t temp;

        /*
         * Big-block underflow check
         */
        temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* XXX do we really need it ? */
        if (temp > resv->bytes_free) {
                kprintf("BIGBLOCK UNDERFLOW\n");
                return (0);
        }

        resv->bytes_free -= bytes;
        return (1);
}
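
/*
 * Note on the check above: bytes_free may legally be negative due to
 * de-dup, so a simple "bytes_free < bytes" test is not usable here.
 * Subtracting two full big-blocks can only yield a result greater than
 * the original value via signed wrap-around, i.e. when bytes_free has
 * already drifted absurdly far negative; in that case the dedup
 * reservation is refused instead of pushing the counter further.
 */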

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t base_offset;
        int error;

        KKASSERT(resv->refs > 0);
        KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
                 HAMMER_ZONE_RAW_BUFFER);

        /*
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the reservation,
         * allowing us to safely delete any related HAMMER buffers.
         *
         * If we are unable to clean out all related HAMMER buffers we
         * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_BIGBLOCK_SIZE;
                base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
                if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
                        hammer_dedup_cache_inval(hmp, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_BIGBLOCK_SIZE,
                                           1);
                if (hammer_debug_general & 0x20000) {
                        kprintf("hammer: delbgblk %016jx error %d\n",
                                (intmax_t)base_offset, error);
                }
                if (error)
                        hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                if (hammer_debug_general & 0x20000) {
                        kprintf("hammer: delresvr %016jx zone %02x\n",
                                (intmax_t)resv->zone_offset, resv->zone);
                }
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
                        int zone, struct hammer_blockmap_layer2 *layer2)
{
        hammer_reserve_t resv;

        /*
         * Allocate the reservation if necessary.
         *
         * NOTE: need lock in future around resv lookup/allocation and
         * the setdelay call, currently refs is not bumped until the call.
         */
again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
                resv->append_off = HAMMER_BIGBLOCK_SIZE;

                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
        } else {
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
        hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
}
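
/*
 * The flush_group assignment above (hmp->flusher.next + 1) parks the
 * reservation until at least one full flush cycle beyond the current
 * one has completed; hammer_reserve_clrdelay() below then releases the
 * ondelay reference once that flush point is reached.
 */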

/*
 * Reserve has reached its flush point, remove it from the delay list
 * and finish it off.  hammer_blockmap_reserve_complete() inherits
 * the ondelay reference.
 */
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        /*
         * If a big-block becomes entirely free we must create a covering
         * reservation to prevent premature reuse.  Note, however, that
         * the big-block and/or reservation may still have an append_off
         * that allows further (non-reused) allocations.
         *
         * Once the reservation has been made we re-check layer2 and if
         * the big-block is still entirely free we reset the layer2 entry.
         * The reservation will prevent premature reuse.
         *
         * NOTE: hammer_buffers are only invalidated when the reservation
         * is completed, if the layer2 entry is still completely free at
         * that time.  Any allocations from the reservation that may have
         * occurred in the meantime, or active references on the reservation
         * from new pending allocations, will prevent the invalidation from
         * occurring.
         */
        if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);

                hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}
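
/*
 * Note that hammer_blockmap_free() never rewinds a zone's iterator;
 * freed space only becomes reusable when an entire big-block empties
 * out and its layer2 entry is reset to zone 0 above, gated by the
 * covering reservation so crash recovery cannot observe a prematurely
 * recycled big-block.
 */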

int
hammer_blockmap_dedup(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t temp;
        int error;
        int zone __debugvar;

        if (bytes == 0)
                return (0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* prevent gcc from optimizing temp out */
        if (temp > layer2->bytes_free) {
                error = ERANGE;
                goto underflow;
        }
        layer2->bytes_free -= bytes;

        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
underflow:
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return (error);
}
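
/*
 * The temp/cpu_ccfence() sequence above mirrors update_bytes_free():
 * cpu_ccfence() is a compiler barrier only, keeping gcc from folding
 * the wrap-around test away, and the underflow case returns ERANGE
 * rather than panicking because a dedup underflow is recoverable.
 */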

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;

        if (resv) {
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
        }

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return(error);
}
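
/*
 * Finalization pairs with hammer_blockmap_reserve().  A sketch of the
 * combined flow, assuming a simple data write:
 *
 *	resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &err);
 *	... frontend writes the data directly to zone_off ...
 *	hammer_blockmap_finalize(trans, resv, zone_off, bytes);
 *
 * Because finalizations can arrive out of order, append_off above is
 * only ratcheted forward, never moved back.
 */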

/*
 * Return the approximate number of free bytes in the big-block
 * containing the specified blockmap offset.
 *
 * WARNING: A negative number can be returned if data de-dup exists,
 *          and the result will also not represent the actual number
 *          of free bytes in this case.
1326  *
1327  *          This code is used only by the reblocker.
1328  */
1329 int
1330 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1331                         int *curp, int *errorp)
1332 {
1333         hammer_volume_t root_volume;
1334         hammer_blockmap_t blockmap;
1335         hammer_blockmap_t freemap;
1336         struct hammer_blockmap_layer1 *layer1;
1337         struct hammer_blockmap_layer2 *layer2;
1338         hammer_buffer_t buffer = NULL;
1339         hammer_off_t layer1_offset;
1340         hammer_off_t layer2_offset;
1341         int32_t bytes;
1342         int zone;
1343
1344         zone = HAMMER_ZONE_DECODE(zone_offset);
1345         KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1346         root_volume = hammer_get_root_volume(hmp, errorp);
1347         if (*errorp) {
1348                 *curp = 0;
1349                 return(0);
1350         }
1351         blockmap = &hmp->blockmap[zone];
1352         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1353
1354         /*
1355          * Dive layer 1.
1356          */
1357         layer1_offset = freemap->phys_offset +
1358                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1359         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1360         if (*errorp) {
1361                 *curp = 0;
1362                 bytes = 0;
1363                 goto failed;
1364         }
1365         KKASSERT(layer1->phys_offset);
1366         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1367                 hammer_lock_ex(&hmp->blkmap_lock);
1368                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1369                         panic("CRC FAILED: LAYER1");
1370                 hammer_unlock(&hmp->blkmap_lock);
1371         }
1372
1373         /*
1374          * Dive layer 2, each entry represents a big-block.
1375          *
1376          * (reuse buffer, layer1 pointer becomes invalid)
1377          */
1378         layer2_offset = layer1->phys_offset +
1379                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1380         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1381         if (*errorp) {
1382                 *curp = 0;
1383                 bytes = 0;
1384                 goto failed;
1385         }
1386         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1387                 hammer_lock_ex(&hmp->blkmap_lock);
1388                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1389                         panic("CRC FAILED: LAYER2");
1390                 hammer_unlock(&hmp->blkmap_lock);
1391         }
1392         KKASSERT(layer2->zone == zone);
1393
1394         bytes = layer2->bytes_free;
1395
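        /*
         * Report in *curp whether the allocator is currently filling
         * this same big-block: the XOR clears the bits the two offsets
         * agree on, and masking off the intra-big-block bits leaves
         * zero only if they match in the big-block portion and above.
         */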
1396         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1397                 *curp = 0;
1398         else
1399                 *curp = 1;
1400 failed:
1401         if (buffer)
1402                 hammer_rel_buffer(buffer, 0);
1403         hammer_rel_volume(root_volume, 0);
1404         if (hammer_debug_general & 0x0800) {
1405                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1406                         (long long)zone_offset, bytes);
1407         }
1408         return(bytes);
1409 }
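/*
 * Illustrative sketch only (not code from this file): a caller such as
 * the reblocker can combine the return value with *curp roughly as
 *
 *      bytes = hammer_blockmap_getfree(hmp, zone_offset, &cur, &error);
 *      if (error == 0 && cur == 0 && bytes >= fill_threshold)
 *              ...reblock the data out of this big-block...
 *
 * where fill_threshold would be derived from the user's requested fill
 * level and "cur" skips the big-block the allocator is appending to.
 */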
1410
1412 /*
1413  * Lookup a blockmap offset and verify blockmap layers.
1414  */
1415 hammer_off_t
1416 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1417                         int *errorp)
1418 {
1419         hammer_volume_t root_volume;
1420         hammer_blockmap_t freemap;
1421         struct hammer_blockmap_layer1 *layer1;
1422         struct hammer_blockmap_layer2 *layer2;
1423         hammer_buffer_t buffer = NULL;
1424         hammer_off_t layer1_offset;
1425         hammer_off_t layer2_offset;
1426         hammer_off_t result_offset;
1427         hammer_off_t base_off;
1428         hammer_reserve_t resv __debugvar;
1429         int zone;
1430
1431         /*
1432          * Calculate the zone-2 offset.
1433          */
1434         zone = HAMMER_ZONE_DECODE(zone_offset);
1435         result_offset = hammer_xlate_to_zone2(zone_offset);
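        /*
         * Blockmap-backed zones alias zone-2 (raw big-block space)
         * 1:1, so the translation only rewrites the zone bits of the
         * offset; the volume number and offset bits pass through.
         */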
1436
1437         /*
1438          * Validate the allocation zone
1439          */
1440         root_volume = hammer_get_root_volume(hmp, errorp);
1441         if (*errorp)
1442                 return(0);
1443         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1444         KKASSERT(freemap->phys_offset != 0);
1445
1446         /*
1447          * Dive layer 1.
1448          */
1449         layer1_offset = freemap->phys_offset +
1450                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1451         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1452         if (*errorp)
1453                 goto failed;
1454         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1455         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1456                 hammer_lock_ex(&hmp->blkmap_lock);
1457                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1458                         panic("CRC FAILED: LAYER1");
1459                 hammer_unlock(&hmp->blkmap_lock);
1460         }
1461
1462         /*
1463          * Dive layer 2, each entry represents a big-block.
1464          */
1465         layer2_offset = layer1->phys_offset +
1466                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1467         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1469         if (*errorp)
1470                 goto failed;
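        /*
         * A layer2 zone of 0 means the big-block is merely reserved,
         * its allocation not yet finalized, so a matching in-memory
         * reservation must exist.  Any other zone must agree with the
         * zone encoded in the offset being verified.
         */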
1471         if (layer2->zone == 0) {
1472                 base_off = hammer_xlate_to_zone2(zone_offset &
1473                                                 ~HAMMER_BIGBLOCK_MASK64);
1474                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1475                                  base_off);
1476                 KKASSERT(resv && resv->zone == zone);
1478         } else if (layer2->zone != zone) {
1479                 panic("hammer_blockmap_lookup_verify: bad zone %d/%d",
1480                         layer2->zone, zone);
1481         }
1482         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1483                 hammer_lock_ex(&hmp->blkmap_lock);
1484                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1485                         panic("CRC FAILED: LAYER2");
1486                 hammer_unlock(&hmp->blkmap_lock);
1487         }
1488
1489 failed:
1490         if (buffer)
1491                 hammer_rel_buffer(buffer, 0);
1492         hammer_rel_volume(root_volume, 0);
1493         if (hammer_debug_general & 0x0800) {
1494                 kprintf("hammer_blockmap_lookup_verify: %016llx -> %016llx\n",
1495                         (long long)zone_offset, (long long)result_offset);
1496         }
1497         return(result_offset);
1498 }
1499
1501 /*
1502  * Check space availability
1503  *
1504  * MPSAFE - does not require fs_token
1505  */
1506 int
1507 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1508 {
1509         const int in_size = sizeof(struct hammer_inode_data) +
1510                             sizeof(union hammer_btree_elm);
1511         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1512         int64_t usedbytes;
1513
1514         usedbytes = hmp->rsv_inodes * in_size +
1515                     hmp->rsv_recs * rec_size +
1516                     hmp->rsv_databytes +
1517                     ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1518                     ((int64_t)hammer_limit_dirtybufspace) +
1519                     (slop << HAMMER_BIGBLOCK_BITS);
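        /*
         * All terms are byte counts; rsv_fromdelay and slop are counts
         * of big-blocks, hence the shifts.  e.g. a slop of 8 adds
         * 8 << 23 bytes (64MB) of headroom to the estimate.
         */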
1520
1521         hammer_count_extra_space_used = usedbytes;      /* debugging */
1522         if (resp)
1523                 *resp = usedbytes;
1524
1525         if (hmp->copy_stat_freebigblocks >=
1526             (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1527                 return(0);
1528         }
1529         return(ENOSPC);
1530 }
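/*
 * Illustrative sketch only: callers normally use the hammer_checkspace()
 * wrapper from hammer.h with a HAMMER_CHKSPC_* slop constant sized for
 * the operation, along the lines of
 *
 *      if (hammer_checkspace(hmp, HAMMER_CHKSPC_WRITE))
 *              return (ENOSPC);
 */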
1531
1532 static int
1533 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1534 {
1535         hammer_blockmap_t freemap;
1536         struct hammer_blockmap_layer1 *layer1;
1537         hammer_buffer_t buffer1 = NULL;
1538         hammer_off_t layer1_offset, offset;
1539         int zone, vol_no, error = 0;
1540
1541         offset = *offsetp;
1542         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1543
1544         layer1_offset = freemap->phys_offset +
1545                         HAMMER_BLOCKMAP_LAYER1_OFFSET(offset);
1546
1547         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1548         if (error)
1549                 goto end;
1550
1551         /*
1552          * No more space available in this volume's layer1 entries.
1553          */
1554         if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
1555                 zone = HAMMER_ZONE_DECODE(offset);
1556                 vol_no = HAMMER_VOL_DECODE(offset) + 1;
1557                 KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1558                 if (vol_no == HAMMER_MAX_VOLUMES) {
1559                         vol_no = 0;
1560                         ++zone;
1561                 }
1562                 offset &= HAMMER_BLOCKMAP_LAYER2_MASK;
1563                 *offsetp = HAMMER_ENCODE(zone, vol_no, offset);
1564         }
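        /*
         * The masked offset keeps only the position within a single
         * layer1 range (HAMMER_BLOCKMAP_LAYER2 bytes) before being
         * re-encoded, so the caller resumes its scan in the next
         * volume, or in the next zone once the last volume is reached.
         */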
1565 end:
1566         if (buffer1)
1567                 hammer_rel_buffer(buffer1, 0);
1568         return(error);
1569 }