1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44                                     hammer_off_t base_offset, int zone,
45                                     struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48
49 /*
50  * Reserved big-blocks red-black tree support
51  */
52 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
53              hammer_res_rb_compare, hammer_off_t, zone_offset);
54
55 static int
56 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57 {
58         if (res1->zone_offset < res2->zone_offset)
59                 return(-1);
60         if (res1->zone_offset > res2->zone_offset)
61                 return(1);
62         return(0);
63 }
64
65 /*
66  * Allocate bytes from a zone
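 *
 * Returns a zone-encoded offset on success, or 0 with *errorp set
 * (typically ENOSPC) on failure.  The iterator normally resumes from
 * the zone's next_offset unless a usable hint was supplied.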
67  */
68 hammer_off_t
69 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
70                       hammer_off_t hint, int *errorp)
71 {
72         hammer_mount_t hmp;
73         hammer_volume_t root_volume;
74         hammer_blockmap_t blockmap;
75         hammer_blockmap_t freemap;
76         hammer_reserve_t resv;
77         struct hammer_blockmap_layer1 *layer1;
78         struct hammer_blockmap_layer2 *layer2;
79         hammer_buffer_t buffer1 = NULL;
80         hammer_buffer_t buffer2 = NULL;
81         hammer_buffer_t buffer3 = NULL;
82         hammer_off_t tmp_offset;
83         hammer_off_t next_offset;
84         hammer_off_t result_offset;
85         hammer_off_t layer1_offset;
86         hammer_off_t layer2_offset;
87         hammer_off_t base_off;
88         int loops = 0;
89         int offset;             /* offset within big-block */
90         int use_hint;
91
92         hmp = trans->hmp;
93
94         /*
95          * Deal with alignment and buffer-boundary issues.
96          *
97          * Be careful, certain primary alignments are used below to allocate
98          * new blockmap blocks.
99          */
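        /*
         * Informally: the (bytes + 15) & ~15 below rounds the request up
         * to the next 16-byte multiple, e.g. a 100-byte request becomes
         * 112 bytes.
         */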
100         bytes = (bytes + 15) & ~15;
101         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
102         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
103
104         /*
105          * Setup
106          */
107         root_volume = trans->rootvol;
108         *errorp = 0;
109         blockmap = &hmp->blockmap[zone];
110         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
111         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
112
113         /*
114          * Use the hint if we have one.
115          */
116         if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
117                 next_offset = (hint + 15) & ~(hammer_off_t)15;
118                 use_hint = 1;
119         } else {
120                 next_offset = blockmap->next_offset;
121                 use_hint = 0;
122         }
123 again:
124
125         /*
126          * use_hint is turned off if we leave the hinted big-block.
127          */
128         if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
129                 next_offset = blockmap->next_offset;
130                 use_hint = 0;
131         }
132
133         /*
134          * Check for wrap
135          */
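        /*
         * next_offset reaching the base of zone + 1 means we ran off the
         * end of this zone's address space; wrap back to the zone base and
         * retry.  Wrapping a second time means the zone is exhausted and
         * we return ENOSPC.
         */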
136         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
137                 if (++loops == 2) {
138                         result_offset = 0;
139                         *errorp = ENOSPC;
140                         goto failed;
141                 }
142                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
143         }
144
145         /*
146          * The allocation request may not cross a buffer boundary.  Special
147          * large allocations must not cross a large-block boundary.
148          */
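        /*
         * Informally: if any address bit above the buffer mask (or the
         * big-block mask for large requests) differs between the first and
         * last byte of the request, the request would straddle a boundary,
         * so next_offset is bumped to the next boundary and we retry.
         */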
149         tmp_offset = next_offset + bytes - 1;
150         if (bytes <= HAMMER_BUFSIZE) {
151                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
152                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
153                         goto again;
154                 }
155         } else {
156                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
157                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
158                         goto again;
159                 }
160         }
161         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
162
163         /*
164          * Dive layer 1.
165          */
166         layer1_offset = freemap->phys_offset +
167                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
168
169         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
170         if (*errorp) {
171                 result_offset = 0;
172                 goto failed;
173         }
174
175         /*
176          * Check CRC.
177          */
178         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
179                 hammer_lock_ex(&hmp->blkmap_lock);
180                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
181                         panic("CRC FAILED: LAYER1");
182                 hammer_unlock(&hmp->blkmap_lock);
183         }
184
185         /*
 186          * If we are at a big-block boundary and layer1 indicates no
 187          * free big-blocks, then we cannot allocate a new big-block in
 188          * layer2; skip to the next layer1 entry.
189          */
190         if (offset == 0 && layer1->blocks_free == 0) {
191                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
192                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
193                 goto again;
194         }
195         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
196
197         /*
198          * Skip this layer1 entry if it is pointing to a layer2 big-block
199          * on a volume that we are currently trying to remove from the
200          * file-system. This is used by the volume-del code together with
201          * the reblocker to free up a volume.
202          */
203         if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
204             hmp->volume_to_remove) {
205                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
206                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
207                 goto again;
208         }
209
210         /*
211          * Dive layer 2, each entry represents a large-block.
212          */
213         layer2_offset = layer1->phys_offset +
214                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
215         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
216         if (*errorp) {
217                 result_offset = 0;
218                 goto failed;
219         }
220
221         /*
222          * Check CRC.  This can race another thread holding the lock
223          * and in the middle of modifying layer2.
224          */
225         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
226                 hammer_lock_ex(&hmp->blkmap_lock);
227                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
228                         panic("CRC FAILED: LAYER2");
229                 hammer_unlock(&hmp->blkmap_lock);
230         }
231
232         /*
 233          * Skip the layer if the zone is owned by someone other than us.
234          */
235         if (layer2->zone && layer2->zone != zone) {
236                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
237                 goto again;
238         }
239         if (offset < layer2->append_off) {
240                 next_offset += layer2->append_off - offset;
241                 goto again;
242         }
243
244 #if 0
245         /*
246          * If operating in the current non-hint blockmap block, do not
247          * allow it to get over-full.  Also drop any active hinting so
248          * blockmap->next_offset is updated at the end.
249          *
250          * We do this for B-Tree and meta-data allocations to provide
251          * localization for updates.
252          */
253         if ((zone == HAMMER_ZONE_BTREE_INDEX ||
254              zone == HAMMER_ZONE_META_INDEX) &&
255             offset >= HAMMER_LARGEBLOCK_OVERFILL &&
256             !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
257         ) {
258                 if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
259                         next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
260                         use_hint = 0;
261                         goto again;
262                 }
263         }
264 #endif
265
266         /*
267          * We need the lock from this point on.  We have to re-check zone
268          * ownership after acquiring the lock and also check for reservations.
269          */
270         hammer_lock_ex(&hmp->blkmap_lock);
271
272         if (layer2->zone && layer2->zone != zone) {
273                 hammer_unlock(&hmp->blkmap_lock);
274                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
275                 goto again;
276         }
277         if (offset < layer2->append_off) {
278                 hammer_unlock(&hmp->blkmap_lock);
279                 next_offset += layer2->append_off - offset;
280                 goto again;
281         }
282
283         /*
284          * The bigblock might be reserved by another zone.  If it is reserved
285          * by our zone we may have to move next_offset past the append_off.
286          */
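        /*
         * Note: the reservation tree is keyed by the big-block's zone-2
         * (raw buffer) address, so the intra-big-block offset and zone
         * bits are masked off before the lookup.
         */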
287         base_off = (next_offset &
288                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | 
289                     HAMMER_ZONE_RAW_BUFFER;
290         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
291         if (resv) {
292                 if (resv->zone != zone) {
293                         hammer_unlock(&hmp->blkmap_lock);
294                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
295                                       ~HAMMER_LARGEBLOCK_MASK64;
296                         goto again;
297                 }
298                 if (offset < resv->append_off) {
299                         hammer_unlock(&hmp->blkmap_lock);
300                         next_offset += resv->append_off - offset;
301                         goto again;
302                 }
303                 ++resv->refs;
304         }
305
306         /*
307          * Ok, we can allocate out of this layer2 big-block.  Assume ownership
308          * of the layer for real.  At this point we've validated any
309          * reservation that might exist and can just ignore resv.
310          */
311         if (layer2->zone == 0) {
312                 /*
313                  * Assign the bigblock to our zone
314                  */
315                 hammer_modify_buffer(trans, buffer1,
316                                      layer1, sizeof(*layer1));
317                 --layer1->blocks_free;
318                 layer1->layer1_crc = crc32(layer1,
319                                            HAMMER_LAYER1_CRCSIZE);
320                 hammer_modify_buffer_done(buffer1);
321                 hammer_modify_buffer(trans, buffer2,
322                                      layer2, sizeof(*layer2));
323                 layer2->zone = zone;
324                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
325                 KKASSERT(layer2->append_off == 0);
326                 hammer_modify_volume_field(trans, trans->rootvol,
327                                            vol0_stat_freebigblocks);
328                 --root_volume->ondisk->vol0_stat_freebigblocks;
329                 hmp->copy_stat_freebigblocks =
330                         root_volume->ondisk->vol0_stat_freebigblocks;
331                 hammer_modify_volume_done(trans->rootvol);
332         } else {
333                 hammer_modify_buffer(trans, buffer2,
334                                      layer2, sizeof(*layer2));
335         }
336         KKASSERT(layer2->zone == zone);
337
338         /*
339          * NOTE: bytes_free can legally go negative due to de-dup.
340          */
341         layer2->bytes_free -= bytes;
342         KKASSERT(layer2->append_off <= offset);
343         layer2->append_off = offset + bytes;
344         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
345         hammer_modify_buffer_done(buffer2);
346
347         /*
348          * We hold the blockmap lock and should be the only ones
349          * capable of modifying resv->append_off.  Track the allocation
350          * as appropriate.
351          */
352         KKASSERT(bytes != 0);
353         if (resv) {
354                 KKASSERT(resv->append_off <= offset);
355                 resv->append_off = offset + bytes;
356                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
357                 hammer_blockmap_reserve_complete(hmp, resv);
358         }
359
360         /*
361          * If we are allocating from the base of a new buffer we can avoid
362          * a disk read by calling hammer_bnew().
363          */
364         if ((next_offset & HAMMER_BUFMASK) == 0) {
365                 hammer_bnew_ext(trans->hmp, next_offset, bytes,
366                                 errorp, &buffer3);
367         }
368         result_offset = next_offset;
369
370         /*
371          * If we weren't supplied with a hint or could not use the hint
372          * then we wound up using blockmap->next_offset as the hint and
373          * need to save it.
374          */
375         if (use_hint == 0) {
376                 hammer_modify_volume(NULL, root_volume, NULL, 0);
377                 blockmap->next_offset = next_offset + bytes;
378                 hammer_modify_volume_done(root_volume);
379         }
380         hammer_unlock(&hmp->blkmap_lock);
381 failed:
382
383         /*
384          * Cleanup
385          */
386         if (buffer1)
387                 hammer_rel_buffer(buffer1, 0);
388         if (buffer2)
389                 hammer_rel_buffer(buffer2, 0);
390         if (buffer3)
391                 hammer_rel_buffer(buffer3, 0);
392
393         return(result_offset);
394 }
395
396 /*
397  * Frontend function - Reserve bytes in a zone.
398  *
399  * This code reserves bytes out of a blockmap without committing to any
400  * meta-data modifications, allowing the front-end to directly issue disk
 401  * write I/O for large blocks of data.
402  *
403  * The backend later finalizes the reservation with hammer_blockmap_finalize()
404  * upon committing the related record.
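 *
 * The returned reservation is reference counted; the holder eventually
 * drops its reference via hammer_blockmap_reserve_complete().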
405  */
406 hammer_reserve_t
407 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
408                         hammer_off_t *zone_offp, int *errorp)
409 {
410         hammer_volume_t root_volume;
411         hammer_blockmap_t blockmap;
412         hammer_blockmap_t freemap;
413         struct hammer_blockmap_layer1 *layer1;
414         struct hammer_blockmap_layer2 *layer2;
415         hammer_buffer_t buffer1 = NULL;
416         hammer_buffer_t buffer2 = NULL;
417         hammer_buffer_t buffer3 = NULL;
418         hammer_off_t tmp_offset;
419         hammer_off_t next_offset;
420         hammer_off_t layer1_offset;
421         hammer_off_t layer2_offset;
422         hammer_off_t base_off;
423         hammer_reserve_t resv;
424         hammer_reserve_t resx;
425         int loops = 0;
426         int offset;
427
428         /*
429          * Setup
430          */
431         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
432         root_volume = hammer_get_root_volume(hmp, errorp);
433         if (*errorp)
434                 return(NULL);
435         blockmap = &hmp->blockmap[zone];
436         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
437         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
438
439         /*
440          * Deal with alignment and buffer-boundary issues.
441          *
442          * Be careful, certain primary alignments are used below to allocate
443          * new blockmap blocks.
444          */
445         bytes = (bytes + 15) & ~15;
446         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
447
448         next_offset = blockmap->next_offset;
449 again:
450         resv = NULL;
451         /*
452          * Check for wrap
453          */
454         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
455                 if (++loops == 2) {
456                         *errorp = ENOSPC;
457                         goto failed;
458                 }
459                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
460         }
461
462         /*
463          * The allocation request may not cross a buffer boundary.  Special
464          * large allocations must not cross a large-block boundary.
465          */
466         tmp_offset = next_offset + bytes - 1;
467         if (bytes <= HAMMER_BUFSIZE) {
468                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
469                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
470                         goto again;
471                 }
472         } else {
473                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
474                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
475                         goto again;
476                 }
477         }
478         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
479
480         /*
481          * Dive layer 1.
482          */
483         layer1_offset = freemap->phys_offset +
484                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
485         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
486         if (*errorp)
487                 goto failed;
488
489         /*
490          * Check CRC.
491          */
492         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
493                 hammer_lock_ex(&hmp->blkmap_lock);
494                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
495                         panic("CRC FAILED: LAYER1");
496                 hammer_unlock(&hmp->blkmap_lock);
497         }
498
499         /*
 500          * If we are at a big-block boundary and layer1 indicates no
 501          * free big-blocks, then we cannot allocate a new big-block in
 502          * layer2; skip to the next layer1 entry.
503          */
504         if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
505             layer1->blocks_free == 0) {
506                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
507                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
508                 goto again;
509         }
510         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
511
512         /*
513          * Dive layer 2, each entry represents a large-block.
514          */
515         layer2_offset = layer1->phys_offset +
516                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
517         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
518         if (*errorp)
519                 goto failed;
520
521         /*
522          * Check CRC if not allocating into uninitialized space (which we
523          * aren't when reserving space).
524          */
525         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
526                 hammer_lock_ex(&hmp->blkmap_lock);
527                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
528                         panic("CRC FAILED: LAYER2");
529                 hammer_unlock(&hmp->blkmap_lock);
530         }
531
532         /*
 533          * Skip the layer if the zone is owned by someone other than us.
534          */
535         if (layer2->zone && layer2->zone != zone) {
536                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
537                 goto again;
538         }
539         if (offset < layer2->append_off) {
540                 next_offset += layer2->append_off - offset;
541                 goto again;
542         }
543
544         /*
545          * We need the lock from this point on.  We have to re-check zone
546          * ownership after acquiring the lock and also check for reservations.
547          */
548         hammer_lock_ex(&hmp->blkmap_lock);
549
550         if (layer2->zone && layer2->zone != zone) {
551                 hammer_unlock(&hmp->blkmap_lock);
552                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
553                 goto again;
554         }
555         if (offset < layer2->append_off) {
556                 hammer_unlock(&hmp->blkmap_lock);
557                 next_offset += layer2->append_off - offset;
558                 goto again;
559         }
560
561         /*
562          * The bigblock might be reserved by another zone.  If it is reserved
563          * by our zone we may have to move next_offset past the append_off.
564          */
565         base_off = (next_offset &
566                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
567                     HAMMER_ZONE_RAW_BUFFER;
568         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
569         if (resv) {
570                 if (resv->zone != zone) {
571                         hammer_unlock(&hmp->blkmap_lock);
572                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
573                                       ~HAMMER_LARGEBLOCK_MASK64;
574                         goto again;
575                 }
576                 if (offset < resv->append_off) {
577                         hammer_unlock(&hmp->blkmap_lock);
578                         next_offset += resv->append_off - offset;
579                         goto again;
580                 }
581                 ++resv->refs;
582                 resx = NULL;
583         } else {
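                /*
                 * No reservation covers this big-block yet: create one,
                 * flagging it LAYER2FREE if the big-block is still entirely
                 * free so its buffers can be invalidated when the
                 * reservation completes.
                 */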
584                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
585                                M_WAITOK | M_ZERO | M_USE_RESERVE);
586                 resx->refs = 1;
587                 resx->zone = zone;
588                 resx->zone_offset = base_off;
589                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
590                         resx->flags |= HAMMER_RESF_LAYER2FREE;
591                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
592                 KKASSERT(resv == NULL);
593                 resv = resx;
594                 ++hammer_count_reservations;
595         }
596         resv->append_off = offset + bytes;
597
598         /*
599          * If we are not reserving a whole buffer but are at the start of
600          * a new block, call hammer_bnew() to avoid a disk read.
601          *
602          * If we are reserving a whole buffer (or more), the caller will
603          * probably use a direct read, so do nothing.
604          */
605         if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
606                 hammer_bnew(hmp, next_offset, errorp, &buffer3);
607         }
608
609         /*
610          * Adjust our iterator and alloc_offset.  The layer1 and layer2
611          * space beyond alloc_offset is uninitialized.  alloc_offset must
612          * be big-block aligned.
613          */
614         blockmap->next_offset = next_offset + bytes;
615         hammer_unlock(&hmp->blkmap_lock);
616
617 failed:
618         if (buffer1)
619                 hammer_rel_buffer(buffer1, 0);
620         if (buffer2)
621                 hammer_rel_buffer(buffer2, 0);
622         if (buffer3)
623                 hammer_rel_buffer(buffer3, 0);
624         hammer_rel_volume(root_volume, 0);
625         *zone_offp = next_offset;
626
627         return(resv);
628 }
629
630 /*
631  * Frontend function - Dedup bytes in a zone.
632  *
633  * Dedup reservations work exactly the same as normal write reservations
634  * except we only adjust bytes_free field and don't touch append offset.
635  * Finalization mechanic for dedup reservations is also the same as for
636  * normal write ones - the backend finalizes the reservation with
637  * hammer_blockmap_finalize().
638  */
639 hammer_reserve_t
640 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
641                               hammer_off_t zone_offset, int *errorp)
642 {
643         hammer_volume_t root_volume;
644         hammer_blockmap_t freemap;
645         struct hammer_blockmap_layer1 *layer1;
646         struct hammer_blockmap_layer2 *layer2;
647         hammer_buffer_t buffer1 = NULL;
648         hammer_buffer_t buffer2 = NULL;
649         hammer_off_t layer1_offset;
650         hammer_off_t layer2_offset;
651         hammer_off_t base_off;
652         hammer_reserve_t resv = NULL;
653         hammer_reserve_t resx = NULL;
654
655         /*
656          * Setup
657          */
658         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
659         root_volume = hammer_get_root_volume(hmp, errorp);
660         if (*errorp)
661                 return (NULL);
662         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
663         KKASSERT(freemap->phys_offset != 0);
664
665         bytes = (bytes + 15) & ~15;
666         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
667
668         /*
669          * Dive layer 1.
670          */
671         layer1_offset = freemap->phys_offset +
672                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
673         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
674         if (*errorp)
675                 goto failed;
676
677         /*
678          * Check CRC.
679          */
680         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
681                 hammer_lock_ex(&hmp->blkmap_lock);
682                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
683                         panic("CRC FAILED: LAYER1");
684                 hammer_unlock(&hmp->blkmap_lock);
685         }
686         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
687
688         /*
689          * Dive layer 2, each entry represents a large-block.
690          */
691         layer2_offset = layer1->phys_offset +
692                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
693         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
694         if (*errorp)
695                 goto failed;
696
697         /*
698          * Check CRC.
699          */
700         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
701                 hammer_lock_ex(&hmp->blkmap_lock);
702                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
703                         panic("CRC FAILED: LAYER2");
704                 hammer_unlock(&hmp->blkmap_lock);
705         }
706
707         /*
708          * Fail if the zone is owned by someone other than us.
709          */
710         if (layer2->zone && layer2->zone != zone)
711                 goto failed;
712
713         /*
714          * We need the lock from this point on.  We have to re-check zone
715          * ownership after acquiring the lock and also check for reservations.
716          */
717         hammer_lock_ex(&hmp->blkmap_lock);
718
719         if (layer2->zone && layer2->zone != zone) {
720                 hammer_unlock(&hmp->blkmap_lock);
721                 goto failed;
722         }
723
724         base_off = (zone_offset &
725                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
726                     HAMMER_ZONE_RAW_BUFFER;
727         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
728         if (resv) {
729                 if (resv->zone != zone) {
730                         hammer_unlock(&hmp->blkmap_lock);
731                         resv = NULL;
732                         goto failed;
733                 }
734                 /*
735                  * Due to possible big block underflow we can't simply
736                  * subtract bytes from bytes_free.
737                  */
738                 if (update_bytes_free(resv, bytes) == 0) {
739                         hammer_unlock(&hmp->blkmap_lock);
740                         resv = NULL;
741                         goto failed;
742                 }
743                 ++resv->refs;
744                 resx = NULL;
745         } else {
746                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
747                                M_WAITOK | M_ZERO | M_USE_RESERVE);
748                 resx->refs = 1;
749                 resx->zone = zone;
750                 resx->bytes_free = layer2->bytes_free;
751                 /*
752                  * Due to possible big block underflow we can't simply
753                  * subtract bytes from bytes_free.
754                  */
755                 if (update_bytes_free(resx, bytes) == 0) {
756                         hammer_unlock(&hmp->blkmap_lock);
757                         kfree(resx, hmp->m_misc);
758                         goto failed;
759                 }
760                 resx->zone_offset = base_off;
761                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
762                 KKASSERT(resv == NULL);
763                 resv = resx;
764                 ++hammer_count_reservations;
765         }
766
767         hammer_unlock(&hmp->blkmap_lock);
768
769 failed:
770         if (buffer1)
771                 hammer_rel_buffer(buffer1, 0);
772         if (buffer2)
773                 hammer_rel_buffer(buffer2, 0);
774         hammer_rel_volume(root_volume, 0);
775
776         return(resv);
777 }
778
779 static int
780 update_bytes_free(hammer_reserve_t resv, int bytes)
781 {
782         int32_t temp;
783
784         /*
785          * Big-block underflow check
786          */
787         temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
788         cpu_ccfence(); /* XXX do we really need it ? */
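        /*
         * If the subtraction wrapped (temp now compares greater than
         * bytes_free), the signed 32 bit counter, which may already be
         * negative due to de-dup, is within two big-blocks of underflowing,
         * so refuse the update.
         */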
789         if (temp > resv->bytes_free) {
790                 kprintf("BIGBLOCK UNDERFLOW\n");
791                 return (0);
792         }
793
794         resv->bytes_free -= bytes;
795         return (1);
796 }
797
798 /*
799  * Dereference a reservation structure.  Upon the final release the
800  * underlying big-block is checked and if it is entirely free we delete
801  * any related HAMMER buffers to avoid potential conflicts with future
802  * reuse of the big-block.
803  */
804 void
805 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
806 {
807         hammer_off_t base_offset;
808         int error;
809
810         KKASSERT(resv->refs > 0);
811         KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
812                  HAMMER_ZONE_RAW_BUFFER);
813
814         /*
815          * Setting append_off to the max prevents any new allocations
 816          * from occurring while we are trying to dispose of the reservation,
817          * allowing us to safely delete any related HAMMER buffers.
818          *
819          * If we are unable to clean out all related HAMMER buffers we
820          * requeue the delay.
821          */
822         if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
823                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
824                 base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
825                 base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
826                 if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
827                         hammer_dedup_cache_inval(hmp, base_offset);
828                 error = hammer_del_buffers(hmp, base_offset,
829                                            resv->zone_offset,
830                                            HAMMER_LARGEBLOCK_SIZE,
831                                            1);
832                 if (hammer_debug_general & 0x20000) {
833                         kprintf("hammer: dellgblk %016jx error %d\n",
834                                 (intmax_t)base_offset, error);
835                 }
836                 if (error)
837                         hammer_reserve_setdelay(hmp, resv);
838         }
839         if (--resv->refs == 0) {
840                 if (hammer_debug_general & 0x20000) {
841                         kprintf("hammer: delresvr %016jx zone %02x\n",
842                                 (intmax_t)resv->zone_offset, resv->zone);
843                 }
844                 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
845                 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
846                 kfree(resv, hmp->m_misc);
847                 --hammer_count_reservations;
848         }
849 }
850
851 /*
852  * Prevent a potentially free big-block from being reused until after
853  * the related flushes have completely cycled, otherwise crash recovery
854  * could resurrect a data block that was already reused and overwritten.
855  *
856  * The caller might reset the underlying layer2 entry's append_off to 0, so
857  * our covering append_off must be set to max to prevent any reallocation
858  * until after the flush delays complete, not to mention proper invalidation
859  * of any underlying cached blocks.
860  */
861 static void
862 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
863                         int zone, struct hammer_blockmap_layer2 *layer2)
864 {
865         hammer_reserve_t resv;
866
867         /*
868          * Allocate the reservation if necessary.
869          *
870          * NOTE: need lock in future around resv lookup/allocation and
871          * the setdelay call, currently refs is not bumped until the call.
872          */
873 again:
874         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
875         if (resv == NULL) {
876                 resv = kmalloc(sizeof(*resv), hmp->m_misc,
877                                M_WAITOK | M_ZERO | M_USE_RESERVE);
878                 resv->zone = zone;
879                 resv->zone_offset = base_offset;
880                 resv->refs = 0;
881                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
882
883                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
884                         resv->flags |= HAMMER_RESF_LAYER2FREE;
885                 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
886                         kfree(resv, hmp->m_misc);
887                         goto again;
888                 }
889                 ++hammer_count_reservations;
890         } else {
891                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
892                         resv->flags |= HAMMER_RESF_LAYER2FREE;
893         }
894         hammer_reserve_setdelay(hmp, resv);
895 }
896
897 /*
898  * Enter the reservation on the on-delay list, or move it if it
899  * is already on the list.
900  */
901 static void
902 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
903 {
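        /*
         * The reservation's flush_group is set one past the flusher's
         * next group, so it is not cleared (see hammer_reserve_clrdelay())
         * until those flushes have cycled.
         */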
904         if (resv->flags & HAMMER_RESF_ONDELAY) {
905                 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
906                 resv->flush_group = hmp->flusher.next + 1;
907                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
908         } else {
909                 ++resv->refs;
910                 ++hmp->rsv_fromdelay;
911                 resv->flags |= HAMMER_RESF_ONDELAY;
912                 resv->flush_group = hmp->flusher.next + 1;
913                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
914         }
915 }
916
917 /*
918  * Reserve has reached its flush point, remove it from the delay list
919  * and finish it off.  hammer_blockmap_reserve_complete() inherits
920  * the ondelay reference.
921  */
922 void
923 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
924 {
925         KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
926         resv->flags &= ~HAMMER_RESF_ONDELAY;
927         TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
928         --hmp->rsv_fromdelay;
929         hammer_blockmap_reserve_complete(hmp, resv);
930 }
931
932 /*
933  * Backend function - free (offset, bytes) in a zone.
934  *
935  * XXX error return
936  */
937 void
938 hammer_blockmap_free(hammer_transaction_t trans,
939                      hammer_off_t zone_offset, int bytes)
940 {
941         hammer_mount_t hmp;
942         hammer_volume_t root_volume;
943         hammer_blockmap_t freemap;
944         struct hammer_blockmap_layer1 *layer1;
945         struct hammer_blockmap_layer2 *layer2;
946         hammer_buffer_t buffer1 = NULL;
947         hammer_buffer_t buffer2 = NULL;
948         hammer_off_t layer1_offset;
949         hammer_off_t layer2_offset;
950         hammer_off_t base_off;
951         int error;
952         int zone;
953
954         if (bytes == 0)
955                 return;
956         hmp = trans->hmp;
957
958         /*
959          * Alignment
960          */
961         bytes = (bytes + 15) & ~15;
962         KKASSERT(bytes <= HAMMER_XBUFSIZE);
963         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) & 
964                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
965
966         /*
967          * Basic zone validation & locking
968          */
969         zone = HAMMER_ZONE_DECODE(zone_offset);
970         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
971         root_volume = trans->rootvol;
972         error = 0;
973
974         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
975
976         /*
977          * Dive layer 1.
978          */
979         layer1_offset = freemap->phys_offset +
980                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
981         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
982         if (error)
983                 goto failed;
984         KKASSERT(layer1->phys_offset &&
985                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
986         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
987                 hammer_lock_ex(&hmp->blkmap_lock);
988                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
989                         panic("CRC FAILED: LAYER1");
990                 hammer_unlock(&hmp->blkmap_lock);
991         }
992
993         /*
994          * Dive layer 2, each entry represents a large-block.
995          */
996         layer2_offset = layer1->phys_offset +
997                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
998         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
999         if (error)
1000                 goto failed;
1001         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1002                 hammer_lock_ex(&hmp->blkmap_lock);
1003                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1004                         panic("CRC FAILED: LAYER2");
1005                 hammer_unlock(&hmp->blkmap_lock);
1006         }
1007
1008         hammer_lock_ex(&hmp->blkmap_lock);
1009
1010         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1011
1012         /*
1013          * Free space previously allocated via blockmap_alloc().
1014          *
1015          * NOTE: bytes_free can be and remain negative due to de-dup ops
1016          *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1017          */
1018         KKASSERT(layer2->zone == zone);
1019         layer2->bytes_free += bytes;
1020         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1021
1022         /*
1023          * If a big-block becomes entirely free we must create a covering
1024          * reservation to prevent premature reuse.  Note, however, that
1025          * the big-block and/or reservation may still have an append_off
1026          * that allows further (non-reused) allocations.
1027          *
1028          * Once the reservation has been made we re-check layer2 and if
1029          * the big-block is still entirely free we reset the layer2 entry.
1030          * The reservation will prevent premature reuse.
1031          *
1032          * NOTE: hammer_buffer's are only invalidated when the reservation
1033          * is completed, if the layer2 entry is still completely free at
1034          * that time.  Any allocations from the reservation that may have
1035          * occurred in the meantime, or active references on the reservation
1036          * from new pending allocations, will prevent the invalidation from
1037          * occurring.
1038          */
1039         if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1040                 base_off = (zone_offset &
                                 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                                 HAMMER_ZONE_RAW_BUFFER;
1041
1042                 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1043                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1044                         layer2->zone = 0;
1045                         layer2->append_off = 0;
1046                         hammer_modify_buffer(trans, buffer1,
1047                                              layer1, sizeof(*layer1));
1048                         ++layer1->blocks_free;
1049                         layer1->layer1_crc = crc32(layer1,
1050                                                    HAMMER_LAYER1_CRCSIZE);
1051                         hammer_modify_buffer_done(buffer1);
1052                         hammer_modify_volume_field(trans,
1053                                         trans->rootvol,
1054                                         vol0_stat_freebigblocks);
1055                         ++root_volume->ondisk->vol0_stat_freebigblocks;
1056                         hmp->copy_stat_freebigblocks =
1057                            root_volume->ondisk->vol0_stat_freebigblocks;
1058                         hammer_modify_volume_done(trans->rootvol);
1059                 }
1060         }
1061         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1062         hammer_modify_buffer_done(buffer2);
1063         hammer_unlock(&hmp->blkmap_lock);
1064
1065 failed:
1066         if (buffer1)
1067                 hammer_rel_buffer(buffer1, 0);
1068         if (buffer2)
1069                 hammer_rel_buffer(buffer2, 0);
1070 }
1071
1072 int
1073 hammer_blockmap_dedup(hammer_transaction_t trans,
1074                      hammer_off_t zone_offset, int bytes)
1075 {
1076         hammer_mount_t hmp;
1077         hammer_volume_t root_volume;
1078         hammer_blockmap_t freemap;
1079         struct hammer_blockmap_layer1 *layer1;
1080         struct hammer_blockmap_layer2 *layer2;
1081         hammer_buffer_t buffer1 = NULL;
1082         hammer_buffer_t buffer2 = NULL;
1083         hammer_off_t layer1_offset;
1084         hammer_off_t layer2_offset;
1085         int32_t temp;
1086         int error;
1087         int zone;
1088
1089         if (bytes == 0)
1090                 return (0);
1091         hmp = trans->hmp;
1092
1093         /*
1094          * Alignment
1095          */
1096         bytes = (bytes + 15) & ~15;
1097         KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
1098         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1099                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
1100
1101         /*
1102          * Basic zone validation & locking
1103          */
1104         zone = HAMMER_ZONE_DECODE(zone_offset);
1105         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1106         root_volume = trans->rootvol;
1107         error = 0;
1108
1109         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1110
1111         /*
1112          * Dive layer 1.
1113          */
1114         layer1_offset = freemap->phys_offset +
1115                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1116         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1117         if (error)
1118                 goto failed;
1119         KKASSERT(layer1->phys_offset &&
1120                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1121         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1122                 hammer_lock_ex(&hmp->blkmap_lock);
1123                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1124                         panic("CRC FAILED: LAYER1");
1125                 hammer_unlock(&hmp->blkmap_lock);
1126         }
1127
1128         /*
1129          * Dive layer 2, each entry represents a large-block.
1130          */
1131         layer2_offset = layer1->phys_offset +
1132                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1133         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1134         if (error)
1135                 goto failed;
1136         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1137                 hammer_lock_ex(&hmp->blkmap_lock);
1138                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1139                         panic("CRC FAILED: LAYER2");
1140                 hammer_unlock(&hmp->blkmap_lock);
1141         }
1142
1143         hammer_lock_ex(&hmp->blkmap_lock);
1144
1145         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1146
1147         /*
1148          * Free space previously allocated via blockmap_alloc().
1149          *
1150          * NOTE: bytes_free can be and remain negative due to de-dup ops
1151          *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1152          */
1153         KKASSERT(layer2->zone == zone);
1154         temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
1155         cpu_ccfence(); /* prevent gcc from optimizing temp out */
1156         if (temp > layer2->bytes_free) {
1157                 error = ERANGE;
1158                 goto underflow;
1159         }
1160         layer2->bytes_free -= bytes;
1161
1162         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1163
1164         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1165 underflow:
1166         hammer_modify_buffer_done(buffer2);
1167         hammer_unlock(&hmp->blkmap_lock);
1168
1169 failed:
1170         if (buffer1)
1171                 hammer_rel_buffer(buffer1, 0);
1172         if (buffer2)
1173                 hammer_rel_buffer(buffer2, 0);
1174         return (error);
1175 }
1176
1177 /*
1178  * Backend function - finalize (offset, bytes) in a zone.
1179  *
1180  * Allocate space that was previously reserved by the frontend.
1181  */
1182 int
1183 hammer_blockmap_finalize(hammer_transaction_t trans,
1184                          hammer_reserve_t resv,
1185                          hammer_off_t zone_offset, int bytes)
1186 {
1187         hammer_mount_t hmp;
1188         hammer_volume_t root_volume;
1189         hammer_blockmap_t freemap;
1190         struct hammer_blockmap_layer1 *layer1;
1191         struct hammer_blockmap_layer2 *layer2;
1192         hammer_buffer_t buffer1 = NULL;
1193         hammer_buffer_t buffer2 = NULL;
1194         hammer_off_t layer1_offset;
1195         hammer_off_t layer2_offset;
1196         int error;
1197         int zone;
1198         int offset;
1199
1200         if (bytes == 0)
1201                 return(0);
1202         hmp = trans->hmp;
1203
1204         /*
1205          * Alignment
1206          */
1207         bytes = (bytes + 15) & ~15;
1208         KKASSERT(bytes <= HAMMER_XBUFSIZE);
1209
1210         /*
1211          * Basic zone validation & locking
1212          */
1213         zone = HAMMER_ZONE_DECODE(zone_offset);
1214         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1215         root_volume = trans->rootvol;
1216         error = 0;
1217
1218         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1219
1220         /*
1221          * Dive layer 1.
1222          */
1223         layer1_offset = freemap->phys_offset +
1224                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1225         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1226         if (error)
1227                 goto failed;
1228         KKASSERT(layer1->phys_offset &&
1229                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1230         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1231                 hammer_lock_ex(&hmp->blkmap_lock);
1232                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1233                         panic("CRC FAILED: LAYER1");
1234                 hammer_unlock(&hmp->blkmap_lock);
1235         }
1236
1237         /*
1238          * Dive layer 2, each entry represents a large-block.
1239          */
1240         layer2_offset = layer1->phys_offset +
1241                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1242         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1243         if (error)
1244                 goto failed;
1245         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1246                 hammer_lock_ex(&hmp->blkmap_lock);
1247                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1248                         panic("CRC FAILED: LAYER2");
1249                 hammer_unlock(&hmp->blkmap_lock);
1250         }
1251
1252         hammer_lock_ex(&hmp->blkmap_lock);
1253
1254         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1255
1256         /*
1257          * Finalize some or all of the space covered by a current
1258          * reservation.  An allocation in the same layer may have
1259          * already assigned ownership.
1260          */
1261         if (layer2->zone == 0) {
1262                 hammer_modify_buffer(trans, buffer1,
1263                                      layer1, sizeof(*layer1));
1264                 --layer1->blocks_free;
1265                 layer1->layer1_crc = crc32(layer1,
1266                                            HAMMER_LAYER1_CRCSIZE);
1267                 hammer_modify_buffer_done(buffer1);
1268                 layer2->zone = zone;
1269                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1270                 KKASSERT(layer2->append_off == 0);
1271                 hammer_modify_volume_field(trans,
1272                                 trans->rootvol,
1273                                 vol0_stat_freebigblocks);
1274                 --root_volume->ondisk->vol0_stat_freebigblocks;
1275                 hmp->copy_stat_freebigblocks =
1276                    root_volume->ondisk->vol0_stat_freebigblocks;
1277                 hammer_modify_volume_done(trans->rootvol);
1278         }
1279         if (layer2->zone != zone)
1280                 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1281         KKASSERT(layer2->zone == zone);
1282         KKASSERT(bytes != 0);
1283         layer2->bytes_free -= bytes;
1284
1285         if (resv) {
1286                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1287         }
1288
1289         /*
1290          * Finalizations can occur out of order or be combined with allocations.
1291          * append_off must be set to the highest allocated offset.
1292          */
1293         offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1294         if (layer2->append_off < offset)
1295                 layer2->append_off = offset;
1296
1297         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1298         hammer_modify_buffer_done(buffer2);
1299         hammer_unlock(&hmp->blkmap_lock);
1300
1301 failed:
1302         if (buffer1)
1303                 hammer_rel_buffer(buffer1, 0);
1304         if (buffer2)
1305                 hammer_rel_buffer(buffer2, 0);
1306         return(error);
1307 }
1308
1309 /*
1310  * Return the approximate number of free bytes in the big-block
1311  * containing the specified blockmap offset.
1312  *
1313  * WARNING: A negative number can be returned if data de-dup exists,
1314  *          and the result will also not represent the actual number
1315  *          of free bytes in this case.
1316  *
1317  *          This code is used only by the reblocker.
1318  */
1319 int
1320 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1321                         int *curp, int *errorp)
1322 {
1323         hammer_volume_t root_volume;
1324         hammer_blockmap_t blockmap;
1325         hammer_blockmap_t freemap;
1326         struct hammer_blockmap_layer1 *layer1;
1327         struct hammer_blockmap_layer2 *layer2;
1328         hammer_buffer_t buffer = NULL;
1329         hammer_off_t layer1_offset;
1330         hammer_off_t layer2_offset;
1331         int32_t bytes;
1332         int zone;
1333
1334         zone = HAMMER_ZONE_DECODE(zone_offset);
1335         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1336         root_volume = hammer_get_root_volume(hmp, errorp);
1337         if (*errorp) {
1338                 *curp = 0;
1339                 return(0);
1340         }
1341         blockmap = &hmp->blockmap[zone];
1342         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1343
1344         /*
1345          * Dive layer 1.
1346          */
1347         layer1_offset = freemap->phys_offset +
1348                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1349         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1350         if (*errorp) {
1351                 bytes = 0;
1352                 goto failed;
1353         }
1354         KKASSERT(layer1->phys_offset);
1355         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1356                 hammer_lock_ex(&hmp->blkmap_lock);
1357                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1358                         panic("CRC FAILED: LAYER1");
1359                 hammer_unlock(&hmp->blkmap_lock);
1360         }
1361
1362         /*
1363          * Dive layer 2, each entry represents a large-block.
1364          *
1365          * (reuse buffer, layer1 pointer becomes invalid)
1366          */
1367         layer2_offset = layer1->phys_offset +
1368                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1369         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1370         if (*errorp) {
1371                 bytes = 0;
1372                 goto failed;
1373         }
1374         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1375                 hammer_lock_ex(&hmp->blkmap_lock);
1376                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1377                         panic("CRC FAILED: LAYER2");
1378                 hammer_unlock(&hmp->blkmap_lock);
1379         }
1380         KKASSERT(layer2->zone == zone);
1381
1382         bytes = layer2->bytes_free;
1383
1384         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1385                 *curp = 0;
1386         else
1387                 *curp = 1;
1388 failed:
1389         if (buffer)
1390                 hammer_rel_buffer(buffer, 0);
1391         hammer_rel_volume(root_volume, 0);
1392         if (hammer_debug_general & 0x0800) {
1393                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1394                         (long long)zone_offset, bytes);
1395         }
1396         return(bytes);
1397 }
1398
1399
1400 /*
1401  * Lookup a blockmap offset.
1402  */
1403 hammer_off_t
1404 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1405                        int *errorp)
1406 {
1407         hammer_volume_t root_volume;
1408         hammer_blockmap_t freemap;
1409         struct hammer_blockmap_layer1 *layer1;
1410         struct hammer_blockmap_layer2 *layer2;
1411         hammer_buffer_t buffer = NULL;
1412         hammer_off_t layer1_offset;
1413         hammer_off_t layer2_offset;
1414         hammer_off_t result_offset;
1415         hammer_off_t base_off;
1416         hammer_reserve_t resv;
1417         int zone;
1418
1419         /*
1420          * Calculate the zone-2 offset.
1421          */
1422         zone = HAMMER_ZONE_DECODE(zone_offset);
1423         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1424
1425         result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1426                         HAMMER_ZONE_RAW_BUFFER;
1427
1428         /*
1429          * We can actually stop here, normal blockmaps are now direct-mapped
1430          * onto the freemap and so represent zone-2 addresses.
1431          */
1432         if (hammer_verify_zone == 0) {
1433                 *errorp = 0;
1434                 return(result_offset);
1435         }
1436
1437         /*
1438          * Validate the allocation zone
1439          */
1440         root_volume = hammer_get_root_volume(hmp, errorp);
1441         if (*errorp)
1442                 return(0);
1443         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1444         KKASSERT(freemap->phys_offset != 0);
1445
1446         /*
1447          * Dive layer 1.
1448          */
1449         layer1_offset = freemap->phys_offset +
1450                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1451         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1452         if (*errorp)
1453                 goto failed;
1454         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1455         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1456                 hammer_lock_ex(&hmp->blkmap_lock);
1457                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1458                         panic("CRC FAILED: LAYER1");
1459                 hammer_unlock(&hmp->blkmap_lock);
1460         }
1461
1462         /*
1463          * Dive layer 2, each entry represents a large-block.
1464          */
1465         layer2_offset = layer1->phys_offset +
1466                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1467         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1468
1469         if (*errorp)
1470                 goto failed;
1471         if (layer2->zone == 0) {
1472                 base_off = (zone_offset &
                                 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                                 HAMMER_ZONE_RAW_BUFFER;
1473                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1474                                  base_off);
1475                 KKASSERT(resv && resv->zone == zone);
1476
1477         } else if (layer2->zone != zone) {
1478                 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1479                         layer2->zone, zone);
1480         }
1481         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1482                 hammer_lock_ex(&hmp->blkmap_lock);
1483                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1484                         panic("CRC FAILED: LAYER2");
1485                 hammer_unlock(&hmp->blkmap_lock);
1486         }
1487
1488 failed:
1489         if (buffer)
1490                 hammer_rel_buffer(buffer, 0);
1491         hammer_rel_volume(root_volume, 0);
1492         if (hammer_debug_general & 0x0800) {
1493                 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1494                         (long long)zone_offset, (long long)result_offset);
1495         }
1496         return(result_offset);
1497 }
1498
1499
1500 /*
1501  * Check space availability
1502  *
1503  * MPSAFE - does not require fs_token
1504  */
1505 int
1506 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1507 {
1508         const int in_size = sizeof(struct hammer_inode_data) +
1509                             sizeof(union hammer_btree_elm);
1510         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1511         int64_t usedbytes;
1512
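        /*
         * Rough worst-case byte estimate of the work currently queued:
         * reserved inodes and records, reserved direct-write data bytes,
         * big-blocks held by delayed reservations, four times the dirty
         * buffer watermark, plus the caller-supplied slop (in big-blocks).
         */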
1513         usedbytes = hmp->rsv_inodes * in_size +
1514                     hmp->rsv_recs * rec_size +
1515                     hmp->rsv_databytes +
1516                     ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1517                     ((int64_t)hidirtybufspace << 2) +
1518                     (slop << HAMMER_LARGEBLOCK_BITS);
1519
1520         hammer_count_extra_space_used = usedbytes;      /* debugging */
1521         if (resp)
1522                 *resp = usedbytes;
1523
1524         if (hmp->copy_stat_freebigblocks >=
1525             (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1526                 return(0);
1527         }
1528         return (ENOSPC);
1529 }
1530