hammer - bring-in cleanup from PR 2771 and 2772
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 /*
36  * HAMMER blockmap
37  */
38 #include "hammer.h"
39
40 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
41 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
42                                     hammer_off_t base_offset, int zone,
43                                     struct hammer_blockmap_layer2 *layer2);
44 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
45 static int update_bytes_free(hammer_reserve_t resv, int bytes);
46
47 /*
48  * Reserved big-blocks red-black tree support
49  */
50 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
51              hammer_res_rb_compare, hammer_off_t, zone_offset);
52
53 static int
54 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
55 {
56         if (res1->zone_offset < res2->zone_offset)
57                 return(-1);
58         if (res1->zone_offset > res2->zone_offset)
59                 return(1);
60         return(0);
61 }
62
63 /*
64  * Allocate bytes from a zone
65  */
66 hammer_off_t
67 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
68                       hammer_off_t hint, int *errorp)
69 {
70         hammer_mount_t hmp;
71         hammer_volume_t root_volume;
72         hammer_blockmap_t blockmap;
73         hammer_blockmap_t freemap;
74         hammer_reserve_t resv;
75         struct hammer_blockmap_layer1 *layer1;
76         struct hammer_blockmap_layer2 *layer2;
77         hammer_buffer_t buffer1 = NULL;
78         hammer_buffer_t buffer2 = NULL;
79         hammer_buffer_t buffer3 = NULL;
80         hammer_off_t tmp_offset;
81         hammer_off_t next_offset;
82         hammer_off_t result_offset;
83         hammer_off_t layer1_offset;
84         hammer_off_t layer2_offset;
85         hammer_off_t base_off;
86         int loops = 0;
87         int offset;             /* offset within big-block */
88         int use_hint;
89
90         hmp = trans->hmp;
91
92         /*
93          * Deal with alignment and buffer-boundary issues.
94          *
95          * Be careful, certain primary alignments are used below to allocate
96          * new blockmap blocks.
97          */
98         bytes = (bytes + 15) & ~15;
99         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
100         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
101
102         /*
103          * Setup
104          */
105         root_volume = trans->rootvol;
106         *errorp = 0;
107         blockmap = &hmp->blockmap[zone];
108         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
109         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
110
111         /*
112          * Use the hint if we have one.
113          */
114         if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
115                 next_offset = (hint + 15) & ~(hammer_off_t)15;
116                 use_hint = 1;
117         } else {
118                 next_offset = blockmap->next_offset;
119                 use_hint = 0;
120         }
121 again:
122
123         /*
124          * use_hint is turned off if we leave the hinted big-block.
125          */
126         if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
127                 next_offset = blockmap->next_offset;
128                 use_hint = 0;
129         }
130
131         /*
132          * Check for wrap
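         * (reaching the end of the zone a second time means the entire
         * zone has been scanned without finding space, so give up with
         * ENOSPC)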
133          */
134         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
135                 if (++loops == 2) {
136                         result_offset = 0;
137                         *errorp = ENOSPC;
138                         goto failed;
139                 }
140                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
141         }
142
143         /*
144          * The allocation request may not cross a buffer boundary.  Special
145          * large allocations must not cross a large-block boundary.
146          */
147         tmp_offset = next_offset + bytes - 1;
148         if (bytes <= HAMMER_BUFSIZE) {
149                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
150                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
151                         goto again;
152                 }
153         } else {
154                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
155                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
156                         goto again;
157                 }
158         }
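        /* byte offset of the request within its big-block */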
159         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
160
161         /*
162          * Dive layer 1.
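         * Each layer1 entry covers HAMMER_BLOCKMAP_LAYER2 bytes of zone
         * address space and points at an array of layer2 entries, one per
         * big-block.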
163          */
164         layer1_offset = freemap->phys_offset +
165                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
166
167         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
168         if (*errorp) {
169                 result_offset = 0;
170                 goto failed;
171         }
172
173         /*
174          * Check CRC.
175          */
176         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
177                 hammer_lock_ex(&hmp->blkmap_lock);
178                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
179                         panic("CRC FAILED: LAYER1");
180                 hammer_unlock(&hmp->blkmap_lock);
181         }
182
183         /*
184          * If we are at a big-block boundary and layer1 indicates no
185          * free big-blocks, then we cannot allocate a new big-block in
186          * layer2; skip to the next layer1 entry.
187          */
188         if (offset == 0 && layer1->blocks_free == 0) {
189                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
190                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
191                 goto again;
192         }
193         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
194
195         /*
196          * Skip this layer1 entry if it is pointing to a layer2 big-block
197          * on a volume that we are currently trying to remove from the
198          * file-system. This is used by the volume-del code together with
199          * the reblocker to free up a volume.
200          */
201         if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
202             hmp->volume_to_remove) {
203                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
204                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
205                 goto again;
206         }
207
208         /*
209          * Dive layer 2, each entry represents a large-block.
210          */
211         layer2_offset = layer1->phys_offset +
212                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
213         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
214         if (*errorp) {
215                 result_offset = 0;
216                 goto failed;
217         }
218
219         /*
220          * Check CRC.  This can race another thread holding the lock
221          * and in the middle of modifying layer2.
222          */
223         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
224                 hammer_lock_ex(&hmp->blkmap_lock);
225                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
226                         panic("CRC FAILED: LAYER2");
227                 hammer_unlock(&hmp->blkmap_lock);
228         }
229
230         /*
231          * Skip the layer if the zone is owned by someone other than us.
232          */
233         if (layer2->zone && layer2->zone != zone) {
234                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
235                 goto again;
236         }
237         if (offset < layer2->append_off) {
238                 next_offset += layer2->append_off - offset;
239                 goto again;
240         }
241
242 #if 0
243         /*
244          * If operating in the current non-hint blockmap block, do not
245          * allow it to get over-full.  Also drop any active hinting so
246          * blockmap->next_offset is updated at the end.
247          *
248          * We do this for B-Tree and meta-data allocations to provide
249          * localization for updates.
250          */
251         if ((zone == HAMMER_ZONE_BTREE_INDEX ||
252              zone == HAMMER_ZONE_META_INDEX) &&
253             offset >= HAMMER_LARGEBLOCK_OVERFILL &&
254             !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
255         ) {
256                 if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
257                         next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
258                         use_hint = 0;
259                         goto again;
260                 }
261         }
262 #endif
263
264         /*
265          * We need the lock from this point on.  We have to re-check zone
266          * ownership after acquiring the lock and also check for reservations.
267          */
268         hammer_lock_ex(&hmp->blkmap_lock);
269
270         if (layer2->zone && layer2->zone != zone) {
271                 hammer_unlock(&hmp->blkmap_lock);
272                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
273                 goto again;
274         }
275         if (offset < layer2->append_off) {
276                 hammer_unlock(&hmp->blkmap_lock);
277                 next_offset += layer2->append_off - offset;
278                 goto again;
279         }
280
281         /*
282          * The bigblock might be reserved by another zone.  If it is reserved
283          * by our zone we may have to move next_offset past the append_off.
284          */
285         base_off = hammer_xlate_to_zone2(next_offset &
286                                         ~HAMMER_LARGEBLOCK_MASK64);
287         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
288         if (resv) {
289                 if (resv->zone != zone) {
290                         hammer_unlock(&hmp->blkmap_lock);
291                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
292                                       ~HAMMER_LARGEBLOCK_MASK64;
293                         goto again;
294                 }
295                 if (offset < resv->append_off) {
296                         hammer_unlock(&hmp->blkmap_lock);
297                         next_offset += resv->append_off - offset;
298                         goto again;
299                 }
300                 ++resv->refs;
301         }
302
303         /*
304          * Ok, we can allocate out of this layer2 big-block.  Assume ownership
305          * of the layer for real.  At this point we've validated any
306          * reservation that might exist and can just ignore resv.
307          */
308         if (layer2->zone == 0) {
309                 /*
310                  * Assign the bigblock to our zone
311                  */
312                 hammer_modify_buffer(trans, buffer1,
313                                      layer1, sizeof(*layer1));
314                 --layer1->blocks_free;
315                 layer1->layer1_crc = crc32(layer1,
316                                            HAMMER_LAYER1_CRCSIZE);
317                 hammer_modify_buffer_done(buffer1);
318                 hammer_modify_buffer(trans, buffer2,
319                                      layer2, sizeof(*layer2));
320                 layer2->zone = zone;
321                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
322                 KKASSERT(layer2->append_off == 0);
323                 hammer_modify_volume_field(trans, trans->rootvol,
324                                            vol0_stat_freebigblocks);
325                 --root_volume->ondisk->vol0_stat_freebigblocks;
326                 hmp->copy_stat_freebigblocks =
327                         root_volume->ondisk->vol0_stat_freebigblocks;
328                 hammer_modify_volume_done(trans->rootvol);
329         } else {
330                 hammer_modify_buffer(trans, buffer2,
331                                      layer2, sizeof(*layer2));
332         }
333         KKASSERT(layer2->zone == zone);
334
335         /*
336          * NOTE: bytes_free can legally go negative due to de-dup.
337          */
338         layer2->bytes_free -= bytes;
339         KKASSERT(layer2->append_off <= offset);
340         layer2->append_off = offset + bytes;
341         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
342         hammer_modify_buffer_done(buffer2);
343
344         /*
345          * We hold the blockmap lock and should be the only ones
346          * capable of modifying resv->append_off.  Track the allocation
347          * as appropriate.
348          */
349         KKASSERT(bytes != 0);
350         if (resv) {
351                 KKASSERT(resv->append_off <= offset);
352                 resv->append_off = offset + bytes;
353                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
354                 hammer_blockmap_reserve_complete(hmp, resv);
355         }
356
357         /*
358          * If we are allocating from the base of a new buffer we can avoid
359          * a disk read by calling hammer_bnew().
360          */
361         if ((next_offset & HAMMER_BUFMASK) == 0) {
362                 hammer_bnew_ext(trans->hmp, next_offset, bytes,
363                                 errorp, &buffer3);
364         }
365         result_offset = next_offset;
366
367         /*
368          * If we weren't supplied with a hint or could not use the hint
369          * then we wound up using blockmap->next_offset as the hint and
370          * need to save it.
371          */
372         if (use_hint == 0) {
373                 hammer_modify_volume(NULL, root_volume, NULL, 0);
374                 blockmap->next_offset = next_offset + bytes;
375                 hammer_modify_volume_done(root_volume);
376         }
377         hammer_unlock(&hmp->blkmap_lock);
378 failed:
379
380         /*
381          * Cleanup
382          */
383         if (buffer1)
384                 hammer_rel_buffer(buffer1, 0);
385         if (buffer2)
386                 hammer_rel_buffer(buffer2, 0);
387         if (buffer3)
388                 hammer_rel_buffer(buffer3, 0);
389
390         return(result_offset);
391 }
392
393 /*
394  * Frontend function - Reserve bytes in a zone.
395  *
396  * This code reserves bytes out of a blockmap without committing to any
397  * meta-data modifications, allowing the front-end to directly issue disk
398  * write I/O for large blocks of data.
399  *
400  * The backend later finalizes the reservation with hammer_blockmap_finalize()
401  * upon committing the related record.
402  */
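/*
 * Rough usage sketch (illustrative only, not lifted from an actual caller;
 * error handling omitted):
 *
 *      resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *      ... frontend issues direct write I/O against zone_off ...
 *      hammer_blockmap_finalize(trans, resv, zone_off, bytes);  (backend)
 *      hammer_blockmap_reserve_complete(hmp, resv);             (drop the ref)
 */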
403 hammer_reserve_t
404 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
405                         hammer_off_t *zone_offp, int *errorp)
406 {
407         hammer_volume_t root_volume;
408         hammer_blockmap_t blockmap;
409         hammer_blockmap_t freemap;
410         struct hammer_blockmap_layer1 *layer1;
411         struct hammer_blockmap_layer2 *layer2;
412         hammer_buffer_t buffer1 = NULL;
413         hammer_buffer_t buffer2 = NULL;
414         hammer_buffer_t buffer3 = NULL;
415         hammer_off_t tmp_offset;
416         hammer_off_t next_offset;
417         hammer_off_t layer1_offset;
418         hammer_off_t layer2_offset;
419         hammer_off_t base_off;
420         hammer_reserve_t resv;
421         hammer_reserve_t resx;
422         int loops = 0;
423         int offset;
424
425         /*
426          * Setup
427          */
428         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
429         root_volume = hammer_get_root_volume(hmp, errorp);
430         if (*errorp)
431                 return(NULL);
432         blockmap = &hmp->blockmap[zone];
433         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
434         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
435
436         /*
437          * Deal with alignment and buffer-boundary issues.
438          *
439          * Be careful, certain primary alignments are used below to allocate
440          * new blockmap blocks.
441          */
442         bytes = (bytes + 15) & ~15;
443         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
444
445         next_offset = blockmap->next_offset;
446 again:
447         resv = NULL;
448         /*
449          * Check for wrap
450          */
451         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
452                 if (++loops == 2) {
453                         *errorp = ENOSPC;
454                         goto failed;
455                 }
456                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
457         }
458
459         /*
460          * The allocation request may not cross a buffer boundary.  Special
461          * large allocations must not cross a large-block boundary.
462          */
463         tmp_offset = next_offset + bytes - 1;
464         if (bytes <= HAMMER_BUFSIZE) {
465                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
466                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
467                         goto again;
468                 }
469         } else {
470                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
471                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
472                         goto again;
473                 }
474         }
475         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
476
477         /*
478          * Dive layer 1.
479          */
480         layer1_offset = freemap->phys_offset +
481                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
482         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
483         if (*errorp)
484                 goto failed;
485
486         /*
487          * Check CRC.
488          */
489         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
490                 hammer_lock_ex(&hmp->blkmap_lock);
491                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
492                         panic("CRC FAILED: LAYER1");
493                 hammer_unlock(&hmp->blkmap_lock);
494         }
495
496         /*
497          * If we are at a big-block boundary and layer1 indicates no
498          * free big-blocks, then we cannot allocate a new big-block in
499          * layer2; skip to the next layer1 entry.
500          */
501         if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
502             layer1->blocks_free == 0) {
503                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
504                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
505                 goto again;
506         }
507         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
508
509         /*
510          * Dive layer 2, each entry represents a large-block.
511          */
512         layer2_offset = layer1->phys_offset +
513                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
514         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
515         if (*errorp)
516                 goto failed;
517
518         /*
519          * Check CRC if not allocating into uninitialized space (which we
520          * aren't when reserving space).
521          */
522         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
523                 hammer_lock_ex(&hmp->blkmap_lock);
524                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
525                         panic("CRC FAILED: LAYER2");
526                 hammer_unlock(&hmp->blkmap_lock);
527         }
528
529         /*
530          * Skip the layer if the zone is owned by someone other than us.
531          */
532         if (layer2->zone && layer2->zone != zone) {
533                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
534                 goto again;
535         }
536         if (offset < layer2->append_off) {
537                 next_offset += layer2->append_off - offset;
538                 goto again;
539         }
540
541         /*
542          * We need the lock from this point on.  We have to re-check zone
543          * ownership after acquiring the lock and also check for reservations.
544          */
545         hammer_lock_ex(&hmp->blkmap_lock);
546
547         if (layer2->zone && layer2->zone != zone) {
548                 hammer_unlock(&hmp->blkmap_lock);
549                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
550                 goto again;
551         }
552         if (offset < layer2->append_off) {
553                 hammer_unlock(&hmp->blkmap_lock);
554                 next_offset += layer2->append_off - offset;
555                 goto again;
556         }
557
558         /*
559          * The bigblock might be reserved by another zone.  If it is reserved
560          * by our zone we may have to move next_offset past the append_off.
561          */
562         base_off = hammer_xlate_to_zone2(next_offset &
563                                         ~HAMMER_LARGEBLOCK_MASK64);
564         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
565         if (resv) {
566                 if (resv->zone != zone) {
567                         hammer_unlock(&hmp->blkmap_lock);
568                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
569                                       ~HAMMER_LARGEBLOCK_MASK64;
570                         goto again;
571                 }
572                 if (offset < resv->append_off) {
573                         hammer_unlock(&hmp->blkmap_lock);
574                         next_offset += resv->append_off - offset;
575                         goto again;
576                 }
577                 ++resv->refs;
578                 resx = NULL;
579         } else {
580                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
581                                M_WAITOK | M_ZERO | M_USE_RESERVE);
582                 resx->refs = 1;
583                 resx->zone = zone;
584                 resx->zone_offset = base_off;
585                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
586                         resx->flags |= HAMMER_RESF_LAYER2FREE;
587                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
588                 KKASSERT(resv == NULL);
589                 resv = resx;
590                 ++hammer_count_reservations;
591         }
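        /*
         * Record the end of the reserved region so later reservations or
         * allocations in this big-block start beyond it.
         */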
592         resv->append_off = offset + bytes;
593
594         /*
595          * If we are not reserving a whole buffer but are at the start of
596          * a new block, call hammer_bnew() to avoid a disk read.
597          *
598          * If we are reserving a whole buffer (or more), the caller will
599          * probably use a direct read, so do nothing.
600          *
601          * If we do not have a whole lot of system memory we really can't
602          * afford to block while holding the blkmap_lock!
603          */
604         if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
605                 if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE))
606                         hammer_bnew(hmp, next_offset, errorp, &buffer3);
607         }
608
609         /*
610          * Adjust our iterator and alloc_offset.  The layer1 and layer2
611          * space beyond alloc_offset is uninitialized.  alloc_offset must
612          * be big-block aligned.
613          */
614         blockmap->next_offset = next_offset + bytes;
615         hammer_unlock(&hmp->blkmap_lock);
616
617 failed:
618         if (buffer1)
619                 hammer_rel_buffer(buffer1, 0);
620         if (buffer2)
621                 hammer_rel_buffer(buffer2, 0);
622         if (buffer3)
623                 hammer_rel_buffer(buffer3, 0);
624         hammer_rel_volume(root_volume, 0);
625         *zone_offp = next_offset;
626
627         return(resv);
628 }
629
630 /*
631  * Frontend function - Dedup bytes in a zone.
632  *
633  * Dedup reservations work exactly the same as normal write reservations
634  * except we only adjust bytes_free field and don't touch append offset.
635  * Finalization mechanic for dedup reservations is also the same as for
636  * normal write ones - the backend finalizes the reservation with
637  * hammer_blockmap_finalize().
638  */
639 hammer_reserve_t
640 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
641                               hammer_off_t zone_offset, int *errorp)
642 {
643         hammer_volume_t root_volume;
644         hammer_blockmap_t freemap;
645         struct hammer_blockmap_layer1 *layer1;
646         struct hammer_blockmap_layer2 *layer2;
647         hammer_buffer_t buffer1 = NULL;
648         hammer_buffer_t buffer2 = NULL;
649         hammer_off_t layer1_offset;
650         hammer_off_t layer2_offset;
651         hammer_off_t base_off;
652         hammer_reserve_t resv = NULL;
653         hammer_reserve_t resx = NULL;
654
655         /*
656          * Setup
657          */
658         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
659         root_volume = hammer_get_root_volume(hmp, errorp);
660         if (*errorp)
661                 return (NULL);
662         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
663         KKASSERT(freemap->phys_offset != 0);
664
665         bytes = (bytes + 15) & ~15;
666         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
667
668         /*
669          * Dive layer 1.
670          */
671         layer1_offset = freemap->phys_offset +
672                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
673         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
674         if (*errorp)
675                 goto failed;
676
677         /*
678          * Check CRC.
679          */
680         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
681                 hammer_lock_ex(&hmp->blkmap_lock);
682                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
683                         panic("CRC FAILED: LAYER1");
684                 hammer_unlock(&hmp->blkmap_lock);
685         }
686         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
687
688         /*
689          * Dive layer 2, each entry represents a large-block.
690          */
691         layer2_offset = layer1->phys_offset +
692                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
693         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
694         if (*errorp)
695                 goto failed;
696
697         /*
698          * Check CRC.
699          */
700         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
701                 hammer_lock_ex(&hmp->blkmap_lock);
702                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
703                         panic("CRC FAILED: LAYER2");
704                 hammer_unlock(&hmp->blkmap_lock);
705         }
706
707         /*
708          * Fail if the zone is owned by someone other than us.
709          */
710         if (layer2->zone && layer2->zone != zone)
711                 goto failed;
712
713         /*
714          * We need the lock from this point on.  We have to re-check zone
715          * ownership after acquiring the lock and also check for reservations.
716          */
717         hammer_lock_ex(&hmp->blkmap_lock);
718
719         if (layer2->zone && layer2->zone != zone) {
720                 hammer_unlock(&hmp->blkmap_lock);
721                 goto failed;
722         }
723
724         base_off = hammer_xlate_to_zone2(zone_offset &
725                                         ~HAMMER_LARGEBLOCK_MASK64);
726         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
727         if (resv) {
728                 if (resv->zone != zone) {
729                         hammer_unlock(&hmp->blkmap_lock);
730                         resv = NULL;
731                         goto failed;
732                 }
733                 /*
734                  * Due to possible big block underflow we can't simply
735                  * subtract bytes from bytes_free.
736                  */
737                 if (update_bytes_free(resv, bytes) == 0) {
738                         hammer_unlock(&hmp->blkmap_lock);
739                         resv = NULL;
740                         goto failed;
741                 }
742                 ++resv->refs;
743                 resx = NULL;
744         } else {
745                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
746                                M_WAITOK | M_ZERO | M_USE_RESERVE);
747                 resx->refs = 1;
748                 resx->zone = zone;
749                 resx->bytes_free = layer2->bytes_free;
750                 /*
751                  * Due to possible big block underflow we can't simply
752                  * subtract bytes from bytes_free.
753                  */
754                 if (update_bytes_free(resx, bytes) == 0) {
755                         hammer_unlock(&hmp->blkmap_lock);
756                         kfree(resx, hmp->m_misc);
757                         goto failed;
758                 }
759                 resx->zone_offset = base_off;
760                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
761                 KKASSERT(resv == NULL);
762                 resv = resx;
763                 ++hammer_count_reservations;
764         }
765
766         hammer_unlock(&hmp->blkmap_lock);
767
768 failed:
769         if (buffer1)
770                 hammer_rel_buffer(buffer1, 0);
771         if (buffer2)
772                 hammer_rel_buffer(buffer2, 0);
773         hammer_rel_volume(root_volume, 0);
774
775         return(resv);
776 }
777
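/*
 * Helper - subtract bytes from a reservation's bytes_free unless the
 * big-block byte accounting has already underflowed too far.  Returns 1
 * on success, 0 (leaving bytes_free untouched) on underflow.
 */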
778 static int
779 update_bytes_free(hammer_reserve_t resv, int bytes)
780 {
781         int32_t temp;
782
783         /*
784          * Big-block underflow check
785          */
786         temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
787         cpu_ccfence(); /* XXX do we really need it ? */
788         if (temp > resv->bytes_free) {
789                 kprintf("BIGBLOCK UNDERFLOW\n");
790                 return (0);
791         }
792
793         resv->bytes_free -= bytes;
794         return (1);
795 }
796
797 /*
798  * Dereference a reservation structure.  Upon the final release the
799  * underlying big-block is checked and if it is entirely free we delete
800  * any related HAMMER buffers to avoid potential conflicts with future
801  * reuse of the big-block.
802  */
803 void
804 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
805 {
806         hammer_off_t base_offset;
807         int error;
808
809         KKASSERT(resv->refs > 0);
810         KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
811                  HAMMER_ZONE_RAW_BUFFER);
812
813         /*
814          * Setting append_off to the max prevents any new allocations
815          * from occurring while we are trying to dispose of the reservation,
816          * allowing us to safely delete any related HAMMER buffers.
817          *
818          * If we are unable to clean out all related HAMMER buffers we
819          * requeue the delay.
820          */
821         if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
822                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
823                 base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
824                 base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
825                 if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
826                         hammer_dedup_cache_inval(hmp, base_offset);
827                 error = hammer_del_buffers(hmp, base_offset,
828                                            resv->zone_offset,
829                                            HAMMER_LARGEBLOCK_SIZE,
830                                            1);
831                 if (hammer_debug_general & 0x20000) {
832                         kprintf("hammer: dellgblk %016jx error %d\n",
833                                 (intmax_t)base_offset, error);
834                 }
835                 if (error)
836                         hammer_reserve_setdelay(hmp, resv);
837         }
838         if (--resv->refs == 0) {
839                 if (hammer_debug_general & 0x20000) {
840                         kprintf("hammer: delresvr %016jx zone %02x\n",
841                                 (intmax_t)resv->zone_offset, resv->zone);
842                 }
843                 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
844                 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
845                 kfree(resv, hmp->m_misc);
846                 --hammer_count_reservations;
847         }
848 }
849
850 /*
851  * Prevent a potentially free big-block from being reused until after
852  * the related flushes have completely cycled, otherwise crash recovery
853  * could resurrect a data block that was already reused and overwritten.
854  *
855  * The caller might reset the underlying layer2 entry's append_off to 0, so
856  * our covering append_off must be set to max to prevent any reallocation
857  * until after the flush delays complete, not to mention proper invalidation
858  * of any underlying cached blocks.
859  */
860 static void
861 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
862                         int zone, struct hammer_blockmap_layer2 *layer2)
863 {
864         hammer_reserve_t resv;
865
866         /*
867          * Allocate the reservation if necessary.
868          *
869          * NOTE: need lock in future around resv lookup/allocation and
870          * the setdelay call, currently refs is not bumped until the call.
871          */
872 again:
873         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
874         if (resv == NULL) {
875                 resv = kmalloc(sizeof(*resv), hmp->m_misc,
876                                M_WAITOK | M_ZERO | M_USE_RESERVE);
877                 resv->zone = zone;
878                 resv->zone_offset = base_offset;
879                 resv->refs = 0;
880                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
881
882                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
883                         resv->flags |= HAMMER_RESF_LAYER2FREE;
884                 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
885                         kfree(resv, hmp->m_misc);
886                         goto again;
887                 }
888                 ++hammer_count_reservations;
889         } else {
890                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
891                         resv->flags |= HAMMER_RESF_LAYER2FREE;
892         }
893         hammer_reserve_setdelay(hmp, resv);
894 }
895
896 /*
897  * Enter the reservation on the on-delay list, or move it if it
898  * is already on the list.
899  */
900 static void
901 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
902 {
903         if (resv->flags & HAMMER_RESF_ONDELAY) {
904                 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
905                 resv->flush_group = hmp->flusher.next + 1;
906                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
907         } else {
908                 ++resv->refs;
909                 ++hmp->rsv_fromdelay;
910                 resv->flags |= HAMMER_RESF_ONDELAY;
911                 resv->flush_group = hmp->flusher.next + 1;
912                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
913         }
914 }
915
916 /*
917  * Reserve has reached its flush point, remove it from the delay list
918  * and finish it off.  hammer_blockmap_reserve_complete() inherits
919  * the ondelay reference.
920  */
921 void
922 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
923 {
924         KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
925         resv->flags &= ~HAMMER_RESF_ONDELAY;
926         TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
927         --hmp->rsv_fromdelay;
928         hammer_blockmap_reserve_complete(hmp, resv);
929 }
930
931 /*
932  * Backend function - free (offset, bytes) in a zone.
933  *
934  * XXX error return
935  */
936 void
937 hammer_blockmap_free(hammer_transaction_t trans,
938                      hammer_off_t zone_offset, int bytes)
939 {
940         hammer_mount_t hmp;
941         hammer_volume_t root_volume;
942         hammer_blockmap_t freemap;
943         struct hammer_blockmap_layer1 *layer1;
944         struct hammer_blockmap_layer2 *layer2;
945         hammer_buffer_t buffer1 = NULL;
946         hammer_buffer_t buffer2 = NULL;
947         hammer_off_t layer1_offset;
948         hammer_off_t layer2_offset;
949         hammer_off_t base_off;
950         int error;
951         int zone;
952
953         if (bytes == 0)
954                 return;
955         hmp = trans->hmp;
956
957         /*
958          * Alignment
959          */
960         bytes = (bytes + 15) & ~15;
961         KKASSERT(bytes <= HAMMER_XBUFSIZE);
962         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) & 
963                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
964
965         /*
966          * Basic zone validation & locking
967          */
968         zone = HAMMER_ZONE_DECODE(zone_offset);
969         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
970         root_volume = trans->rootvol;
971         error = 0;
972
973         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
974
975         /*
976          * Dive layer 1.
977          */
978         layer1_offset = freemap->phys_offset +
979                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
980         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
981         if (error)
982                 goto failed;
983         KKASSERT(layer1->phys_offset &&
984                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
985         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
986                 hammer_lock_ex(&hmp->blkmap_lock);
987                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
988                         panic("CRC FAILED: LAYER1");
989                 hammer_unlock(&hmp->blkmap_lock);
990         }
991
992         /*
993          * Dive layer 2, each entry represents a large-block.
994          */
995         layer2_offset = layer1->phys_offset +
996                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
997         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
998         if (error)
999                 goto failed;
1000         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1001                 hammer_lock_ex(&hmp->blkmap_lock);
1002                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1003                         panic("CRC FAILED: LAYER2");
1004                 hammer_unlock(&hmp->blkmap_lock);
1005         }
1006
1007         hammer_lock_ex(&hmp->blkmap_lock);
1008
1009         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1010
1011         /*
1012          * Free space previously allocated via blockmap_alloc().
1013          *
1014          * NOTE: bytes_free can be and remain negative due to de-dup ops
1015          *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1016          */
1017         KKASSERT(layer2->zone == zone);
1018         layer2->bytes_free += bytes;
1019         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1020
1021         /*
1022          * If a big-block becomes entirely free we must create a covering
1023          * reservation to prevent premature reuse.  Note, however, that
1024          * the big-block and/or reservation may still have an append_off
1025          * that allows further (non-reused) allocations.
1026          *
1027          * Once the reservation has been made we re-check layer2 and if
1028          * the big-block is still entirely free we reset the layer2 entry.
1029          * The reservation will prevent premature reuse.
1030          *
1031          * NOTE: hammer_buffer's are only invalidated when the reservation
1032          * is completed, if the layer2 entry is still completely free at
1033          * that time.  Any allocations from the reservation that may have
1034          * occurred in the meantime, or active references on the reservation
1035          * from new pending allocations, will prevent the invalidation from
1036          * occurring.
1037          */
1038         if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1039                 base_off = hammer_xlate_to_zone2(zone_offset &
1040                                                 ~HAMMER_LARGEBLOCK_MASK64);
1041
1042                 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1043                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1044                         layer2->zone = 0;
1045                         layer2->append_off = 0;
1046                         hammer_modify_buffer(trans, buffer1,
1047                                              layer1, sizeof(*layer1));
1048                         ++layer1->blocks_free;
1049                         layer1->layer1_crc = crc32(layer1,
1050                                                    HAMMER_LAYER1_CRCSIZE);
1051                         hammer_modify_buffer_done(buffer1);
1052                         hammer_modify_volume_field(trans,
1053                                         trans->rootvol,
1054                                         vol0_stat_freebigblocks);
1055                         ++root_volume->ondisk->vol0_stat_freebigblocks;
1056                         hmp->copy_stat_freebigblocks =
1057                            root_volume->ondisk->vol0_stat_freebigblocks;
1058                         hammer_modify_volume_done(trans->rootvol);
1059                 }
1060         }
1061         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1062         hammer_modify_buffer_done(buffer2);
1063         hammer_unlock(&hmp->blkmap_lock);
1064
1065 failed:
1066         if (buffer1)
1067                 hammer_rel_buffer(buffer1, 0);
1068         if (buffer2)
1069                 hammer_rel_buffer(buffer2, 0);
1070 }
1071
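/*
 * De-duplication accounting for (zone_offset, bytes): subtract bytes from
 * the big-block's bytes_free without touching append_off.  Returns ERANGE
 * if the adjustment would underflow the big-block byte accounting.
 */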
1072 int
1073 hammer_blockmap_dedup(hammer_transaction_t trans,
1074                      hammer_off_t zone_offset, int bytes)
1075 {
1076         hammer_mount_t hmp;
1077         hammer_blockmap_t freemap;
1078         struct hammer_blockmap_layer1 *layer1;
1079         struct hammer_blockmap_layer2 *layer2;
1080         hammer_buffer_t buffer1 = NULL;
1081         hammer_buffer_t buffer2 = NULL;
1082         hammer_off_t layer1_offset;
1083         hammer_off_t layer2_offset;
1084         int32_t temp;
1085         int error;
1086         int zone __debugvar;
1087
1088         if (bytes == 0)
1089                 return (0);
1090         hmp = trans->hmp;
1091
1092         /*
1093          * Alignment
1094          */
1095         bytes = (bytes + 15) & ~15;
1096         KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
1097         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1098                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
1099
1100         /*
1101          * Basic zone validation & locking
1102          */
1103         zone = HAMMER_ZONE_DECODE(zone_offset);
1104         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1105         error = 0;
1106
1107         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1108
1109         /*
1110          * Dive layer 1.
1111          */
1112         layer1_offset = freemap->phys_offset +
1113                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1114         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1115         if (error)
1116                 goto failed;
1117         KKASSERT(layer1->phys_offset &&
1118                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1119         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1120                 hammer_lock_ex(&hmp->blkmap_lock);
1121                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1122                         panic("CRC FAILED: LAYER1");
1123                 hammer_unlock(&hmp->blkmap_lock);
1124         }
1125
1126         /*
1127          * Dive layer 2, each entry represents a large-block.
1128          */
1129         layer2_offset = layer1->phys_offset +
1130                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1131         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1132         if (error)
1133                 goto failed;
1134         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1135                 hammer_lock_ex(&hmp->blkmap_lock);
1136                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1137                         panic("CRC FAILED: LAYER2");
1138                 hammer_unlock(&hmp->blkmap_lock);
1139         }
1140
1141         hammer_lock_ex(&hmp->blkmap_lock);
1142
1143         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1144
1145         /*
1146          * De-dup accounting: subtract the de-duplicated bytes from bytes_free.
1147          *
1148          * NOTE: bytes_free can be and remain negative due to de-dup ops
1149          *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1150          */
1151         KKASSERT(layer2->zone == zone);
1152         temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
1153         cpu_ccfence(); /* prevent gcc from optimizing temp out */
1154         if (temp > layer2->bytes_free) {
1155                 error = ERANGE;
1156                 goto underflow;
1157         }
1158         layer2->bytes_free -= bytes;
1159
1160         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1161
1162         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1163 underflow:
1164         hammer_modify_buffer_done(buffer2);
1165         hammer_unlock(&hmp->blkmap_lock);
1166
1167 failed:
1168         if (buffer1)
1169                 hammer_rel_buffer(buffer1, 0);
1170         if (buffer2)
1171                 hammer_rel_buffer(buffer2, 0);
1172         return (error);
1173 }
1174
1175 /*
1176  * Backend function - finalize (offset, bytes) in a zone.
1177  *
1178  * Allocate space that was previously reserved by the frontend.
1179  */
1180 int
1181 hammer_blockmap_finalize(hammer_transaction_t trans,
1182                          hammer_reserve_t resv,
1183                          hammer_off_t zone_offset, int bytes)
1184 {
1185         hammer_mount_t hmp;
1186         hammer_volume_t root_volume;
1187         hammer_blockmap_t freemap;
1188         struct hammer_blockmap_layer1 *layer1;
1189         struct hammer_blockmap_layer2 *layer2;
1190         hammer_buffer_t buffer1 = NULL;
1191         hammer_buffer_t buffer2 = NULL;
1192         hammer_off_t layer1_offset;
1193         hammer_off_t layer2_offset;
1194         int error;
1195         int zone;
1196         int offset;
1197
1198         if (bytes == 0)
1199                 return(0);
1200         hmp = trans->hmp;
1201
1202         /*
1203          * Alignment
1204          */
1205         bytes = (bytes + 15) & ~15;
1206         KKASSERT(bytes <= HAMMER_XBUFSIZE);
1207
1208         /*
1209          * Basic zone validation & locking
1210          */
1211         zone = HAMMER_ZONE_DECODE(zone_offset);
1212         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1213         root_volume = trans->rootvol;
1214         error = 0;
1215
1216         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1217
1218         /*
1219          * Dive layer 1.
1220          */
1221         layer1_offset = freemap->phys_offset +
1222                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1223         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1224         if (error)
1225                 goto failed;
1226         KKASSERT(layer1->phys_offset &&
1227                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1228         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1229                 hammer_lock_ex(&hmp->blkmap_lock);
1230                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1231                         panic("CRC FAILED: LAYER1");
1232                 hammer_unlock(&hmp->blkmap_lock);
1233         }
1234
1235         /*
1236          * Dive layer 2, each entry represents a large-block.
1237          */
1238         layer2_offset = layer1->phys_offset +
1239                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1240         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1241         if (error)
1242                 goto failed;
1243         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1244                 hammer_lock_ex(&hmp->blkmap_lock);
1245                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1246                         panic("CRC FAILED: LAYER2");
1247                 hammer_unlock(&hmp->blkmap_lock);
1248         }
1249
1250         hammer_lock_ex(&hmp->blkmap_lock);
1251
1252         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1253
1254         /*
1255          * Finalize some or all of the space covered by a current
1256          * reservation.  An allocation in the same layer may have
1257          * already assigned ownership.
1258          */
1259         if (layer2->zone == 0) {
1260                 hammer_modify_buffer(trans, buffer1,
1261                                      layer1, sizeof(*layer1));
1262                 --layer1->blocks_free;
1263                 layer1->layer1_crc = crc32(layer1,
1264                                            HAMMER_LAYER1_CRCSIZE);
1265                 hammer_modify_buffer_done(buffer1);
1266                 layer2->zone = zone;
1267                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1268                 KKASSERT(layer2->append_off == 0);
1269                 hammer_modify_volume_field(trans,
1270                                 trans->rootvol,
1271                                 vol0_stat_freebigblocks);
1272                 --root_volume->ondisk->vol0_stat_freebigblocks;
1273                 hmp->copy_stat_freebigblocks =
1274                    root_volume->ondisk->vol0_stat_freebigblocks;
1275                 hammer_modify_volume_done(trans->rootvol);
1276         }
1277         if (layer2->zone != zone)
1278                 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1279         KKASSERT(layer2->zone == zone);
1280         KKASSERT(bytes != 0);
1281         layer2->bytes_free -= bytes;
1282
1283         if (resv) {
1284                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1285         }
1286
1287         /*
1288          * Finalizations can occur out of order, or combined with allocations.
1289          * append_off must be set to the highest allocated offset.
1290          */
1291         offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1292         if (layer2->append_off < offset)
1293                 layer2->append_off = offset;
1294
1295         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1296         hammer_modify_buffer_done(buffer2);
1297         hammer_unlock(&hmp->blkmap_lock);
1298
1299 failed:
1300         if (buffer1)
1301                 hammer_rel_buffer(buffer1, 0);
1302         if (buffer2)
1303                 hammer_rel_buffer(buffer2, 0);
1304         return(error);
1305 }
1306
1307 /*
1308  * Return the approximate number of free bytes in the big-block
1309  * containing the specified blockmap offset.
1310  *
1311  * WARNING: A negative number can be returned if data de-dup exists,
1312  *          and the result will also not represent the actual number
1313  *          of free bytes in this case.
1314  *
1315  *          This code is used only by the reblocker.
1316  */
1317 int
1318 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1319                         int *curp, int *errorp)
1320 {
1321         hammer_volume_t root_volume;
1322         hammer_blockmap_t blockmap;
1323         hammer_blockmap_t freemap;
1324         struct hammer_blockmap_layer1 *layer1;
1325         struct hammer_blockmap_layer2 *layer2;
1326         hammer_buffer_t buffer = NULL;
1327         hammer_off_t layer1_offset;
1328         hammer_off_t layer2_offset;
1329         int32_t bytes;
1330         int zone;
1331
1332         zone = HAMMER_ZONE_DECODE(zone_offset);
1333         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1334         root_volume = hammer_get_root_volume(hmp, errorp);
1335         if (*errorp) {
1336                 *curp = 0;
1337                 return(0);
1338         }
1339         blockmap = &hmp->blockmap[zone];
1340         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1341
1342         /*
1343          * Dive layer 1.
1344          */
1345         layer1_offset = freemap->phys_offset +
1346                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1347         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1348         if (*errorp) {
1349                 bytes = 0;
1350                 goto failed;
1351         }
1352         KKASSERT(layer1->phys_offset);
1353         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1354                 hammer_lock_ex(&hmp->blkmap_lock);
1355                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1356                         panic("CRC FAILED: LAYER1");
1357                 hammer_unlock(&hmp->blkmap_lock);
1358         }
1359
1360         /*
1361          * Dive layer 2, each entry represents a large-block.
1362          *
1363          * (reuse buffer, layer1 pointer becomes invalid)
1364          */
1365         layer2_offset = layer1->phys_offset +
1366                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1367         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1368         if (*errorp) {
1369                 bytes = 0;
1370                 goto failed;
1371         }
1372         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1373                 hammer_lock_ex(&hmp->blkmap_lock);
1374                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1375                         panic("CRC FAILED: LAYER2");
1376                 hammer_unlock(&hmp->blkmap_lock);
1377         }
1378         KKASSERT(layer2->zone == zone);
1379
1380         bytes = layer2->bytes_free;
1381
1382         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1383                 *curp = 0;
1384         else
1385                 *curp = 1;
1386 failed:
1387         if (buffer)
1388                 hammer_rel_buffer(buffer, 0);
1389         hammer_rel_volume(root_volume, 0);
1390         if (hammer_debug_general & 0x0800) {
1391                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1392                         (long long)zone_offset, bytes);
1393         }
1394         return(bytes);
1395 }
1396
1397
1398 /*
1399  * Lookup a blockmap offset and verify blockmap layers.
1400  */
1401 hammer_off_t
1402 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1403                         int *errorp)
1404 {
1405         hammer_volume_t root_volume;
1406         hammer_blockmap_t freemap;
1407         struct hammer_blockmap_layer1 *layer1;
1408         struct hammer_blockmap_layer2 *layer2;
1409         hammer_buffer_t buffer = NULL;
1410         hammer_off_t layer1_offset;
1411         hammer_off_t layer2_offset;
1412         hammer_off_t result_offset;
1413         hammer_off_t base_off;
1414         hammer_reserve_t resv __debugvar;
1415         int zone;
1416
1417         /*
1418          * Calculate the zone-2 offset.
1419          */
1420         zone = HAMMER_ZONE_DECODE(zone_offset);
1421         result_offset = hammer_xlate_to_zone2(zone_offset);
1422
1423         /*
1424          * Validate the allocation zone
1425          */
1426         root_volume = hammer_get_root_volume(hmp, errorp);
1427         if (*errorp)
1428                 return(0);
1429         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1430         KKASSERT(freemap->phys_offset != 0);
1431
1432         /*
1433          * Dive layer 1.
1434          */
1435         layer1_offset = freemap->phys_offset +
1436                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1437         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1438         if (*errorp)
1439                 goto failed;
1440         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1441         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1442                 hammer_lock_ex(&hmp->blkmap_lock);
1443                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1444                         panic("CRC FAILED: LAYER1");
1445                 hammer_unlock(&hmp->blkmap_lock);
1446         }
1447
1448         /*
1449          * Dive layer 2, each entry represents a large-block.
1450          */
1451         layer2_offset = layer1->phys_offset +
1452                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1453         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1454
1455         if (*errorp)
1456                 goto failed;
1457         if (layer2->zone == 0) {
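                /*
                 * A big-block not yet assigned to a zone on-media must be
                 * covered by a frontend reservation for our zone.
                 */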
1458                 base_off = hammer_xlate_to_zone2(zone_offset &
1459                                                 ~HAMMER_LARGEBLOCK_MASK64);
1460                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1461                                  base_off);
1462                 KKASSERT(resv && resv->zone == zone);
1463
1464         } else if (layer2->zone != zone) {
1465                 panic("hammer_blockmap_lookup_verify: bad zone %d/%d",
1466                         layer2->zone, zone);
1467         }
1468         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1469                 hammer_lock_ex(&hmp->blkmap_lock);
1470                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1471                         panic("CRC FAILED: LAYER2");
1472                 hammer_unlock(&hmp->blkmap_lock);
1473         }
1474
1475 failed:
1476         if (buffer)
1477                 hammer_rel_buffer(buffer, 0);
1478         hammer_rel_volume(root_volume, 0);
1479         if (hammer_debug_general & 0x0800) {
1480                 kprintf("hammer_blockmap_lookup_verify: %016llx -> %016llx\n",
1481                         (long long)zone_offset, (long long)result_offset);
1482         }
1483         return(result_offset);
1484 }
1485
1486
1487 /*
1488  * Check space availability
1489  *
1490  * MPSAFE - does not require fs_token
1491  */
1492 int
1493 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1494 {
1495         const int in_size = sizeof(struct hammer_inode_data) +
1496                             sizeof(union hammer_btree_elm);
1497         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1498         int64_t usedbytes;
1499
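        /*
         * Estimate the space already spoken for: reserved inode and record
         * meta-data, reserved data bytes, big-blocks held on the reuse-delay
         * list, the dirty-buffer limit, and the caller-supplied slop
         * (expressed in big-blocks).
         */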
1500         usedbytes = hmp->rsv_inodes * in_size +
1501                     hmp->rsv_recs * rec_size +
1502                     hmp->rsv_databytes +
1503                     ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1504                     ((int64_t)hammer_limit_dirtybufspace) +
1505                     (slop << HAMMER_LARGEBLOCK_BITS);
1506
1507         hammer_count_extra_space_used = usedbytes;      /* debugging */
1508         if (resp)
1509                 *resp = usedbytes;
1510
1511         if (hmp->copy_stat_freebigblocks >=
1512             (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1513                 return(0);
1514         }
1515         return (ENOSPC);
1516 }
1517