/* sys/vfs/hammer/hammer_blockmap.c */
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER blockmap
 */
#include <vm/vm_page2.h>

#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
                                    hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
static int update_bytes_free(hammer_reserve_t resv, int bytes);
static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
static void hammer_skip_volume(hammer_off_t *offsetp);

/*
 * Reserved big-blocks red-black tree support
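 *
 * RB_GENERATE2 also emits a direct-key lookup variant (RB_LOOKUP) keyed
 * on the zone_offset field, which is how reservations are found by
 * big-block offset throughout this file.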
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
                      hammer_off_t hint, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
        int use_hint;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
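        /*
         * Illustrative example: a 100-byte request rounds up to the next
         * 16-byte boundary, i.e. (100 + 15) & ~15 == 112.
         */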
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Use the hint if we have one.
         */
        if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
                next_offset = (hint + 15) & ~(hammer_off_t)15;
                use_hint = 1;
        } else {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }
again:

        /*
         * use_hint is turned off if we leave the hinted big-block.
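         *
         * The XOR isolates the bits in which next_offset and hint differ;
         * masking off the intra-hint-block bits leaves a non-zero value
         * only when the two offsets lie in different hint blocks.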
         */
        if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }

        /*
         * Check for wrap
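         *
         * next_offset reaching the base of the next zone means we have hit
         * the end of this zone's address space.  We wrap back to the start
         * of the zone once; hitting the end a second time (loops == 2)
         * means the entire zone was scanned without success, so ENOSPC.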
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
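         *
         * As with the hint check above, (a ^ b) & ~mask is non-zero when
         * a and b fall in different mask-aligned windows.  If the request
         * would straddle a boundary, next_offset is advanced to the start
         * of the next window and we retry.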
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
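         *
         * The freemap is a two-layer radix tree: each layer1 entry points
         * at an array of layer2 entries, and each layer2 entry describes
         * a single big-block.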
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);

        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset)) {
                        result_offset = 0;
                        goto failed;
                }
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Skip the whole volume if it is pointing to a layer2 big-block
         * on a volume that we are currently trying to remove from the
         * file-system.  This is used by the volume-del code together with
         * the reblocker to free up a volume.
         */
        if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
                hammer_skip_volume(&next_offset);
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  This can race another thread that holds the lock
         * and is in the middle of modifying layer2.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

#if 0
        /*
         * If operating in the current non-hint blockmap block, do not
         * allow it to get over-full.  Also drop any active hinting so
         * blockmap->next_offset is updated at the end.
         *
         * We do this for B-Tree and meta-data allocations to provide
         * localization for updates.
         */
        if ((zone == HAMMER_ZONE_BTREE_INDEX ||
             zone == HAMMER_ZONE_META_INDEX) &&
            offset >= HAMMER_BIGBLOCK_OVERFILL &&
            !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
                if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
                        next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                        use_hint = 0;
                        goto again;
                }
        }
#endif

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the big-block to our zone
                 */
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        /*
         * NOTE: bytes_free can legally go negative due to de-dup.
         */
        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);

        /*
         * We hold the blockmap lock and should be the only ones
         * capable of modifying resv->append_off.  Track the allocation
         * as appropriate.
         */
        KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew_ext().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
                if (*errorp) {
                        result_offset = 0;
                        goto failed;
                }
        }
        result_offset = next_offset;

        /*
         * If we weren't supplied with a hint or could not use the hint
         * then we wound up using blockmap->next_offset as the hint and
         * need to save it.
         */
        if (use_hint == 0) {
                hammer_modify_volume_noundo(NULL, root_volume);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        }
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
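
#if 0
/*
 * Illustrative usage sketch (not compiled): allocating space for a
 * B-Tree node out of the B-Tree zone.  The helper name and the use of
 * sizeof(struct hammer_node_ondisk) are assumptions made for the
 * example, not part of this file's API.
 */
static hammer_off_t
example_alloc_btree_node(hammer_transaction_t trans, int *errorp)
{
        /* no locality hint, so pass 0 */
        return(hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
                                     sizeof(struct hammer_node_ondisk),
                                     0, errorp));
}
#endif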

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for big-blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset))
                        goto failed;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resx->flags |= HAMMER_RESF_LAYER2FREE;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         *
         * If we do not have a whole lot of system memory we really can't
         * afford to block while holding the blkmap_lock!
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
                        hammer_bnew(hmp, next_offset, errorp, &buffer3);
                        if (*errorp)
                                goto failed;
                }
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}
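
/*
 * Illustrative front-end/back-end flow (error handling and the record
 * plumbing between the two halves are elided):
 *
 *      resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *      ... frontend issues direct write I/O against zone_off ...
 *      error = hammer_blockmap_finalize(trans, resv, zone_off, bytes);
 *      hammer_blockmap_reserve_complete(hmp, resv);
 */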

/*
 * Frontend function - Dedup bytes in a zone.
 *
 * Dedup reservations work exactly the same as normal write reservations
 * except we only adjust the bytes_free field and don't touch the append
 * offset.  The finalization mechanism for dedup reservations is also the
 * same as for normal write ones - the backend finalizes the reservation
 * with hammer_blockmap_finalize().
 */
hammer_reserve_t
hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
                              hammer_off_t zone_offset, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv = NULL;
        hammer_reserve_t resx = NULL;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return (NULL);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Fail if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone)
                goto failed;

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                goto failed;
        }

        base_off = hammer_xlate_to_zone2(zone_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resv, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->bytes_free = layer2->bytes_free;
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resx, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        kfree(resx, hmp->m_misc);
                        goto failed;
                }
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }

        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);

        return(resv);
}

static int
update_bytes_free(hammer_reserve_t resv, int bytes)
{
        int32_t temp;

        /*
         * Big-block underflow check
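         *
         * bytes_free may legally go somewhat negative due to de-dup, but
         * if subtracting two full big-blocks wraps the 32-bit value the
         * count has become nonsensical and the reservation is refused.
         * The wrap shows up as the difference comparing greater than the
         * original value, which is impossible without overflow.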
         */
        temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* XXX do we really need it? */
        if (temp > resv->bytes_free) {
                hdkprintf("BIGBLOCK UNDERFLOW\n");
                return (0);
        }

        resv->bytes_free -= bytes;
        return (1);
}

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t base_offset;
        int error;

        KKASSERT(resv->refs > 0);
        KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
                 HAMMER_ZONE_RAW_BUFFER);

        /*
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the reservation,
         * allowing us to safely delete any related HAMMER buffers.
         *
         * If we are unable to clean out all related HAMMER buffers we
         * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_BIGBLOCK_SIZE;
                base_offset = hammer_xlate_to_zoneX(resv->zone,
                                                    resv->zone_offset);
                if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
                        hammer_dedup_cache_inval(hmp, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_BIGBLOCK_SIZE,
                                           1);
                if (hammer_debug_general & 0x20000) {
                        hkprintf("delbgblk %016jx error %d\n",
                                (intmax_t)base_offset, error);
                }
                if (error)
                        hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                if (hammer_debug_general & 0x20000) {
                        hkprintf("delresvr %016jx zone %02x\n",
                                (intmax_t)resv->zone_offset, resv->zone);
                }
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
                        int zone, struct hammer_blockmap_layer2 *layer2)
{
        hammer_reserve_t resv;

        /*
         * Allocate the reservation if necessary.
         *
         * NOTE: need lock in future around resv lookup/allocation and
         * the setdelay call, currently refs is not bumped until the call.
         */
again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
                resv->append_off = HAMMER_BIGBLOCK_SIZE;

                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
        } else {
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
        hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
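 *
 * The reservation is stamped with the flush group following the one
 * currently being built, so it cannot be cleared until the related
 * flushes have fully cycled.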
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
}

/*
 * The reservation has reached its flush point; remove it from the delay
 * list and finish it off.  hammer_blockmap_reserve_complete() inherits
 * the ondelay reference.
 */
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        /*
         * If a big-block becomes entirely free we must create a covering
         * reservation to prevent premature reuse.  Note, however, that
         * the big-block and/or reservation may still have an append_off
         * that allows further (non-reused) allocations.
         *
         * Once the reservation has been made we re-check layer2 and if
         * the big-block is still entirely free we reset the layer2 entry.
         * The reservation will prevent premature reuse.
         *
         * NOTE: hammer_buffer's are only invalidated when the reservation
         * is completed, if the layer2 entry is still completely free at
         * that time.  Any allocations from the reservation that may have
         * occurred in the mean time, or active references on the reservation
         * from new pending allocations, will prevent the invalidation from
         * occurring.
         */
        if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);

                hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

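/*
 * Backend function - de-duplicate (offset, bytes) in a zone.
 *
 * Accounts for an additional reference to already-allocated space by
 * decrementing bytes_free without touching append_off; bytes_free going
 * negative is the expected signature of de-duplicated storage.
 */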
int
hammer_blockmap_dedup(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t temp;
        int error;
        int zone __debugvar;

        if (bytes == 0)
                return (0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * De-dup space previously allocated via blockmap_alloc(),
         * accounting for the additional reference.
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* prevent gcc from optimizing temp out */
        if (temp > layer2->bytes_free) {
                error = ERANGE;
                goto underflow;
        }
        layer2->bytes_free -= bytes;

        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
underflow:
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return (error);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;

        if (resv) {
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
        }

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
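         *
         * For example, if 4K is finalized at in-block offset 16K and a
         * later finalization covers offset 8K, append_off stays at 20K;
         * it only ever ratchets upward.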
1297          */
1298         offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1299         if (layer2->append_off < offset)
1300                 layer2->append_off = offset;
1301
1302         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1303         hammer_modify_buffer_done(buffer2);
1304         hammer_unlock(&hmp->blkmap_lock);
1305
1306 failed:
1307         if (buffer1)
1308                 hammer_rel_buffer(buffer1, 0);
1309         if (buffer2)
1310                 hammer_rel_buffer(buffer2, 0);
1311         return(error);
1312 }
1313
1314 /*
1315  * Return the approximate number of free bytes in the big-block
1316  * containing the specified blockmap offset.
1317  *
1318  * WARNING: A negative number can be returned if data de-dup exists,
1319  *          and the result will also not represent he actual number
1320  *          of free bytes in this case.
1321  *
1322  *          This code is used only by the reblocker.
1323  */
1324 int
1325 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1326                         int *curp, int *errorp)
1327 {
1328         hammer_volume_t root_volume;
1329         hammer_blockmap_t blockmap;
1330         hammer_blockmap_t freemap;
1331         struct hammer_blockmap_layer1 *layer1;
1332         struct hammer_blockmap_layer2 *layer2;
1333         hammer_buffer_t buffer = NULL;
1334         hammer_off_t layer1_offset;
1335         hammer_off_t layer2_offset;
1336         int32_t bytes;
1337         int zone;
1338
1339         zone = HAMMER_ZONE_DECODE(zone_offset);
1340         KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1341         root_volume = hammer_get_root_volume(hmp, errorp);
1342         if (*errorp) {
1343                 *curp = 0;
1344                 return(0);
1345         }
1346         blockmap = &hmp->blockmap[zone];
1347         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1348
1349         /*
1350          * Dive layer 1.
1351          */
1352         layer1_offset = freemap->phys_offset +
1353                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1354         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1355         if (*errorp) {
1356                 *curp = 0;
1357                 bytes = 0;
1358                 goto failed;
1359         }
1360         KKASSERT(layer1->phys_offset);
1361         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1362                 hammer_lock_ex(&hmp->blkmap_lock);
1363                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1364                         hpanic("CRC FAILED: LAYER1");
1365                 hammer_unlock(&hmp->blkmap_lock);
1366         }
1367
1368         /*
1369          * Dive layer 2, each entry represents a big-block.
1370          *
1371          * (reuse buffer, layer1 pointer becomes invalid)
1372          */
1373         layer2_offset = layer1->phys_offset +
1374                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1375         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1376         if (*errorp) {
1377                 *curp = 0;
1378                 bytes = 0;
1379                 goto failed;
1380         }
1381         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1382                 hammer_lock_ex(&hmp->blkmap_lock);
1383                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1384                         hpanic("CRC FAILED: LAYER2");
1385                 hammer_unlock(&hmp->blkmap_lock);
1386         }
1387         KKASSERT(layer2->zone == zone);
1388
1389         bytes = layer2->bytes_free;
1390
1391         /*
1392          * *curp is set to 1 only when there is no error and
1393          * next_offset and zone_offset fall within the same big-block.
1394          */
1395         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1396                 *curp = 0;  /* not same */
1397         else
1398                 *curp = 1;
1399 failed:
1400         if (buffer)
1401                 hammer_rel_buffer(buffer, 0);
1402         hammer_rel_volume(root_volume, 0);
1403         if (hammer_debug_general & 0x4000) {
1404                 hdkprintf("%016llx -> %d\n", (long long)zone_offset, bytes);
1405         }
1406         return(bytes);
1407 }
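
/*
 * Editor's sketch (not part of the original source): how a
 * reblocker-style caller might consume the two return values.  The
 * 'fill_level' threshold is hypothetical.
 */
#if 0
	int cur, error;
	int bytes;

	bytes = hammer_blockmap_getfree(hmp, zone_offset, &cur, &error);
	if (error == 0 && cur == 0 && bytes >= fill_level) {
		/*
		 * The big-block is not the allocator's current target
		 * and still has enough free space; leave it alone.
		 */
	}
#endif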
1408
1409
1410 /*
1411  * Lookup a blockmap offset and verify blockmap layers.
1412  */
1413 hammer_off_t
1414 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1415                         int *errorp)
1416 {
1417         hammer_volume_t root_volume;
1418         hammer_blockmap_t freemap;
1419         struct hammer_blockmap_layer1 *layer1;
1420         struct hammer_blockmap_layer2 *layer2;
1421         hammer_buffer_t buffer = NULL;
1422         hammer_off_t layer1_offset;
1423         hammer_off_t layer2_offset;
1424         hammer_off_t result_offset;
1425         hammer_off_t base_off;
1426         hammer_reserve_t resv __debugvar;
1427         int zone;
1428
1429         /*
1430          * Calculate the zone-2 offset.
1431          */
1432         zone = HAMMER_ZONE_DECODE(zone_offset);
1433         result_offset = hammer_xlate_to_zone2(zone_offset);
1434
1435         /*
1436          * Validate the allocation zone.
1437          */
1438         root_volume = hammer_get_root_volume(hmp, errorp);
1439         if (*errorp)
1440                 return(0);
1441         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1442         KKASSERT(freemap->phys_offset != 0);
1443
1444         /*
1445          * Dive layer 1.
1446          */
1447         layer1_offset = freemap->phys_offset +
1448                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1449         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1450         if (*errorp)
1451                 goto failed;
1452         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1453         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1454                 hammer_lock_ex(&hmp->blkmap_lock);
1455                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1456                         hpanic("CRC FAILED: LAYER1");
1457                 hammer_unlock(&hmp->blkmap_lock);
1458         }
1459
1460         /*
1461          * Dive layer 2, each entry represents a big-block.
1462          */
1463         layer2_offset = layer1->phys_offset +
1464                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1465         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1466
1467         if (*errorp)
1468                 goto failed;
1469         if (layer2->zone == 0) {
1470                 base_off = hammer_xlate_to_zone2(zone_offset &
1471                                                 ~HAMMER_BIGBLOCK_MASK64);
1472                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1473                                  base_off);
1474                 KKASSERT(resv && resv->zone == zone);
1475
1476         } else if (layer2->zone != zone) {
1477                 hpanic("bad zone %d/%d", layer2->zone, zone);
1478         }
1479         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1480                 hammer_lock_ex(&hmp->blkmap_lock);
1481                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1482                         hpanic("CRC FAILED: LAYER2");
1483                 hammer_unlock(&hmp->blkmap_lock);
1484         }
1485
1486 failed:
1487         if (buffer)
1488                 hammer_rel_buffer(buffer, 0);
1489         hammer_rel_volume(root_volume, 0);
1490         if (hammer_debug_general & 0x0800) {
1491                 hdkprintf("%016llx -> %016llx\n",
1492                         (long long)zone_offset, (long long)result_offset);
1493         }
1494         return(result_offset);
1495 }
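
/*
 * Editor's sketch (not part of the original source): the zone-X to
 * zone-2 translation above only rewrites the 4-bit zone field in the
 * top of the 64-bit offset; the volume number and in-volume offset
 * are preserved.  The helper name below is hypothetical.
 */
#if 0
static hammer_off_t
xlate_to_zone2_sketch(hammer_off_t zone_offset)
{
	/* clear the zone field, then stamp in zone 2 (raw buffer) */
	zone_offset &= ~HAMMER_OFF_ZONE_MASK;
	zone_offset |= HAMMER_ZONE_RAW_BUFFER;
	return(zone_offset);
}
#endif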
1496
1497
1498 /*
1499  * Check space availability
1500  *
1501  * MPSAFE - does not require fs_token
1502  */
1503 int
1504 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1505 {
1506         const int in_size = sizeof(struct hammer_inode_data) +
1507                             sizeof(union hammer_btree_elm);
1508         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1509         int64_t usedbytes;
1510
1511         usedbytes = hmp->rsv_inodes * in_size +
1512                     hmp->rsv_recs * rec_size +
1513                     hmp->rsv_databytes +
1514                     ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1515                     ((int64_t)hammer_limit_dirtybufspace) +
1516                     (slop << HAMMER_BIGBLOCK_BITS);
1517
1518         hammer_count_extra_space_used = usedbytes;      /* debugging */
1519         if (resp)
1520                 *resp = usedbytes;
1521
1522         if (hmp->copy_stat_freebigblocks >=
1523             (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1524                 return(0);
1525         }
1526         return(ENOSPC);
1527 }
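
/*
 * Editor's sketch (not part of the original source): a worked example
 * of the check above.  With 8MB big-blocks (HAMMER_BIGBLOCK_BITS ==
 * 23), an estimate of ~200MB of reserved/used bytes demands
 * 200MB >> 23 == 25 free big-blocks, so the check passes only while
 * copy_stat_freebigblocks >= 25.  The numbers are made up.
 */
#if 0
	int64_t usedbytes = 200LL << 20;	/* ~200MB estimated usage */
	int64_t needed = usedbytes >> HAMMER_BIGBLOCK_BITS;	/* 25 */
#endif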
1528
1529 static int
1530 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1531 {
1532         hammer_blockmap_t freemap;
1533         struct hammer_blockmap_layer1 *layer1;
1534         hammer_buffer_t buffer1 = NULL;
1535         hammer_off_t layer1_offset;
1536         int error = 0;
1537
1538         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1539
1540         layer1_offset = freemap->phys_offset +
1541                         HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
1542         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1543         if (error)
1544                 goto end;
1545
1546         /*
1547          * No more physically available space in the layer1
1548          * entries of the current volume; skip to the next volume.
1549          */
1550         if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
1551                 hammer_skip_volume(offsetp);
1552 end:
1553         if (buffer1)
1554                 hammer_rel_buffer(buffer1, 0);
1555         return(error);
1556 }
1557
1558 static void
1559 hammer_skip_volume(hammer_off_t *offsetp)
1560 {
1561         hammer_off_t offset;
1562         int zone, vol_no;
1563
1564         offset = *offsetp;
1565         zone = HAMMER_ZONE_DECODE(offset);
1566         vol_no = HAMMER_VOL_DECODE(offset) + 1;
1567         KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1568
1569         if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
1570                 vol_no = 0;
1571                 ++zone;
1572         }
1573
1574         *offsetp = HAMMER_ENCODE(zone, vol_no, 0);
1575 }
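
/*
 * Editor's sketch (not part of the original source): skipping past
 * the last volume wraps to volume 0 of the next zone.  With
 * HAMMER_MAX_VOLUMES == 256, volume 255 of zone 8 advances to
 * volume 0 of zone 9, with the in-volume offset reset to 0:
 */
#if 0
	hammer_off_t off = HAMMER_ENCODE(8, HAMMER_MAX_VOLUMES - 1, 0x1000);

	hammer_skip_volume(&off);
	KKASSERT(off == HAMMER_ENCODE(9, 0, 0));
#endif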