sys/vfs/hammer: Add hpanic() [2/2]
sys/vfs/hammer/hammer_blockmap.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER blockmap
 */
#include <vm/vm_page2.h>

#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
                                    hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
static int update_bytes_free(hammer_reserve_t resv, int bytes);
static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
static void hammer_skip_volume(hammer_off_t *offsetp);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
                      hammer_off_t hint, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
        int use_hint;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
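        /*
         * All blockmap allocations are made in multiples of 16 bytes,
         * e.g. a 1-byte request rounds up to 16 and a 17-byte request
         * to 32.
         */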
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Use the hint if we have one, to localize the allocation near
         * related data.
         */
        if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
                next_offset = (hint + 15) & ~(hammer_off_t)15;
                use_hint = 1;
        } else {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }
again:

        /*
         * use_hint is turned off if we leave the hinted big-block.
         */
        if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }

        /*
         * Check for wrap.  Hitting the base of the following zone means
         * we scanned our zone to the end; allow one wrap back to the
         * zone base before giving up with ENOSPC.
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
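        /*
         * tmp_offset addresses the last byte of the request.  If it
         * falls in a different buffer (or big-block) than next_offset,
         * push the request forward so it starts exactly on that
         * boundary.
         */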
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);

        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  The first test is made without holding the blockmap
         * lock, so re-check under the lock before declaring the layer1
         * entry corrupt.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset)) {
                        result_offset = 0;
                        goto failed;
                }
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Skip the whole volume if it is pointing to a layer2 big-block
         * on a volume that we are currently trying to remove from the
         * file-system. This is used by the volume-del code together with
         * the reblocker to free up a volume.
         */
        if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
            hmp->volume_to_remove) {
                hammer_skip_volume(&next_offset);
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  This can race another thread holding the lock
         * and in the middle of modifying layer2.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

#if 0
        /*
         * If operating in the current non-hint blockmap block, do not
         * allow it to get over-full.  Also drop any active hinting so
         * blockmap->next_offset is updated at the end.
         *
         * We do this for B-Tree and meta-data allocations to provide
         * localization for updates.
         */
        if ((zone == HAMMER_ZONE_BTREE_INDEX ||
             zone == HAMMER_ZONE_META_INDEX) &&
            offset >= HAMMER_BIGBLOCK_OVERFILL &&
            !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
                if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
                        next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                        use_hint = 0;
                        goto again;
                }
        }
#endif

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
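        /*
         * The reservation tree is indexed by zone-2 (raw buffer)
         * big-block base offsets, hence the translation above.
         */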
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the big-block to our zone
                 */
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        /*
         * NOTE: bytes_free can legally go negative due to de-dup.
         */
        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);

        /*
         * We hold the blockmap lock and should be the only ones
         * capable of modifying resv->append_off.  Track the allocation
         * as appropriate.
         */
        KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew_ext().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
                if (*errorp) {
                        result_offset = 0;
                        goto failed;
                }
        }
        result_offset = next_offset;

        /*
         * If we weren't supplied with a hint or could not use the hint
         * then we wound up using blockmap->next_offset as the hint and
         * need to save it.
         */
        if (use_hint == 0) {
                hammer_modify_volume_noundo(NULL, root_volume);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        }
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for big-blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a big-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2, skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (hammer_check_volume(hmp, &next_offset))
                        goto failed;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = hammer_xlate_to_zone2(next_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
                                      ~HAMMER_BIGBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
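                /*
                 * No reservation covers this big-block yet; create one.
                 * M_USE_RESERVE lets the allocation dig into the reserve
                 * pool, reducing the chance of blocking while we hold
                 * the blkmap_lock.
                 */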
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resx->flags |= HAMMER_RESF_LAYER2FREE;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
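        /*
         * Advance the reservation's append_off past the reserved space
         * so no other front-end reservation hands out the same bytes
         * before the backend finalizes them.
         */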
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         *
         * If we do not have a whole lot of system memory we really can't
         * afford to block while holding the blkmap_lock!
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
                        hammer_bnew(hmp, next_offset, errorp, &buffer3);
                        if (*errorp)
                                goto failed;
                }
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}

/*
 * Frontend function - Dedup bytes in a zone.
 *
 * Dedup reservations work exactly the same as normal write reservations
 * except we only adjust the bytes_free field and don't touch the append
 * offset.  The finalization mechanism for dedup reservations is also the
 * same as for normal write ones - the backend finalizes the reservation
 * with hammer_blockmap_finalize().
 */
hammer_reserve_t
hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
                              hammer_off_t zone_offset, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv = NULL;
        hammer_reserve_t resx = NULL;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return (NULL);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Fail if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone)
                goto failed;

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                goto failed;
        }

        base_off = hammer_xlate_to_zone2(zone_offset &
                                        ~HAMMER_BIGBLOCK_MASK64);
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resv, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->bytes_free = layer2->bytes_free;
                /*
                 * Due to possible big-block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resx, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        kfree(resx, hmp->m_misc);
                        goto failed;
                }
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }

        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);

        return(resv);
}

static int
update_bytes_free(hammer_reserve_t resv, int bytes)
{
        int32_t temp;

        /*
         * Big-block underflow check.  bytes_free can legally go somewhat
         * negative due to de-dup, but if subtracting two full big-blocks
         * wraps the signed 32-bit value the counter has underflowed too
         * far to be trusted.
         */
        temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* XXX do we really need it ? */
        if (temp > resv->bytes_free) {
                kprintf("BIGBLOCK UNDERFLOW\n");
                return (0);
        }

        resv->bytes_free -= bytes;
        return (1);
}

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t base_offset;
        int error;

        KKASSERT(resv->refs > 0);
        KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
                 HAMMER_ZONE_RAW_BUFFER);

        /*
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the reservation,
         * allowing us to safely delete any related HAMMER buffers.
         *
         * If we are unable to clean out all related HAMMER buffers we
         * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_BIGBLOCK_SIZE;
                base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
                if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
                        hammer_dedup_cache_inval(hmp, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_BIGBLOCK_SIZE,
                                           1);
                if (hammer_debug_general & 0x20000) {
                        hkprintf("delbgblk %016jx error %d\n",
                                (intmax_t)base_offset, error);
                }
                if (error)
                        hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                if (hammer_debug_general & 0x20000) {
                        hkprintf("delresvr %016jx zone %02x\n",
                                (intmax_t)resv->zone_offset, resv->zone);
                }
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
                        int zone, struct hammer_blockmap_layer2 *layer2)
{
        hammer_reserve_t resv;

        /*
         * Allocate the reservation if necessary.
         *
         * NOTE: need lock in future around resv lookup/allocation and
         * the setdelay call, currently refs is not bumped until the call.
         */
again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
                resv->append_off = HAMMER_BIGBLOCK_SIZE;

                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
        } else {
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
        hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
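        /*
         * Tag the reservation with a flush group one past the flusher's
         * current next group; hammer_reserve_clrdelay() completes it
         * when that group is reached.
         */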
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
}

/*
 * Reserve has reached its flush point, remove it from the delay list
 * and finish it off.  hammer_blockmap_reserve_complete() inherits
 * the ondelay reference.
 */
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        /*
         * If a big-block becomes entirely free we must create a covering
         * reservation to prevent premature reuse.  Note, however, that
         * the big-block and/or reservation may still have an append_off
         * that allows further (non-reused) allocations.
         *
         * Once the reservation has been made we re-check layer2 and if
         * the big-block is still entirely free we reset the layer2 entry.
         * The reservation will prevent premature reuse.
         *
         * NOTE: hammer_buffer's are only invalidated when the reservation
         * is completed, if the layer2 entry is still completely free at
         * that time.  Any allocations from the reservation that may have
         * occurred in the meantime, or active references on the reservation
         * from new pending allocations, will prevent the invalidation from
         * occurring.
         */
        if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);

                hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

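/*
 * Backend function - de-dup (offset, bytes) in a zone.
 *
 * Account for an additional de-dup reference by adjusting bytes_free
 * without touching append_off.  Returns ERANGE if bytes_free would
 * underflow.
 */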
int
hammer_blockmap_dedup(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t temp;
        int error;
        int zone __debugvar;

        if (bytes == 0)
                return (0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_BIGBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_BIGBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
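        /*
         * Underflow check, same trick as in update_bytes_free(): if
         * subtracting two full big-blocks wraps the signed 32-bit
         * counter, bail with ERANGE rather than corrupt bytes_free.
         */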
        temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
        cpu_ccfence(); /* prevent gcc from optimizing temp out */
        if (temp > layer2->bytes_free) {
                error = ERANGE;
                goto underflow;
        }
        layer2->bytes_free -= bytes;

        KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
underflow:
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return (error);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;

        if (resv) {
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
        }

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return(error);
}

/*
 * Return the approximate number of free bytes in the big-block
 * containing the specified blockmap offset.
 *
 * WARNING: A negative number can be returned if data de-dup exists,
 *          and the result will also not represent the actual number
1321  *          of free bytes in this case.
1322  *
1323  *          This code is used only by the reblocker.
1324  */
1325 int
1326 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1327                         int *curp, int *errorp)
1328 {
1329         hammer_volume_t root_volume;
1330         hammer_blockmap_t blockmap;
1331         hammer_blockmap_t freemap;
1332         struct hammer_blockmap_layer1 *layer1;
1333         struct hammer_blockmap_layer2 *layer2;
1334         hammer_buffer_t buffer = NULL;
1335         hammer_off_t layer1_offset;
1336         hammer_off_t layer2_offset;
1337         int32_t bytes;
1338         int zone;
1339
1340         zone = HAMMER_ZONE_DECODE(zone_offset);
1341         KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1342         root_volume = hammer_get_root_volume(hmp, errorp);
1343         if (*errorp) {
1344                 *curp = 0;
1345                 return(0);
1346         }
1347         blockmap = &hmp->blockmap[zone];
1348         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1349
1350         /*
1351          * Dive layer 1.
1352          */
1353         layer1_offset = freemap->phys_offset +
1354                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1355         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1356         if (*errorp) {
1357                 *curp = 0;
1358                 bytes = 0;
1359                 goto failed;
1360         }
1361         KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         *
         * (reuse buffer, layer1 pointer becomes invalid)
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        if (*errorp) {
                *curp = 0;
                bytes = 0;
                goto failed;
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

        /*
         * *curp is set to 1 only when there is no error and next_offset
         * and zone_offset fall within the same big-block; the XOR below
         * leaves bits above the big-block mask set only when the two
         * offsets lie in different big-blocks.
         */
        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
                *curp = 0;  /* not same */
        else
                *curp = 1;
failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x4000) {
                hdkprintf("%016llx -> %d\n", (long long)zone_offset, bytes);
        }
        return(bytes);
}
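
#if 0
/*
 * Illustrative sketch (added; not part of the original code): how a
 * reblocker-style caller might consume hammer_blockmap_getfree().  The
 * helper name and the threshold parameter are hypothetical.
 */
static int
example_is_reblock_candidate(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int threshold_bytes)
{
        int cur;
        int error;
        int bytes;

        bytes = hammer_blockmap_getfree(hmp, zone_offset, &cur, &error);
        if (error)
                return(0);

        /*
         * A big-block with plenty of free space is a candidate unless it
         * is the block currently being appended to (cur != 0).  Note that
         * bytes may be negative under de-dup, which simply fails the test.
         */
        return(bytes >= threshold_bytes && cur == 0);
}
#endif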


/*
 * Lookup a blockmap offset and verify blockmap layers.
 */
hammer_off_t
hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv __debugvar;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        result_offset = hammer_xlate_to_zone2(zone_offset);

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp)
                goto failed;
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        hpanic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a big-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        if (*errorp)
                goto failed;
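        /*
         * Note (added): a layer2 entry with zone == 0 indicates the
         * big-block is not currently assigned to any zone; the lookup is
         * only legitimate here if a delayed-reuse reservation still covers
         * the big-block, which the assertion below verifies.
         */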
        if (layer2->zone == 0) {
                base_off = hammer_xlate_to_zone2(zone_offset &
                                                ~HAMMER_BIGBLOCK_MASK64);
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);

        } else if (layer2->zone != zone) {
                hpanic("bad zone %d/%d", layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        hpanic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                hdkprintf("%016llx -> %016llx\n",
                        (long long)zone_offset, (long long)result_offset);
        }
        return(result_offset);
}


/*
 * Check space availability
 *
 * MPSAFE - does not require fs_token
 */
int
_hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        int64_t usedbytes;

        usedbytes = hmp->rsv_inodes * in_size +
                    hmp->rsv_recs * rec_size +
                    hmp->rsv_databytes +
                    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
                    ((int64_t)hammer_limit_dirtybufspace) +
                    (slop << HAMMER_BIGBLOCK_BITS);

        hammer_count_extra_space_used = usedbytes;      /* debugging */
        if (resp)
                *resp = usedbytes;

        if (hmp->copy_stat_freebigblocks >=
            (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
                return(0);
        }
        return (ENOSPC);
}
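
/*
 * Illustrative arithmetic (added; assumes 8MB big-blocks, i.e.
 * HAMMER_BIGBLOCK_BITS == 23): with rsv_fromdelay == 4 and slop == 2,
 * those two terms contribute 4 << 23 == 32MB and 2 << 23 == 16MB to
 * usedbytes.  The final test compares big-block counts, so usedbytes is
 * shifted down by HAMMER_BIGBLOCK_BITS before being checked against
 * copy_stat_freebigblocks.
 */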

static int
hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
{
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        hammer_buffer_t buffer1 = NULL;
        hammer_off_t layer1_offset;
        int error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto end;

        /*
         * No more physically available space in layer1s
         * of the current volume, go to the next volume.
         */
        if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
                hammer_skip_volume(offsetp);
end:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        return(error);
}

static void
hammer_skip_volume(hammer_off_t *offsetp)
{
        hammer_off_t offset;
        int zone, vol_no;

        offset = *offsetp;
        zone = HAMMER_ZONE_DECODE(offset);
        vol_no = HAMMER_VOL_DECODE(offset) + 1;
        KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);

        if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
                vol_no = 0;
                ++zone;
        }

        *offsetp = HAMMER_ENCODE(zone, vol_no, 0);
}
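
/*
 * Example (added; illustrative values, assuming HAMMER_MAX_VOLUMES == 256):
 * an offset encoded as (zone 8, volume 255) advances to volume 256, which
 * triggers the wrap and yields (zone 9, volume 0, offset 0); the caller
 * thereby moves on to the next zone once every volume has been exhausted.
 */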