HAMMER VFS - Attempt to fix a low-memory deadlock
sys/vfs/hammer/hammer_blockmap.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
                                    hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
static int update_bytes_free(hammer_reserve_t resv, int bytes);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
                      hammer_off_t hint, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
        int use_hint;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
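        /*
         * (bytes + 15) & ~15 rounds the request up to the next 16-byte
         * boundary, e.g. a 100-byte request becomes 112 bytes.
         */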
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Use the hint if we have one.
         */
        if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
                next_offset = (hint + 15) & ~(hammer_off_t)15;
                use_hint = 1;
        } else {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }
again:

        /*
         * use_hint is turned off if we leave the hinted big-block.
         */
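        /*
         * ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64) is non-zero
         * only when the two offsets differ in bits above the hint-block
         * mask, i.e. the iterator has wandered out of the hinted block.
         */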
        if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
                next_offset = blockmap->next_offset;
                use_hint = 0;
        }

        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a large-block boundary.
         */
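        /*
         * tmp_offset is the last byte of the proposed allocation.  The
         * XOR tests below have bits set above the mask only if the first
         * and last bytes fall on opposite sides of a boundary, in which
         * case the allocation is pushed to the start of the next buffer
         * or large-block.
         */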
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);

        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.
         */
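        /*
         * The CRC is tested lock-free first; a mismatch is re-tested
         * under blkmap_lock to distinguish real corruption from a race
         * with a writer in the middle of updating the entry.
         */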
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Skip this layer1 entry if it is pointing to a layer2 big-block
         * on a volume that we are currently trying to remove from the
         * file-system. This is used by the volume-del code together with
         * the reblocker to free up a volume.
         */
        if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
            hmp->volume_to_remove) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  This can race another thread holding the lock
         * and in the middle of modifying layer2.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

#if 0
        /*
         * If operating in the current non-hint blockmap block, do not
         * allow it to get over-full.  Also drop any active hinting so
         * blockmap->next_offset is updated at the end.
         *
         * We do this for B-Tree and meta-data allocations to provide
         * localization for updates.
         */
        if ((zone == HAMMER_ZONE_BTREE_INDEX ||
             zone == HAMMER_ZONE_META_INDEX) &&
            offset >= HAMMER_LARGEBLOCK_OVERFILL &&
            !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
        ) {
                if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
                        next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                        use_hint = 0;
                        goto again;
                }
        }
#endif

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The bigblock might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
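        /*
         * base_off clears the zone bits and the offset within the
         * big-block, then re-tags the address as zone-2 (raw buffer).
         * Reservations for all zones are keyed this way in the
         * rb_resv_root tree.
         */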
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the bigblock to our zone
                 */
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        /*
         * NOTE: bytes_free can legally go negative due to de-dup.
         */
        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);

        /*
         * We hold the blockmap lock and should be the only ones
         * capable of modifying resv->append_off.  Track the allocation
         * as appropriate.
         */
        KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
        }
        result_offset = next_offset;

        /*
         * If we weren't supplied with a hint or could not use the hint
         * then we wound up using blockmap->next_offset as the hint and
         * need to save it.
         */
        if (use_hint == 0) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        }
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a large-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The bigblock might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
                        resx->flags |= HAMMER_RESF_LAYER2FREE;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         *
         * If we do not have a whole lot of system memory we really can't
         * afford to block while holding the blkmap_lock!
         */
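        /*
         * NOTE: Skipping hammer_bnew() when free pages are critically low
         * avoids blocking on page allocation while blkmap_lock is held,
         * which is presumably the low-memory deadlock this commit works
         * around.
         */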
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE))
                        hammer_bnew(hmp, next_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}

/*
 * Frontend function - Dedup bytes in a zone.
 *
 * Dedup reservations work exactly the same as normal write reservations
 * except we only adjust the bytes_free field and don't touch the append
 * offset.  The finalization mechanism for dedup reservations is also the
 * same as for normal write ones - the backend finalizes the reservation
 * with hammer_blockmap_finalize().
 */
hammer_reserve_t
hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
                              hammer_off_t zone_offset, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv = NULL;
        hammer_reserve_t resx = NULL;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return (NULL);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Fail if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone)
                goto failed;

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                goto failed;
        }

        base_off = (zone_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                /*
                 * Due to possible big block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resv, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        resv = NULL;
                        goto failed;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->bytes_free = layer2->bytes_free;
                /*
                 * Due to possible big block underflow we can't simply
                 * subtract bytes from bytes_free.
                 */
                if (update_bytes_free(resx, bytes) == 0) {
                        hammer_unlock(&hmp->blkmap_lock);
                        kfree(resx, hmp->m_misc);
                        goto failed;
                }
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }

        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);

        return(resv);
}

static int
update_bytes_free(hammer_reserve_t resv, int bytes)
{
        int32_t temp;

        /*
         * Big-block underflow check
         */
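        /*
         * If bytes_free is already within two big-blocks of INT32_MIN
         * the subtraction wraps and temp comes out larger than
         * bytes_free, flagging the underflow before bytes_free itself
         * is modified.
         */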
        temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
        cpu_ccfence(); /* XXX do we really need it? */
        if (temp > resv->bytes_free) {
                kprintf("BIGBLOCK UNDERFLOW\n");
                return (0);
        }

        resv->bytes_free -= bytes;
        return (1);
}

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t base_offset;
        int error;

        KKASSERT(resv->refs > 0);
        KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
                 HAMMER_ZONE_RAW_BUFFER);

        /*
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the
         * reservation, allowing us to safely delete any related HAMMER
         * buffers.
         *
         * If we are unable to clean out all related HAMMER buffers we
         * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_LARGEBLOCK_SIZE;
                base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
                base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
                if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
                        hammer_dedup_cache_inval(hmp, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_LARGEBLOCK_SIZE,
                                           1);
                if (hammer_debug_general & 0x20000) {
                        kprintf("hammer: dellgblk %016jx error %d\n",
                                (intmax_t)base_offset, error);
                }
                if (error)
                        hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                if (hammer_debug_general & 0x20000) {
                        kprintf("hammer: delresvr %016jx zone %02x\n",
                                (intmax_t)resv->zone_offset, resv->zone);
                }
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
                        int zone, struct hammer_blockmap_layer2 *layer2)
{
        hammer_reserve_t resv;

        /*
         * Allocate the reservation if necessary.
         *
         * NOTE: need lock in future around resv lookup/allocation and
         * the setdelay call, currently refs is not bumped until the call.
         */
again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
                resv->append_off = HAMMER_LARGEBLOCK_SIZE;

                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
        } else {
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
        hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
 */
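/*
 * (The delay is keyed to hmp->flusher.next + 1, so the reservation is
 * held until that flush group has completely cycled; this assumes
 * flusher.next is the sequence number of the next flush group.)
 */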
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
}

/*
 * Reserve has reached its flush point, remove it from the delay list
 * and finish it off.  hammer_blockmap_reserve_complete() inherits
 * the ondelay reference.
 */
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_LARGEBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);

        /*
         * If a big-block becomes entirely free we must create a covering
         * reservation to prevent premature reuse.  Note, however, that
         * the big-block and/or reservation may still have an append_off
         * that allows further (non-reused) allocations.
         *
         * Once the reservation has been made we re-check layer2 and if
         * the big-block is still entirely free we reset the layer2 entry.
         * The reservation will prevent premature reuse.
         *
         * NOTE: hammer_buffer's are only invalidated when the reservation
         * is completed, if the layer2 entry is still completely free at
         * that time.  Any allocations from the reservation that may have
         * occurred in the mean time, or active references on the
         * reservation from new pending allocations, will prevent the
         * invalidation from occurring.
         */
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 &
                             ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;

                hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

int
hammer_blockmap_dedup(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t temp;
        int error;
        int zone;

        if (bytes == 0)
                return (0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_LARGEBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         *
         * NOTE: bytes_free can be and remain negative due to de-dup ops
         *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
         */
        KKASSERT(layer2->zone == zone);
        temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
        cpu_ccfence(); /* prevent gcc from optimizing temp out */
        if (temp > layer2->bytes_free) {
                error = ERANGE;
                goto underflow;
        }
        layer2->bytes_free -= bytes;

        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
underflow:
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return (error);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;

        if (resv) {
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
        }

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return(error);
}

/*
 * Return the approximate number of free bytes in the big-block
 * containing the specified blockmap offset.
 *
 * WARNING: A negative number can be returned if data de-dup exists,
 *          and the result will also not represent the actual number
 *          of free bytes in this case.
 *
 *          This code is used only by the reblocker.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int32_t bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp) {
                bytes = 0;
                goto failed;
        }
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         *
         * (reuse buffer, layer1 pointer becomes invalid)
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        if (*errorp) {
                bytes = 0;
                goto failed;
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        (long long)zone_offset, bytes);
        }
        return(bytes);
}


/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
                       int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
                        HAMMER_ZONE_RAW_BUFFER;

        /*
         * We can actually stop here, normal blockmaps are now direct-mapped
         * onto the freemap and so represent zone-2 addresses.
         */
        if (hammer_verify_zone == 0) {
                *errorp = 0;
                return(result_offset);
        }

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp)
                goto failed;
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        if (*errorp)
                goto failed;
        if (layer2->zone == 0) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 &
                             ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);
        } else if (layer2->zone != zone) {
                panic("hammer_blockmap_lookup: bad zone %d/%d\n",
                        layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        (long long)zone_offset, (long long)result_offset);
        }
        return(result_offset);
}


/*
 * Check space availability
 *
 * MPSAFE - does not require fs_token
 */
int
_hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        int64_t usedbytes;

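        /*
         * Worst-case estimate of the space pending operations may need:
         * B-Tree overhead for reserved inodes and records, reserved data
         * bytes, big-blocks parked on the delay list, a multiple of the
         * dirty buffer high-water mark, and the caller-supplied slop
         * (expressed in big-blocks).
         */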
        usedbytes = hmp->rsv_inodes * in_size +
                    hmp->rsv_recs * rec_size +
                    hmp->rsv_databytes +
                    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
                    ((int64_t)hidirtybufspace << 2) +
                    (slop << HAMMER_LARGEBLOCK_BITS);

        hammer_count_extra_space_used = usedbytes;      /* debugging */
        if (resp)
                *resp = usedbytes;

        if (hmp->copy_stat_freebigblocks >=
            (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
                return(0);
        }
        return (ENOSPC);
}