HAMMER - Add live dedup sysctl and support
[dragonfly.git] / sys / vfs / hammer / hammer_blockmap.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41
/*
 * Forward declarations for helpers that are private to this file.
 */
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44                                     hammer_off_t base_offset, int zone,
45                                     struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48
49 /*
50  * Reserved big-blocks red-black tree support
    *
    * Instantiates the hammer_res_rb_tree operations for hammer_reserve
    * nodes linked through rb_node and ordered by hammer_res_rb_compare().
    * The trailing hammer_off_t / zone_offset arguments presumably name the
    * key type and key field used by the RB_LOOKUP() calls in this file,
    * which pass a hammer_off_t big-block base offset.
51  */
52 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
53              hammer_res_rb_compare, hammer_off_t, zone_offset);
54
55 static int
56 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57 {
58         if (res1->zone_offset < res2->zone_offset)
59                 return(-1);
60         if (res1->zone_offset > res2->zone_offset)
61                 return(1);
62         return(0);
63 }
64
65 /*
66  * Allocate bytes from a zone
    *
    * The request is rounded up to a multiple of 16 bytes.  The scan starts
    * at the hint when the hint belongs to this zone, otherwise at
    * blockmap->next_offset, and advances until it finds a big-block that
    * is unowned or owned by this zone and has room at or past its append
    * offset.  The freemap layer1/layer2 entries are then updated while
    * holding hmp->blkmap_lock.
    *
    * Returns the offset of the allocation.  On failure 0 is returned and
    * *errorp is set: ENOSPC when the scan reaches the end of the zone for
    * the second time, or the error reported by hammer_bread().
67  */
68 hammer_off_t
69 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
70                       hammer_off_t hint, int *errorp)
71 {
72         hammer_mount_t hmp;
73         hammer_volume_t root_volume;
74         hammer_blockmap_t blockmap;
75         hammer_blockmap_t freemap;
76         hammer_reserve_t resv;
77         struct hammer_blockmap_layer1 *layer1;
78         struct hammer_blockmap_layer2 *layer2;
79         hammer_buffer_t buffer1 = NULL;
80         hammer_buffer_t buffer2 = NULL;
81         hammer_buffer_t buffer3 = NULL;
82         hammer_off_t tmp_offset;
83         hammer_off_t next_offset;
84         hammer_off_t result_offset;
85         hammer_off_t layer1_offset;
86         hammer_off_t layer2_offset;
87         hammer_off_t base_off;
88         int loops = 0;
89         int offset;             /* offset within big-block */
90         int use_hint;
91
92         hmp = trans->hmp;
93
94         /*
95          * Deal with alignment and buffer-boundary issues.
96          *
97          * Be careful, certain primary alignments are used below to allocate
98          * new blockmap blocks.
99          */
100         bytes = (bytes + 15) & ~15;
101         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
102         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
103
104         /*
105          * Setup
106          */
107         root_volume = trans->rootvol;
108         *errorp = 0;
109         blockmap = &hmp->blockmap[zone];
110         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
111         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
112
113         /*
114          * Use the hint if we have one.
115          */
116         if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
117                 next_offset = (hint + 15) & ~(hammer_off_t)15;
118                 use_hint = 1;
119         } else {
120                 next_offset = blockmap->next_offset;
121                 use_hint = 0;
122         }
123 again:
124
125         /*
126          * use_hint is turned off if we leave the hinted big-block.
127          */
128         if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
129                 next_offset = blockmap->next_offset;
130                 use_hint = 0;
131         }
132
133         /*
134          * Check for wrap
135          */
136         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
137                 if (++loops == 2) {
138                         result_offset = 0;
139                         *errorp = ENOSPC;
140                         goto failed;
141                 }
142                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
143         }
144
145         /*
146          * The allocation request may not cross a buffer boundary.  Special
147          * large allocations must not cross a large-block boundary.
148          */
149         tmp_offset = next_offset + bytes - 1;
150         if (bytes <= HAMMER_BUFSIZE) {
151                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
152                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
153                         goto again;
154                 }
155         } else {
156                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
157                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
158                         goto again;
159                 }
160         }
161         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
162
163         /*
164          * Dive layer 1.
165          */
166         layer1_offset = freemap->phys_offset +
167                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
168
169         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
170         if (*errorp) {
171                 result_offset = 0;
172                 goto failed;
173         }
174
175         /*
176          * Check CRC.  A mismatch seen without the lock is re-tested with
             * blkmap_lock held and is fatal only if it persists.
177          */
178         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
179                 hammer_lock_ex(&hmp->blkmap_lock);
180                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
181                         panic("CRC FAILED: LAYER1");
182                 hammer_unlock(&hmp->blkmap_lock);
183         }
184
185         /*
186          * If we are at a big-block boundary and layer1 indicates no
187          * free big-blocks, then we cannot allocate a new bigblock in
188          * layer2, skip to the next layer1 entry.
189          */
190         if (offset == 0 && layer1->blocks_free == 0) {
191                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
192                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
193                 goto again;
194         }
195         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
196
197         /*
198          * Skip this layer1 entry if it is pointing to a layer2 big-block
199          * on a volume that we are currently trying to remove from the
200          * file-system. This is used by the volume-del code together with
201          * the reblocker to free up a volume.
202          */
203         if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
204             hmp->volume_to_remove) {
205                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
206                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
207                 goto again;
208         }
209
210         /*
211          * Dive layer 2, each entry represents a large-block.
212          */
213         layer2_offset = layer1->phys_offset +
214                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
215         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
216         if (*errorp) {
217                 result_offset = 0;
218                 goto failed;
219         }
220
221         /*
222          * Check CRC.  This can race another thread holding the lock
223          * and in the middle of modifying layer2.
224          */
225         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
226                 hammer_lock_ex(&hmp->blkmap_lock);
227                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
228                         panic("CRC FAILED: LAYER2");
229                 hammer_unlock(&hmp->blkmap_lock);
230         }
231
232         /*
233          * Skip the layer if the zone is owned by someone other than us.
             * Also skip forward if the big-block is already filled past the
             * offset we are looking at.
234          */
235         if (layer2->zone && layer2->zone != zone) {
236                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
237                 goto again;
238         }
239         if (offset < layer2->append_off) {
240                 next_offset += layer2->append_off - offset;
241                 goto again;
242         }
243
244         /*
245          * If operating in the current non-hint blockmap block, do not
246          * allow it to get over-full.  Also drop any active hinting so
247          * blockmap->next_offset is updated at the end.
248          *
249          * We do this for B-Tree and meta-data allocations to provide
250          * localization for updates.
             *
             * NOTE(review): the inner offset test repeats a condition the
             * outer if already requires, so it is always true when reached.
251          */
252         if ((zone == HAMMER_ZONE_BTREE_INDEX ||
253              zone == HAMMER_ZONE_META_INDEX) &&
254             offset >= HAMMER_LARGEBLOCK_OVERFILL &&
255             !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
256         ) {
257                 if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
258                         next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
259                         use_hint = 0;
260                         goto again;
261                 }
262         }
263
264         /*
265          * We need the lock from this point on.  We have to re-check zone
266          * ownership after acquiring the lock and also check for reservations.
267          */
268         hammer_lock_ex(&hmp->blkmap_lock);
269
270         if (layer2->zone && layer2->zone != zone) {
271                 hammer_unlock(&hmp->blkmap_lock);
272                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
273                 goto again;
274         }
275         if (offset < layer2->append_off) {
276                 hammer_unlock(&hmp->blkmap_lock);
277                 next_offset += layer2->append_off - offset;
278                 goto again;
279         }
280
281         /*
282          * The bigblock might be reserved by another zone.  If it is reserved
283          * by our zone we may have to move next_offset past the append_off.
             *
             * Reservations are keyed by the big-block's base offset re-encoded
             * with HAMMER_ZONE_RAW_BUFFER in place of the zone bits.
284          */
285         base_off = (next_offset &
286                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
287                     HAMMER_ZONE_RAW_BUFFER;
288         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
289         if (resv) {
290                 if (resv->zone != zone) {
291                         hammer_unlock(&hmp->blkmap_lock);
292                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
293                                       ~HAMMER_LARGEBLOCK_MASK64;
294                         goto again;
295                 }
296                 if (offset < resv->append_off) {
297                         hammer_unlock(&hmp->blkmap_lock);
298                         next_offset += resv->append_off - offset;
299                         goto again;
300                 }
                    /*
                     * Take a reference for the duration of the update; it is
                     * presumably released by the
                     * hammer_blockmap_reserve_complete() call further down.
                     */
301                 ++resv->refs;
302         }
303
304         /*
305          * Ok, we can allocate out of this layer2 big-block.  Assume ownership
306          * of the layer for real.  At this point we've validated any
307          * reservation that might exist and can just ignore resv.
308          */
309         if (layer2->zone == 0) {
310                 /*
311                  * Assign the bigblock to our zone
312                  */
313                 hammer_modify_buffer(trans, buffer1,
314                                      layer1, sizeof(*layer1));
315                 --layer1->blocks_free;
316                 layer1->layer1_crc = crc32(layer1,
317                                            HAMMER_LAYER1_CRCSIZE);
318                 hammer_modify_buffer_done(buffer1);
319                 hammer_modify_buffer(trans, buffer2,
320                                      layer2, sizeof(*layer2));
321                 layer2->zone = zone;
322                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
323                 KKASSERT(layer2->append_off == 0);
324                 hammer_modify_volume_field(trans, trans->rootvol,
325                                            vol0_stat_freebigblocks);
326                 --root_volume->ondisk->vol0_stat_freebigblocks;
327                 hmp->copy_stat_freebigblocks =
328                         root_volume->ondisk->vol0_stat_freebigblocks;
329                 hammer_modify_volume_done(trans->rootvol);
330         } else {
331                 hammer_modify_buffer(trans, buffer2,
332                                      layer2, sizeof(*layer2));
333         }
334         KKASSERT(layer2->zone == zone);
335
336         /*
337          * NOTE: bytes_free can legally go negative due to de-dup.
338          */
339         layer2->bytes_free -= bytes;
340         KKASSERT(layer2->append_off <= offset);
341         layer2->append_off = offset + bytes;
342         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
343         hammer_modify_buffer_done(buffer2);
344
345         /*
346          * We hold the blockmap lock and should be the only ones
347          * capable of modifying resv->append_off.  Track the allocation
348          * as appropriate.
349          */
350         KKASSERT(bytes != 0);
351         if (resv) {
352                 KKASSERT(resv->append_off <= offset);
353                 resv->append_off = offset + bytes;
354                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
355                 hammer_blockmap_reserve_complete(hmp, resv);
356         }
357
358         /*
359          * If we are allocating from the base of a new buffer we can avoid
360          * a disk read by calling hammer_bnew().
             *
             * NOTE: an error from hammer_bnew_ext() is left in *errorp but
             * the allocation itself is not undone here and the offset is
             * still returned.
361          */
362         if ((next_offset & HAMMER_BUFMASK) == 0) {
363                 hammer_bnew_ext(trans->hmp, next_offset, bytes,
364                                 errorp, &buffer3);
365         }
366         result_offset = next_offset;
367
368         /*
369          * If we weren't supplied with a hint or could not use the hint
370          * then we wound up using blockmap->next_offset as the hint and
371          * need to save it.
372          */
373         if (use_hint == 0) {
374                 hammer_modify_volume(NULL, root_volume, NULL, 0);
375                 blockmap->next_offset = next_offset + bytes;
376                 hammer_modify_volume_done(root_volume);
377         }
378         hammer_unlock(&hmp->blkmap_lock);
379 failed:
380
381         /*
382          * Cleanup
383          */
384         if (buffer1)
385                 hammer_rel_buffer(buffer1, 0);
386         if (buffer2)
387                 hammer_rel_buffer(buffer2, 0);
388         if (buffer3)
389                 hammer_rel_buffer(buffer3, 0);
390
391         return(result_offset);
392 }
393
394 /*
395  * Frontend function - Reserve bytes in a zone.
396  *
397  * This code reserves bytes out of a blockmap without committing to any
398  * meta-data modifications, allowing the front-end to directly issue disk
399  * write I/O for large blocks of data
400  *
401  * The backend later finalizes the reservation with hammer_blockmap_finalize()
402  * upon committing the related record.
     *
     * The request is rounded up to a multiple of 16 bytes and the scan
     * starts at blockmap->next_offset.  No layer1/layer2 fields are written
     * here; the reserved range is tracked only in the in-memory reservation
     * (resv->append_off) and in blockmap->next_offset.
     *
     * Returns the reservation covering the big-block: either an existing one
     * with its ref count bumped or a newly inserted one with refs == 1.  The
     * reserved offset is stored in *zone_offp.
     *
     * Returns NULL with *errorp set if the root volume cannot be obtained
     * (*zone_offp is not written in that case), if hammer_bread() fails, or
     * with ENOSPC when the scan reaches the end of the zone for the second
     * time.  NOTE: an error from hammer_bnew() is left in *errorp but the
     * reservation is still returned.
403  */
404 hammer_reserve_t
405 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
406                         hammer_off_t *zone_offp, int *errorp)
407 {
408         hammer_volume_t root_volume;
409         hammer_blockmap_t blockmap;
410         hammer_blockmap_t freemap;
411         struct hammer_blockmap_layer1 *layer1;
412         struct hammer_blockmap_layer2 *layer2;
413         hammer_buffer_t buffer1 = NULL;
414         hammer_buffer_t buffer2 = NULL;
415         hammer_buffer_t buffer3 = NULL;
416         hammer_off_t tmp_offset;
417         hammer_off_t next_offset;
418         hammer_off_t layer1_offset;
419         hammer_off_t layer2_offset;
420         hammer_off_t base_off;
421         hammer_reserve_t resv;
422         hammer_reserve_t resx;
423         int loops = 0;
424         int offset;
425
426         /*
427          * Setup
428          */
429         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
430         root_volume = hammer_get_root_volume(hmp, errorp);
431         if (*errorp)
432                 return(NULL);
433         blockmap = &hmp->blockmap[zone];
434         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
435         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
436
437         /*
438          * Deal with alignment and buffer-boundary issues.
439          *
440          * Be careful, certain primary alignments are used below to allocate
441          * new blockmap blocks.
442          */
443         bytes = (bytes + 15) & ~15;
444         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
445
446         next_offset = blockmap->next_offset;
447 again:
            /*
             * resv is cleared on every pass so that the failure exits below
             * return NULL.
             */
448         resv = NULL;
449         /*
450          * Check for wrap
451          */
452         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
453                 if (++loops == 2) {
454                         *errorp = ENOSPC;
455                         goto failed;
456                 }
457                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
458         }
459
460         /*
461          * The allocation request may not cross a buffer boundary.  Special
462          * large allocations must not cross a large-block boundary.
463          */
464         tmp_offset = next_offset + bytes - 1;
465         if (bytes <= HAMMER_BUFSIZE) {
466                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
467                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
468                         goto again;
469                 }
470         } else {
471                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
472                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
473                         goto again;
474                 }
475         }
476         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
477
478         /*
479          * Dive layer 1.
480          */
481         layer1_offset = freemap->phys_offset +
482                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
483         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
484         if (*errorp)
485                 goto failed;
486
487         /*
488          * Check CRC.  A mismatch seen without the lock is re-tested with
             * blkmap_lock held and is fatal only if it persists.
489          */
490         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
491                 hammer_lock_ex(&hmp->blkmap_lock);
492                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
493                         panic("CRC FAILED: LAYER1");
494                 hammer_unlock(&hmp->blkmap_lock);
495         }
496
497         /*
498          * If we are at a big-block boundary and layer1 indicates no
499          * free big-blocks, then we cannot allocate a new bigblock in
500          * layer2, skip to the next layer1 entry.
501          */
502         if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
503             layer1->blocks_free == 0) {
504                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
505                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
506                 goto again;
507         }
508         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
509
510         /*
511          * Dive layer 2, each entry represents a large-block.
512          */
513         layer2_offset = layer1->phys_offset +
514                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
515         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
516         if (*errorp)
517                 goto failed;
518
519         /*
520          * Check CRC if not allocating into uninitialized space (which we
521          * aren't when reserving space).
522          */
523         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
524                 hammer_lock_ex(&hmp->blkmap_lock);
525                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
526                         panic("CRC FAILED: LAYER2");
527                 hammer_unlock(&hmp->blkmap_lock);
528         }
529
530         /*
531          * Skip the layer if the zone is owned by someone other than us.
             * Also skip forward if the big-block is already filled past the
             * offset we are looking at.
532          */
533         if (layer2->zone && layer2->zone != zone) {
534                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
535                 goto again;
536         }
537         if (offset < layer2->append_off) {
538                 next_offset += layer2->append_off - offset;
539                 goto again;
540         }
541
542         /*
543          * We need the lock from this point on.  We have to re-check zone
544          * ownership after acquiring the lock and also check for reservations.
545          */
546         hammer_lock_ex(&hmp->blkmap_lock);
547
548         if (layer2->zone && layer2->zone != zone) {
549                 hammer_unlock(&hmp->blkmap_lock);
550                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
551                 goto again;
552         }
553         if (offset < layer2->append_off) {
554                 hammer_unlock(&hmp->blkmap_lock);
555                 next_offset += layer2->append_off - offset;
556                 goto again;
557         }
558
559         /*
560          * The bigblock might be reserved by another zone.  If it is reserved
561          * by our zone we may have to move next_offset past the append_off.
             *
             * If no reservation exists yet for this big-block a new one is
             * allocated and inserted into the tree while the lock is held.
562          */
563         base_off = (next_offset &
564                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
565                     HAMMER_ZONE_RAW_BUFFER;
566         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
567         if (resv) {
568                 if (resv->zone != zone) {
569                         hammer_unlock(&hmp->blkmap_lock);
570                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
571                                       ~HAMMER_LARGEBLOCK_MASK64;
572                         goto again;
573                 }
574                 if (offset < resv->append_off) {
575                         hammer_unlock(&hmp->blkmap_lock);
576                         next_offset += resv->append_off - offset;
577                         goto again;
578                 }
579                 ++resv->refs;
580                 resx = NULL;
581         } else {
582                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
583                                M_WAITOK | M_ZERO | M_USE_RESERVE);
584                 resx->refs = 1;
585                 resx->zone = zone;
586                 resx->zone_offset = base_off;
                    /*
                     * Remember that the big-block was entirely free when the
                     * reservation was created.
                     */
587                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
588                         resx->flags |= HAMMER_RESF_LAYER2FREE;
589                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
590                 KKASSERT(resv == NULL);
591                 resv = resx;
592                 ++hammer_count_reservations;
593         }
594         resv->append_off = offset + bytes;
595
596         /*
597          * If we are not reserving a whole buffer but are at the start of
598          * a new block, call hammer_bnew() to avoid a disk read.
599          *
600          * If we are reserving a whole buffer (or more), the caller will
601          * probably use a direct read, so do nothing.
602          */
603         if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
604                 hammer_bnew(hmp, next_offset, errorp, &buffer3);
605         }
606
607         /*
608          * Adjust our iterator and alloc_offset.  The layer1 and layer2
609          * space beyond alloc_offset is uninitialized.  alloc_offset must
610          * be big-block aligned.
             *
             * NOTE(review): the alloc_offset wording looks stale; the only
             * field updated here is blockmap->next_offset.
611          */
612         blockmap->next_offset = next_offset + bytes;
613         hammer_unlock(&hmp->blkmap_lock);
614
615 failed:
616         if (buffer1)
617                 hammer_rel_buffer(buffer1, 0);
618         if (buffer2)
619                 hammer_rel_buffer(buffer2, 0);
620         if (buffer3)
621                 hammer_rel_buffer(buffer3, 0);
622         hammer_rel_volume(root_volume, 0);
623         *zone_offp = next_offset;
624
625         return(resv);
626 }
627
628 /*
629  * Frontend function - Dedup bytes in a zone.
630  *
631  * Dedup reservations work exactly the same as normal write reservations
632  * except we only adjust bytes_free field and don't touch append offset.
633  * Finalization mechanic for dedup reservations is also the same as for
634  * normal write ones - the backend finalizes the reservation with
635  * hammer_blockmap_finalize().
     *
     * zone_offset identifies the big-block being referenced; bytes is
     * rounded up to a multiple of 16 before being charged against the
     * reservation's bytes_free via update_bytes_free().
     *
     * Returns the reservation for the big-block: either an existing one
     * with its ref count bumped or a newly inserted one with refs == 1.
     *
     * Returns NULL if the root volume cannot be obtained or hammer_bread()
     * fails (*errorp is set by those calls).  NULL is also returned, without
     * this function touching *errorp, when the big-block is owned or
     * reserved by a different zone or when update_bytes_free() refuses the
     * update.
636  */
637 hammer_reserve_t
638 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
639                               hammer_off_t zone_offset, int *errorp)
640 {
641         hammer_volume_t root_volume;
642         hammer_blockmap_t freemap;
643         struct hammer_blockmap_layer1 *layer1;
644         struct hammer_blockmap_layer2 *layer2;
645         hammer_buffer_t buffer1 = NULL;
646         hammer_buffer_t buffer2 = NULL;
647         hammer_off_t layer1_offset;
648         hammer_off_t layer2_offset;
649         hammer_off_t base_off;
650         hammer_reserve_t resv = NULL;
651         hammer_reserve_t resx = NULL;
652
653         /*
654          * Setup
655          */
656         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
657         root_volume = hammer_get_root_volume(hmp, errorp);
658         if (*errorp)
659                 return (NULL);
660         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
661         KKASSERT(freemap->phys_offset != 0);
662
663         bytes = (bytes + 15) & ~15;
664         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
665
666         /*
667          * Dive layer 1.
668          */
669         layer1_offset = freemap->phys_offset +
670                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
671         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
672         if (*errorp)
673                 goto failed;
674
675         /*
676          * Check CRC.  A mismatch seen without the lock is re-tested with
             * blkmap_lock held and is fatal only if it persists.
677          */
678         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
679                 hammer_lock_ex(&hmp->blkmap_lock);
680                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
681                         panic("CRC FAILED: LAYER1");
682                 hammer_unlock(&hmp->blkmap_lock);
683         }
684         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
685
686         /*
687          * Dive layer 2, each entry represents a large-block.
688          */
689         layer2_offset = layer1->phys_offset +
690                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
691         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
692         if (*errorp)
693                 goto failed;
694
695         /*
696          * Check CRC.  Same unlocked-then-locked test as for layer1.
697          */
698         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
699                 hammer_lock_ex(&hmp->blkmap_lock);
700                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
701                         panic("CRC FAILED: LAYER2");
702                 hammer_unlock(&hmp->blkmap_lock);
703         }
704
705         /*
706          * Fail if the zone is owned by someone other than us.
707          */
708         if (layer2->zone && layer2->zone != zone)
709                 goto failed;
710
711         /*
712          * We need the lock from this point on.  We have to re-check zone
713          * ownership after acquiring the lock and also check for reservations.
714          */
715         hammer_lock_ex(&hmp->blkmap_lock);
716
717         if (layer2->zone && layer2->zone != zone) {
718                 hammer_unlock(&hmp->blkmap_lock);
719                 goto failed;
720         }
721
            /*
             * Reservations are keyed by the big-block's base offset re-encoded
             * with HAMMER_ZONE_RAW_BUFFER in place of the zone bits.
             */
722         base_off = (zone_offset &
723                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
724                     HAMMER_ZONE_RAW_BUFFER;
725         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
726         if (resv) {
727                 if (resv->zone != zone) {
728                         hammer_unlock(&hmp->blkmap_lock);
729                         resv = NULL;
730                         goto failed;
731                 }
732                 /*
733                  * Due to possible big block underflow we can't simply
734                  * subtract bytes from bytes_free.
735                  */
736                 if (update_bytes_free(resv, bytes) == 0) {
737                         hammer_unlock(&hmp->blkmap_lock);
738                         resv = NULL;
739                         goto failed;
740                 }
741                 ++resv->refs;
742                 resx = NULL;
743         } else {
744                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
745                                M_WAITOK | M_ZERO | M_USE_RESERVE);
746                 resx->refs = 1;
747                 resx->zone = zone;
                    /* Seed the in-memory count from the layer2 entry. */
748                 resx->bytes_free = layer2->bytes_free;
749                 /*
750                  * Due to possible big block underflow we can't simply
751                  * subtract bytes from bytes_free.
                     *
                     * On refusal the new structure was never inserted into
                     * the tree, so it is simply freed.
752                  */
753                 if (update_bytes_free(resx, bytes) == 0) {
754                         hammer_unlock(&hmp->blkmap_lock);
755                         kfree(resx, hmp->m_misc);
756                         goto failed;
757                 }
758                 resx->zone_offset = base_off;
759                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
760                 KKASSERT(resv == NULL);
761                 resv = resx;
762                 ++hammer_count_reservations;
763         }
764
765         hammer_unlock(&hmp->blkmap_lock);
766
767 failed:
768         if (buffer1)
769                 hammer_rel_buffer(buffer1, 0);
770         if (buffer2)
771                 hammer_rel_buffer(buffer2, 0);
772         hammer_rel_volume(root_volume, 0);
773
774         return(resv);
775 }
776
777 static int
778 update_bytes_free(hammer_reserve_t resv, int bytes)
779 {
780         int32_t temp;
781
782         /*
783          * Big-block underflow check
784          */
785         temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
786         cpu_ccfence(); /* XXX do we really need it ? */
787         if (temp > resv->bytes_free) {
788                 kprintf("BIGBLOCK UNDERFLOW\n");
789                 return (0);
790         }
791
792         resv->bytes_free -= bytes;
793         return (1);
794 }
795
796 /*
797  * Dereference a reservation structure.  Upon the final release the
798  * underlying big-block is checked and if it is entirely free we delete
799  * any related HAMMER buffers to avoid potential conflicts with future
800  * reuse of the big-block.
801  */
802 void
803 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
804 {
805         hammer_off_t base_offset;
806         int error;
807
808         KKASSERT(resv->refs > 0);
809         KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
810                  HAMMER_ZONE_RAW_BUFFER);
811
812         /*
813          * Setting append_off to the max prevents any new allocations
814          * from occuring while we are trying to dispose of the reservation,
815          * allowing us to safely delete any related HAMMER buffers.
816          *
817          * If we are unable to clean out all related HAMMER buffers we
818          * requeue the delay.
819          */
820         if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
821                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
822                 base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
823                 base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
824                 if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
825                         hammer_dedup_cache_inval(hmp, base_offset);
826                 error = hammer_del_buffers(hmp, base_offset,
827                                            resv->zone_offset,
828                                            HAMMER_LARGEBLOCK_SIZE,
829                                            1);
830                 if (hammer_debug_general & 0x20000) {
831                         kprintf("hammer: dellgblk %016jx error %d\n",
832                                 (intmax_t)base_offset, error);
833                 }
834                 if (error)
835                         hammer_reserve_setdelay(hmp, resv);
836         }
837         if (--resv->refs == 0) {
838                 if (hammer_debug_general & 0x20000) {
839                         kprintf("hammer: delresvr %016jx zone %02x\n",
840                                 (intmax_t)resv->zone_offset, resv->zone);
841                 }
842                 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
843                 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
844                 kfree(resv, hmp->m_misc);
845                 --hammer_count_reservations;
846         }
847 }
848
849 /*
850  * Prevent a potentially free big-block from being reused until after
851  * the related flushes have completely cycled, otherwise crash recovery
852  * could resurrect a data block that was already reused and overwritten.
853  *
854  * The caller might reset the underlying layer2 entry's append_off to 0, so
855  * our covering append_off must be set to max to prevent any reallocation
856  * until after the flush delays complete, not to mention proper invalidation
857  * of any underlying cached blocks.
858  */
859 static void
860 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
861                         int zone, struct hammer_blockmap_layer2 *layer2)
862 {
863         hammer_reserve_t resv;
864
865         /*
866          * Allocate the reservation if necessary.
867          *
868          * NOTE: need lock in future around resv lookup/allocation and
869          * the setdelay call, currently refs is not bumped until the call.
870          */
871 again:
872         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
873         if (resv == NULL) {
874                 resv = kmalloc(sizeof(*resv), hmp->m_misc,
875                                M_WAITOK | M_ZERO | M_USE_RESERVE);
876                 resv->zone = zone;
877                 resv->zone_offset = base_offset;
878                 resv->refs = 0;
879                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
880
881                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
882                         resv->flags |= HAMMER_RESF_LAYER2FREE;
883                 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
884                         kfree(resv, hmp->m_misc);
885                         goto again;
886                 }
887                 ++hammer_count_reservations;
888         } else {
889                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
890                         resv->flags |= HAMMER_RESF_LAYER2FREE;
891         }
892         hammer_reserve_setdelay(hmp, resv);
893 }
894
895 /*
896  * Enter the reservation on the on-delay list, or move it if it
897  * is already on the list.
898  */
899 static void
900 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
901 {
902         if (resv->flags & HAMMER_RESF_ONDELAY) {
903                 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
904                 resv->flush_group = hmp->flusher.next + 1;
905                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
906         } else {
907                 ++resv->refs;
908                 ++hmp->rsv_fromdelay;
909                 resv->flags |= HAMMER_RESF_ONDELAY;
910                 resv->flush_group = hmp->flusher.next + 1;
911                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
912         }
913 }
914
915 /*
916  * Reserve has reached its flush point, remove it from the delay list
917  * and finish it off.  hammer_blockmap_reserve_complete() inherits
918  * the ondelay reference.
919  */
920 void
921 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
922 {
923         KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
924         resv->flags &= ~HAMMER_RESF_ONDELAY;
925         TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
926         --hmp->rsv_fromdelay;
927         hammer_blockmap_reserve_complete(hmp, resv);
928 }
929
930 /*
931  * Backend function - free (offset, bytes) in a zone.
932  *
933  * XXX error return
934  */
935 void
936 hammer_blockmap_free(hammer_transaction_t trans,
937                      hammer_off_t zone_offset, int bytes)
938 {
939         hammer_mount_t hmp;
940         hammer_volume_t root_volume;
941         hammer_blockmap_t freemap;
942         struct hammer_blockmap_layer1 *layer1;
943         struct hammer_blockmap_layer2 *layer2;
944         hammer_buffer_t buffer1 = NULL;
945         hammer_buffer_t buffer2 = NULL;
946         hammer_off_t layer1_offset;
947         hammer_off_t layer2_offset;
948         hammer_off_t base_off;
949         int error;
950         int zone;
951
952         if (bytes == 0)
953                 return;
954         hmp = trans->hmp;
955
956         /*
957          * Alignment
958          */
959         bytes = (bytes + 15) & ~15;
960         KKASSERT(bytes <= HAMMER_XBUFSIZE);
961         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) & 
962                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
963
964         /*
965          * Basic zone validation & locking
966          */
967         zone = HAMMER_ZONE_DECODE(zone_offset);
968         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
969         root_volume = trans->rootvol;
970         error = 0;
971
972         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
973
974         /*
975          * Dive layer 1.
976          */
977         layer1_offset = freemap->phys_offset +
978                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
979         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
980         if (error)
981                 goto failed;
982         KKASSERT(layer1->phys_offset &&
983                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
984         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
985                 hammer_lock_ex(&hmp->blkmap_lock);
986                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
987                         panic("CRC FAILED: LAYER1");
988                 hammer_unlock(&hmp->blkmap_lock);
989         }
990
991         /*
992          * Dive layer 2, each entry represents a large-block.
993          */
994         layer2_offset = layer1->phys_offset +
995                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
996         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
997         if (error)
998                 goto failed;
999         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1000                 hammer_lock_ex(&hmp->blkmap_lock);
1001                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1002                         panic("CRC FAILED: LAYER2");
1003                 hammer_unlock(&hmp->blkmap_lock);
1004         }
1005
1006         hammer_lock_ex(&hmp->blkmap_lock);
1007
1008         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1009
1010         /*
1011          * Free space previously allocated via blockmap_alloc().
1012          *
1013          * NOTE: bytes_free can be and remain negative due to de-dup ops
1014          *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1015          */
1016         KKASSERT(layer2->zone == zone);
1017         layer2->bytes_free += bytes;
1018         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1019
1020         /*
1021          * If a big-block becomes entirely free we must create a covering
1022          * reservation to prevent premature reuse.  Note, however, that
1023          * the big-block and/or reservation may still have an append_off
1024          * that allows further (non-reused) allocations.
1025          *
1026          * Once the reservation has been made we re-check layer2 and if
1027          * the big-block is still entirely free we reset the layer2 entry.
1028          * The reservation will prevent premature reuse.
1029          *
1030          * NOTE: hammer_buffer's are only invalidated when the reservation
1031          * is completed, if the layer2 entry is still completely free at
1032          * that time.  Any allocations from the reservation that may have
1033          * occured in the mean time, or active references on the reservation
1034          * from new pending allocations, will prevent the invalidation from
1035          * occuring.
1036          */
1037         if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1038                 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1039
1040                 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1041                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1042                         layer2->zone = 0;
1043                         layer2->append_off = 0;
1044                         hammer_modify_buffer(trans, buffer1,
1045                                              layer1, sizeof(*layer1));
1046                         ++layer1->blocks_free;
1047                         layer1->layer1_crc = crc32(layer1,
1048                                                    HAMMER_LAYER1_CRCSIZE);
1049                         hammer_modify_buffer_done(buffer1);
1050                         hammer_modify_volume_field(trans,
1051                                         trans->rootvol,
1052                                         vol0_stat_freebigblocks);
1053                         ++root_volume->ondisk->vol0_stat_freebigblocks;
1054                         hmp->copy_stat_freebigblocks =
1055                            root_volume->ondisk->vol0_stat_freebigblocks;
1056                         hammer_modify_volume_done(trans->rootvol);
1057                 }
1058         }
1059         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1060         hammer_modify_buffer_done(buffer2);
1061         hammer_unlock(&hmp->blkmap_lock);
1062
1063 failed:
1064         if (buffer1)
1065                 hammer_rel_buffer(buffer1, 0);
1066         if (buffer2)
1067                 hammer_rel_buffer(buffer2, 0);
1068 }
1069
1070 int
1071 hammer_blockmap_dedup(hammer_transaction_t trans,
1072                      hammer_off_t zone_offset, int bytes)
1073 {
1074         hammer_mount_t hmp;
1075         hammer_volume_t root_volume;
1076         hammer_blockmap_t freemap;
1077         struct hammer_blockmap_layer1 *layer1;
1078         struct hammer_blockmap_layer2 *layer2;
1079         hammer_buffer_t buffer1 = NULL;
1080         hammer_buffer_t buffer2 = NULL;
1081         hammer_off_t layer1_offset;
1082         hammer_off_t layer2_offset;
1083         int32_t temp;
1084         int error;
1085         int zone;
1086
1087         if (bytes == 0)
1088                 return (0);
1089         hmp = trans->hmp;
1090
1091         /*
1092          * Alignment
1093          */
1094         bytes = (bytes + 15) & ~15;
1095         KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
1096         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1097                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
1098
1099         /*
1100          * Basic zone validation & locking
1101          */
1102         zone = HAMMER_ZONE_DECODE(zone_offset);
1103         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1104         root_volume = trans->rootvol;
1105         error = 0;
1106
1107         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1108
1109         /*
1110          * Dive layer 1.
1111          */
1112         layer1_offset = freemap->phys_offset +
1113                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1114         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1115         if (error)
1116                 goto failed;
1117         KKASSERT(layer1->phys_offset &&
1118                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1119         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1120                 hammer_lock_ex(&hmp->blkmap_lock);
1121                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1122                         panic("CRC FAILED: LAYER1");
1123                 hammer_unlock(&hmp->blkmap_lock);
1124         }
1125
1126         /*
1127          * Dive layer 2, each entry represents a large-block.
1128          */
1129         layer2_offset = layer1->phys_offset +
1130                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1131         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1132         if (error)
1133                 goto failed;
1134         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1135                 hammer_lock_ex(&hmp->blkmap_lock);
1136                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1137                         panic("CRC FAILED: LAYER2");
1138                 hammer_unlock(&hmp->blkmap_lock);
1139         }
1140
1141         hammer_lock_ex(&hmp->blkmap_lock);
1142
1143         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1144
1145         /*
1146          * Free space previously allocated via blockmap_alloc().
1147          *
1148          * NOTE: bytes_free can be and remain negative due to de-dup ops
1149          *       but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1150          */
1151         KKASSERT(layer2->zone == zone);
1152         temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
1153         cpu_ccfence(); /* prevent gcc from optimizing temp out */
1154         if (temp > layer2->bytes_free) {
1155                 error = ERANGE;
1156                 goto underflow;
1157         }
1158         layer2->bytes_free -= bytes;
1159
1160         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1161
1162         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1163 underflow:
1164         hammer_modify_buffer_done(buffer2);
1165         hammer_unlock(&hmp->blkmap_lock);
1166
1167 failed:
1168         if (buffer1)
1169                 hammer_rel_buffer(buffer1, 0);
1170         if (buffer2)
1171                 hammer_rel_buffer(buffer2, 0);
1172         return (error);
1173 }
1174
1175 /*
1176  * Backend function - finalize (offset, bytes) in a zone.
1177  *
1178  * Allocate space that was previously reserved by the frontend.
1179  */
1180 int
1181 hammer_blockmap_finalize(hammer_transaction_t trans,
1182                          hammer_reserve_t resv,
1183                          hammer_off_t zone_offset, int bytes)
1184 {
1185         hammer_mount_t hmp;
1186         hammer_volume_t root_volume;
1187         hammer_blockmap_t freemap;
1188         struct hammer_blockmap_layer1 *layer1;
1189         struct hammer_blockmap_layer2 *layer2;
1190         hammer_buffer_t buffer1 = NULL;
1191         hammer_buffer_t buffer2 = NULL;
1192         hammer_off_t layer1_offset;
1193         hammer_off_t layer2_offset;
1194         int error;
1195         int zone;
1196         int offset;
1197
1198         if (bytes == 0)
1199                 return(0);
1200         hmp = trans->hmp;
1201
1202         /*
1203          * Alignment
1204          */
1205         bytes = (bytes + 15) & ~15;
1206         KKASSERT(bytes <= HAMMER_XBUFSIZE);
1207
1208         /*
1209          * Basic zone validation & locking
1210          */
1211         zone = HAMMER_ZONE_DECODE(zone_offset);
1212         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1213         root_volume = trans->rootvol;
1214         error = 0;
1215
1216         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1217
1218         /*
1219          * Dive layer 1.
1220          */
1221         layer1_offset = freemap->phys_offset +
1222                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1223         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1224         if (error)
1225                 goto failed;
1226         KKASSERT(layer1->phys_offset &&
1227                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1228         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1229                 hammer_lock_ex(&hmp->blkmap_lock);
1230                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1231                         panic("CRC FAILED: LAYER1");
1232                 hammer_unlock(&hmp->blkmap_lock);
1233         }
1234
1235         /*
1236          * Dive layer 2, each entry represents a large-block.
1237          */
1238         layer2_offset = layer1->phys_offset +
1239                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1240         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1241         if (error)
1242                 goto failed;
1243         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1244                 hammer_lock_ex(&hmp->blkmap_lock);
1245                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1246                         panic("CRC FAILED: LAYER2");
1247                 hammer_unlock(&hmp->blkmap_lock);
1248         }
1249
1250         hammer_lock_ex(&hmp->blkmap_lock);
1251
1252         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1253
1254         /*
1255          * Finalize some or all of the space covered by a current
1256          * reservation.  An allocation in the same layer may have
1257          * already assigned ownership.
1258          */
1259         if (layer2->zone == 0) {
1260                 hammer_modify_buffer(trans, buffer1,
1261                                      layer1, sizeof(*layer1));
1262                 --layer1->blocks_free;
1263                 layer1->layer1_crc = crc32(layer1,
1264                                            HAMMER_LAYER1_CRCSIZE);
1265                 hammer_modify_buffer_done(buffer1);
1266                 layer2->zone = zone;
1267                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1268                 KKASSERT(layer2->append_off == 0);
1269                 hammer_modify_volume_field(trans,
1270                                 trans->rootvol,
1271                                 vol0_stat_freebigblocks);
1272                 --root_volume->ondisk->vol0_stat_freebigblocks;
1273                 hmp->copy_stat_freebigblocks =
1274                    root_volume->ondisk->vol0_stat_freebigblocks;
1275                 hammer_modify_volume_done(trans->rootvol);
1276         }
1277         if (layer2->zone != zone)
1278                 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1279         KKASSERT(layer2->zone == zone);
1280         KKASSERT(bytes != 0);
1281         layer2->bytes_free -= bytes;
1282
1283         if (resv) {
1284                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1285         }
1286
1287         /*
1288          * Finalizations can occur out of order, or combined with allocations.
1289          * append_off must be set to the highest allocated offset.
1290          */
1291         offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1292         if (layer2->append_off < offset)
1293                 layer2->append_off = offset;
1294
1295         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1296         hammer_modify_buffer_done(buffer2);
1297         hammer_unlock(&hmp->blkmap_lock);
1298
1299 failed:
1300         if (buffer1)
1301                 hammer_rel_buffer(buffer1, 0);
1302         if (buffer2)
1303                 hammer_rel_buffer(buffer2, 0);
1304         return(error);
1305 }
1306
1307 /*
1308  * Return the approximate number of free bytes in the big-block
1309  * containing the specified blockmap offset.
1310  *
1311  * WARNING: A negative number can be returned if data de-dup exists,
1312  *          and the result will also not represent he actual number
1313  *          of free bytes in this case.
1314  *
1315  *          This code is used only by the reblocker.
1316  */
1317 int
1318 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1319                         int *curp, int *errorp)
1320 {
1321         hammer_volume_t root_volume;
1322         hammer_blockmap_t blockmap;
1323         hammer_blockmap_t freemap;
1324         struct hammer_blockmap_layer1 *layer1;
1325         struct hammer_blockmap_layer2 *layer2;
1326         hammer_buffer_t buffer = NULL;
1327         hammer_off_t layer1_offset;
1328         hammer_off_t layer2_offset;
1329         int32_t bytes;
1330         int zone;
1331
1332         zone = HAMMER_ZONE_DECODE(zone_offset);
1333         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1334         root_volume = hammer_get_root_volume(hmp, errorp);
1335         if (*errorp) {
1336                 *curp = 0;
1337                 return(0);
1338         }
1339         blockmap = &hmp->blockmap[zone];
1340         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1341
1342         /*
1343          * Dive layer 1.
1344          */
1345         layer1_offset = freemap->phys_offset +
1346                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1347         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1348         if (*errorp) {
1349                 bytes = 0;
1350                 goto failed;
1351         }
1352         KKASSERT(layer1->phys_offset);
1353         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1354                 hammer_lock_ex(&hmp->blkmap_lock);
1355                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1356                         panic("CRC FAILED: LAYER1");
1357                 hammer_unlock(&hmp->blkmap_lock);
1358         }
1359
1360         /*
1361          * Dive layer 2, each entry represents a large-block.
1362          *
1363          * (reuse buffer, layer1 pointer becomes invalid)
1364          */
1365         layer2_offset = layer1->phys_offset +
1366                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1367         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1368         if (*errorp) {
1369                 bytes = 0;
1370                 goto failed;
1371         }
1372         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1373                 hammer_lock_ex(&hmp->blkmap_lock);
1374                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1375                         panic("CRC FAILED: LAYER2");
1376                 hammer_unlock(&hmp->blkmap_lock);
1377         }
1378         KKASSERT(layer2->zone == zone);
1379
1380         bytes = layer2->bytes_free;
1381
1382         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1383                 *curp = 0;
1384         else
1385                 *curp = 1;
1386 failed:
1387         if (buffer)
1388                 hammer_rel_buffer(buffer, 0);
1389         hammer_rel_volume(root_volume, 0);
1390         if (hammer_debug_general & 0x0800) {
1391                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1392                         (long long)zone_offset, bytes);
1393         }
1394         return(bytes);
1395 }
1396
1397
1398 /*
1399  * Lookup a blockmap offset.
1400  */
1401 hammer_off_t
1402 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1403                        int *errorp)
1404 {
1405         hammer_volume_t root_volume;
1406         hammer_blockmap_t freemap;
1407         struct hammer_blockmap_layer1 *layer1;
1408         struct hammer_blockmap_layer2 *layer2;
1409         hammer_buffer_t buffer = NULL;
1410         hammer_off_t layer1_offset;
1411         hammer_off_t layer2_offset;
1412         hammer_off_t result_offset;
1413         hammer_off_t base_off;
1414         hammer_reserve_t resv;
1415         int zone;
1416
1417         /*
1418          * Calculate the zone-2 offset.
1419          */
1420         zone = HAMMER_ZONE_DECODE(zone_offset);
1421         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1422
1423         result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1424                         HAMMER_ZONE_RAW_BUFFER;
1425
1426         /*
1427          * We can actually stop here, normal blockmaps are now direct-mapped
1428          * onto the freemap and so represent zone-2 addresses.
1429          */
1430         if (hammer_verify_zone == 0) {
1431                 *errorp = 0;
1432                 return(result_offset);
1433         }
1434
1435         /*
1436          * Validate the allocation zone
1437          */
1438         root_volume = hammer_get_root_volume(hmp, errorp);
1439         if (*errorp)
1440                 return(0);
1441         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1442         KKASSERT(freemap->phys_offset != 0);
1443
1444         /*
1445          * Dive layer 1.
1446          */
1447         layer1_offset = freemap->phys_offset +
1448                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1449         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1450         if (*errorp)
1451                 goto failed;
1452         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1453         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1454                 hammer_lock_ex(&hmp->blkmap_lock);
1455                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1456                         panic("CRC FAILED: LAYER1");
1457                 hammer_unlock(&hmp->blkmap_lock);
1458         }
1459
1460         /*
1461          * Dive layer 2, each entry represents a large-block.
1462          */
1463         layer2_offset = layer1->phys_offset +
1464                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1465         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1466
1467         if (*errorp)
1468                 goto failed;
1469         if (layer2->zone == 0) {
1470                 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1471                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1472                                  base_off);
1473                 KKASSERT(resv && resv->zone == zone);
1474
1475         } else if (layer2->zone != zone) {
1476                 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1477                         layer2->zone, zone);
1478         }
1479         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1480                 hammer_lock_ex(&hmp->blkmap_lock);
1481                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1482                         panic("CRC FAILED: LAYER2");
1483                 hammer_unlock(&hmp->blkmap_lock);
1484         }
1485
1486 failed:
1487         if (buffer)
1488                 hammer_rel_buffer(buffer, 0);
1489         hammer_rel_volume(root_volume, 0);
1490         if (hammer_debug_general & 0x0800) {
1491                 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1492                         (long long)zone_offset, (long long)result_offset);
1493         }
1494         return(result_offset);
1495 }
1496
1497
1498 /*
1499  * Check space availability
1500  *
1501  * MPSAFE - does not require fs_token
1502  */
1503 int
1504 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1505 {
1506         const int in_size = sizeof(struct hammer_inode_data) +
1507                             sizeof(union hammer_btree_elm);
1508         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1509         int64_t usedbytes;
1510
1511         usedbytes = hmp->rsv_inodes * in_size +
1512                     hmp->rsv_recs * rec_size +
1513                     hmp->rsv_databytes +
1514                     ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1515                     ((int64_t)hidirtybufspace << 2) +
1516                     (slop << HAMMER_LARGEBLOCK_BITS);
1517
1518         hammer_count_extra_space_used = usedbytes;      /* debugging */
1519         if (resp)
1520                 *resp = usedbytes;
1521
1522         if (hmp->copy_stat_freebigblocks >=
1523             (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1524                 return(0);
1525         }
1526         return (ENOSPC);
1527 }
1528