/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.19 2008/06/17 04:02:38 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);
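
/*
 * Note: RB_GENERATE2() expands the tree functions for hammer_res_rb_tree
 * and, because a key type and key field are supplied, also emits the
 * keyed lookup used below via RB_LOOKUP(), which searches the tree by
 * zone_offset without requiring a dummy hammer_reserve structure.
 */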

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
                      int bytes, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         *
         * Requests are rounded up to the next 16-byte boundary, so e.g.
         * a 23-byte request consumes 32 bytes.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_BUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        hammer_lock_ex(&hmp->blkmap_lock);
        next_offset = blockmap->next_offset;

again:
        /*
         * Check for wrap
         */
        if (next_offset == 0) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto done;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                goto again;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
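                /*
                 * HAMMER_BLOCKMAP_LAYER2 is the number of bytes of address
                 * space covered by a single layer1 entry, so this advances
                 * next_offset to the base of the next layer1 entry.
                 */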
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        /*
         * This is a bit complex.  If we are at the beginning of a big-block
         * we have to check for reservations.  If we aren't, we may still
         * have to assign ownership of the big-block in layer2.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
                if (layer2->zone != 0) {
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
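                /*
                 * The reservation tree is keyed on zone-2 (raw buffer)
                 * offsets, so strip the zone bits from next_offset and
                 * substitute HAMMER_ZONE_RAW_BUFFER before looking up
                 * the big-block.
                 */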
                base_off = (next_offset & (~HAMMER_LARGEBLOCK_MASK64 &
                                           ~HAMMER_OFF_ZONE_MASK)) |
                           HAMMER_ZONE_RAW_BUFFER;
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                if (resv) {
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }

        if (layer2->zone == 0) {
                /*
                 * Assign the bigblock to our zone
                 */
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);

        } else {
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        /*
         * XXX append_off
         */
        layer2->bytes_free -= bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew(trans->hmp, next_offset, errorp, &buffer3);
        }
        result_offset = next_offset;

        /*
         * Process allocated result_offset
         */
done:
        hammer_modify_volume(NULL, root_volume, NULL, 0);
        if (result_offset) {
                if (result_offset == next_offset) {
                        blockmap->next_offset = next_offset + bytes;
                } else {
                        blockmap->next_offset = next_offset;
                }
        } else {
                blockmap->next_offset = next_offset;
        }
        hammer_modify_volume_done(root_volume);
        hammer_unlock(&hmp->blkmap_lock);

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
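
#if 0
/*
 * Illustrative sketch only (not part of this file's API surface): a
 * hypothetical backend caller allocating space for a B-Tree node.  The
 * requested size is rounded up to 16 bytes internally and the returned
 * zone-X offset never crosses a HAMMER_BUFSIZE buffer boundary.
 */
static hammer_off_t
example_alloc_btree_node(hammer_transaction_t trans, int *errorp)
{
        hammer_off_t node_offset;

        node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
                                            sizeof(struct hammer_node_ondisk),
                                            errorp);
        /* node_offset is 0 and *errorp is ENOSPC if the zone is full */
        return(node_offset);
}
#endif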

/*
 * Front-end blockmap reservation
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         *
         * Reservations are rounded up to the next 16-byte boundary, the
         * same rounding hammer_blockmap_alloc() applies.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_BUFSIZE);

        hammer_lock_ex(&hmp->blkmap_lock);
        next_offset = blockmap->next_offset;
again:
        resv = NULL;

        /*
         * Check for wrap
         */
        if (next_offset == 0) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto done;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                goto again;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        /*
         * Shortcut to avoid unnecessary reservation lookups.  If we are at
         * the beginning of a new big-block determine whether we can use it
         * or not.
         */
        base_off = (next_offset & (~HAMMER_LARGEBLOCK_MASK64 &
                                   ~HAMMER_OFF_ZONE_MASK)) |
                   HAMMER_ZONE_RAW_BUFFER;
        if ((next_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
                if (layer2->zone != 0) {
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        goto again;
                }
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                if (resv) {
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        goto again;
                }
        } else {
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
        }

        /*
         * The reservation code does not modify layer2->bytes_free, it
         * simply adjusts next_offset.
         */
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * Make the zone-2 reservation.  base_off was computed above and
         * next_offset has not moved since then.
         */
        if (resv) {
                ++resv->refs;
                KKASSERT(resv->zone == zone);
        } else {
                resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO);
                resv->refs = 1;
                resv->zone = zone;
                resv->zone_offset = base_off;
                resx = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
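                /*
                 * RB_INSERT() returns NULL on success and the colliding
                 * node on failure, so a non-NULL resx would indicate a
                 * duplicate reservation for this big-block.
                 */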
                KKASSERT(resx == NULL);
                ++hammer_count_reservations;
        }

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer the caller will probably use
         * a direct read, so do nothing.
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew(hmp, next_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator.  If a reservation was made, advance
         * next_offset past the reserved space; otherwise just record
         * where the scan left off so the next call resumes there.
         */
done:
        if (resv) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        } else if (blockmap->next_offset != next_offset) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                blockmap->next_offset = next_offset;
                hammer_modify_volume_done(root_volume);
        }

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        hammer_unlock(&hmp->blkmap_lock);
        *zone_offp = next_offset;

        return(resv);
}
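
#if 0
/*
 * Illustrative sketch only: the reserve/commit lifecycle as pieced
 * together from the functions in this file.  A front-end reserves
 * space and issues write I/O directly; the backend later finalizes
 * the reservation by calling hammer_blockmap_free() with a negative
 * byte count, and the reservation reference is dropped when the
 * record is done with it.  The error handling here is a hypothetical
 * minimum.
 */
static int
example_reserve_lifecycle(hammer_transaction_t trans, int data_len)
{
        hammer_mount_t hmp = trans->hmp;
        hammer_reserve_t resv;
        hammer_off_t zone_off;
        int error;

        resv = hammer_blockmap_reserve(hmp, HAMMER_ZONE_LARGE_DATA_INDEX,
                                       data_len, &zone_off, &error);
        if (resv == NULL)
                return(error);

        /* ... front-end issues direct write I/O to zone_off ... */

        /* backend: convert the reservation into a real allocation */
        hammer_blockmap_free(trans, zone_off, -data_len);

        /* drop the reservation reference */
        hammer_blockmap_reserve_complete(hmp, resv);
        return(0);
}
#endif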

/*
 * A record with a storage reservation calls this function when it is
 * being freed.  The storage may or may not have actually been allocated.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->refs > 0);
        if (--resv->refs == 0) {
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, M_HAMMER);
                --hammer_count_reservations;
        }
}

/*
 * This ensures that no data reallocations will take place at the specified
 * zone2_offset (pointing to the base of a bigblock) for 2 flush cycles,
 * preventing deleted data space, which has no UNDO, from being reallocated
 * too fast.
 */
void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
                        hammer_off_t zone2_offset)
{
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO);
                resv->refs = 1; /* ref for on-delay list */
                resv->zone_offset = zone2_offset;
                RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                ++hammer_count_reservations;
        } else if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;   /* ref for on-delay list */
        }
        resv->flags |= HAMMER_RESF_ONDELAY;
        resv->flush_group = hmp->flusher.next + 1;
        TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
}
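
/*
 * Timing note: the reservation is tagged with flush group
 * hmp->flusher.next + 1 and sits on delay_list until the flusher
 * completes that group, which is what gives deleted, UNDO-less data
 * the two-flush-cycle grace period described above.
 */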

void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Free (offset,bytes) in a zone.
 *
 * If bytes is negative we are actually allocating previously reserved
 * space in the zone.
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_reserve_t resv;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        if (bytes > 0) {
                bytes = (bytes + 15) & ~15;
                KKASSERT(bytes <= HAMMER_BUFSIZE);
                KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                          ~HAMMER_LARGEBLOCK_MASK64) == 0);
        } else {
                bytes = -((-bytes + 15) & ~15);
                KKASSERT(bytes >= -HAMMER_BUFSIZE);
        }

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;
        hammer_lock_ex(&hmp->blkmap_lock);

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        KKASSERT(error == 0);
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        KKASSERT(error == 0);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        if (bytes > 0) {
                /*
                 * Freeing previously allocated space
                 */
                KKASSERT(layer2->zone == zone);
                layer2->bytes_free += bytes;
                KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                        base_off = (zone_offset &
                                    (~HAMMER_LARGEBLOCK_MASK64 &
                                     ~HAMMER_OFF_ZONE_MASK)) |
                                   HAMMER_ZONE_RAW_BUFFER;
                        resv = RB_LOOKUP(hammer_res_rb_tree,
                                         &hmp->rb_resv_root, base_off);
                        if (resv) {
                                /*
                                 * Portions of this block have been reserved,
                                 * do not free it.
                                 *
                                 * Make sure the reservation remains through
                                 * the next flush cycle so potentially undoable
                                 * data is not overwritten.
                                 */
                                KKASSERT(resv->zone == zone);
                                hammer_reserve_setdelay(hmp, resv, base_off);
                        } else if ((blockmap->next_offset ^ zone_offset) &
                                    ~HAMMER_LARGEBLOCK_MASK64) {
                                /*
                                 * Our iterator is not in the now-free
                                 * big-block and we can release it.
                                 *
                                 * Make sure the reservation remains through
                                 * the next flush cycle so potentially undoable
                                 * data is not overwritten.
                                 */
                                hammer_reserve_setdelay(hmp, resv, base_off);
                                KKASSERT(layer2->zone == zone);
                                hammer_del_buffers(hmp,
                                                   zone_offset &
                                                      ~HAMMER_LARGEBLOCK_MASK64,
                                                   base_off,
                                                   HAMMER_LARGEBLOCK_SIZE);
                                layer2->zone = 0;
                                layer2->append_off = 0;
                                hammer_modify_buffer(trans, buffer1,
                                                     layer1, sizeof(*layer1));
                                ++layer1->blocks_free;
                                layer1->layer1_crc = crc32(layer1,
                                                           HAMMER_LAYER1_CRCSIZE);
                                hammer_modify_buffer_done(buffer1);
                                hammer_modify_volume_field(trans,
                                                trans->rootvol,
                                                vol0_stat_freebigblocks);
                                ++root_volume->ondisk->vol0_stat_freebigblocks;
                                hmp->copy_stat_freebigblocks =
                                   root_volume->ondisk->vol0_stat_freebigblocks;
                                hammer_modify_volume_done(trans->rootvol);
                        }
                }
        } else {
                /*
                 * Allocating previously reserved space
                 */
                if (layer2->zone == 0) {
                        layer2->zone = zone;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        --layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        --root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
                if (layer2->zone != zone)
                        kprintf("layer2 zone mismatch %d %d\n",
                                layer2->zone, zone);
                KKASSERT(layer2->zone == zone);
                layer2->bytes_free += bytes;
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}
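
/*
 * Sign convention recap: hammer_blockmap_free(trans, off, 1024) returns
 * 1024 bytes at off to the big-block's free count, while
 * hammer_blockmap_free(trans, off, -1024) consumes 1024 bytes that were
 * previously set aside by hammer_blockmap_reserve(), assigning big-block
 * ownership in layer2 if this is the first allocation out of it.
 */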

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        zone_offset, bytes);
        }
        return(bytes);
}

/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
                       int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
                        HAMMER_ZONE_RAW_BUFFER;

        /*
         * We can actually stop here, normal blockmaps are now direct-mapped
         * onto the freemap and so represent zone-2 addresses.
         */
        if (hammer_verify_zone == 0) {
                *errorp = 0;
                return(result_offset);
        }

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        if (layer2->zone == 0) {
                base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 &
                                           ~HAMMER_OFF_ZONE_MASK)) |
                           HAMMER_ZONE_RAW_BUFFER;
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);
        } else if (layer2->zone != zone) {
                panic("hammer_blockmap_lookup: bad zone %d/%d",
                        layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        zone_offset, result_offset);
        }
        return(result_offset);
}
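
/*
 * Offset translation recap: a zone-X blockmap offset carries its zone in
 * the high bits, and because normal blockmaps are direct-mapped onto the
 * freemap the zone-2 (raw buffer) address is obtained by simply masking
 * off the zone bits and substituting HAMMER_ZONE_RAW_BUFFER.  The layer1/
 * layer2 walk above only sanity checks big-block ownership when
 * hammer_verify_zone is enabled.
 */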

/*
 * Check space availability
 */
int
hammer_checkspace(hammer_mount_t hmp)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        const int blkconv = HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE;
        const int limit_inodes = HAMMER_LARGEBLOCK_SIZE / in_size;
        const int limit_recs = HAMMER_LARGEBLOCK_SIZE / rec_size;
        int usedbigblocks;

        /*
         * Quick and very dirty, not even using the right units (bigblocks
         * vs 16K buffers), but this catches almost everything.
         */
        if (hmp->copy_stat_freebigblocks >= hmp->rsv_databufs + 8 &&
            hmp->rsv_inodes < limit_inodes &&
            hmp->rsv_recs < limit_recs &&
            hmp->rsv_databytes < HAMMER_LARGEBLOCK_SIZE) {
                return(0);
        }

        /*
         * Do a more involved check
         */
        usedbigblocks = (hmp->rsv_inodes * in_size / HAMMER_LARGEBLOCK_SIZE) +
                        (hmp->rsv_recs * rec_size / HAMMER_LARGEBLOCK_SIZE) +
                        hmp->rsv_databufs / blkconv + 6;
        if (hmp->copy_stat_freebigblocks >= usedbigblocks)
                return(0);
        return (ENOSPC);
}
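
/*
 * Heuristic recap (illustrative numbers): with 8MB big-blocks the fast
 * path passes while fewer than one big-block's worth of inodes, records,
 * or data bytes are reserved and at least rsv_databufs + 8 big-blocks
 * remain free; otherwise the reservations are converted to an estimated
 * big-block count (plus a safety margin of 6) and compared against the
 * cached free count.  Callers treat a non-zero return as ENOSPC.
 */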