1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay(hammer_mount_t hmp,
44                                     hammer_off_t base_offset,
45                                     struct hammer_blockmap_layer2 *layer2);
46
47
48 /*
49  * Reserved big-blocks red-black tree support
50  */
51 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
52              hammer_res_rb_compare, hammer_off_t, zone_offset);
53
54 static int
55 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
56 {
57         if (res1->zone_offset < res2->zone_offset)
58                 return(-1);
59         if (res1->zone_offset > res2->zone_offset)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * Allocate bytes from a zone
66  */
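/*
 * On success a zone-encoded offset is returned.  On failure 0 is
 * returned and *errorp is set, either to ENOSPC when the zone wraps
 * without finding space or to the error from a failed buffer read.
 */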
67 hammer_off_t
68 hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
69                       int bytes, int *errorp)
70 {
71         hammer_mount_t hmp;
72         hammer_volume_t root_volume;
73         hammer_blockmap_t blockmap;
74         hammer_blockmap_t freemap;
75         hammer_reserve_t resv;
76         struct hammer_blockmap_layer1 *layer1;
77         struct hammer_blockmap_layer2 *layer2;
78         hammer_buffer_t buffer1 = NULL;
79         hammer_buffer_t buffer2 = NULL;
80         hammer_buffer_t buffer3 = NULL;
81         hammer_off_t tmp_offset;
82         hammer_off_t next_offset;
83         hammer_off_t result_offset;
84         hammer_off_t layer1_offset;
85         hammer_off_t layer2_offset;
86         hammer_off_t base_off;
87         int loops = 0;
88         int offset;             /* offset within big-block */
89
90         hmp = trans->hmp;
91
92         /*
93          * Deal with alignment and buffer-boundary issues.
94          *
95          * Be careful, certain primary alignments are used below to allocate
96          * new blockmap blocks.
97          */
98         bytes = (bytes + 15) & ~15;
99         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
100         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
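        /*
         * Example of the rounding above: a 23 byte request becomes 32
         * bytes while a 16 byte request is unchanged, so every allocation
         * is 16-byte aligned and never larger than HAMMER_XBUFSIZE.
         */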
101
102         /*
103          * Setup
104          */
105         root_volume = trans->rootvol;
106         *errorp = 0;
107         blockmap = &hmp->blockmap[zone];
108         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
109         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
110
111         next_offset = blockmap->next_offset;
112 again:
113         /*
114          * Check for wrap
115          */
116         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
117                 if (++loops == 2) {
118                         result_offset = 0;
119                         *errorp = ENOSPC;
120                         goto failed;
121                 }
122                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
123         }
124
125         /*
126          * The allocation request may not cross a buffer boundary.  Special
127          * large allocations must not cross a large-block boundary.
128          */
129         tmp_offset = next_offset + bytes - 1;
130         if (bytes <= HAMMER_BUFSIZE) {
131                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
132                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
133                         goto again;
134                 }
135         } else {
136                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
137                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
138                         goto again;
139                 }
140         }
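        /*
         * The XOR tests above are non-zero only when the first and last
         * byte of the request fall in different buffers (or different
         * big-blocks for oversized requests).  In that case next_offset
         * is bumped to the start of the next boundary and the scan is
         * retried.
         */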
141         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
142
143         /*
144          * Dive layer 1.
145          */
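        /*
         * The freemap is a two-layer radix structure: layer1 is an array
         * at freemap->phys_offset whose entries each point at a layer2
         * array, and each layer2 entry describes one big-block.  The
         * LAYER1_OFFSET/LAYER2_OFFSET macros extract the corresponding
         * index bits from the zone offset.
         */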
146         layer1_offset = freemap->phys_offset +
147                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
148         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
149         if (*errorp) {
150                 result_offset = 0;
151                 goto failed;
152         }
153
154         /*
155          * Check CRC.
156          */
157         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
158                 Debugger("CRC FAILED: LAYER1");
159         }
160
161         /*
162          * If we are at a big-block boundary and layer1 indicates no 
163          * free big-blocks, then we cannot allocate a new big-block in
164          * layer2, so skip to the next layer1 entry.
165          */
166         if (offset == 0 && layer1->blocks_free == 0) {
167                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
168                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
169                 goto again;
170         }
171         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
172
173         /*
174          * Dive layer 2, each entry represents a large-block.
175          */
176         layer2_offset = layer1->phys_offset +
177                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
178         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
179         if (*errorp) {
180                 result_offset = 0;
181                 goto failed;
182         }
183
184         /*
185          * Check CRC.
186          */
187         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
188                 Debugger("CRC FAILED: LAYER2");
189         }
190
191         /*
192          * Skip the layer if the zone is owned by someone other than us.
193          */
194         if (layer2->zone && layer2->zone != zone) {
195                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
196                 goto again;
197         }
198         if (offset < layer2->append_off) {
199                 next_offset += layer2->append_off - offset;
200                 goto again;
201         }
202
203         /*
204          * We need the lock from this point on.  We have to re-check zone
205          * ownership after acquiring the lock and also check for reservations.
206          */
207         hammer_lock_ex(&hmp->blkmap_lock);
208
209         if (layer2->zone && layer2->zone != zone) {
210                 hammer_unlock(&hmp->blkmap_lock);
211                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
212                 goto again;
213         }
214         if (offset < layer2->append_off) {
215                 hammer_unlock(&hmp->blkmap_lock);
216                 next_offset += layer2->append_off - offset;
217                 goto again;
218         }
219
220         /*
221          * The bigblock might be reserved by another zone.  If it is reserved
222          * by our zone we may have to move next_offset past the append_off.
223          */
224         base_off = (next_offset &
225                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | 
226                     HAMMER_ZONE_RAW_BUFFER;
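        /*
         * base_off is the big-block's base address recast as a zone-2
         * (raw buffer) offset: the intra-big-block bits and the zone
         * field are masked away and the raw-buffer zone id is or'd back
         * in.  This is the key the reservation red-black tree is indexed
         * by.
         */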
227         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
228         if (resv) {
229                 if (resv->zone != zone) {
230                         hammer_unlock(&hmp->blkmap_lock);
231                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
232                                       ~HAMMER_LARGEBLOCK_MASK64;
233                         goto again;
234                 }
235                 if (offset < resv->append_off) {
236                         hammer_unlock(&hmp->blkmap_lock);
237                         next_offset += resv->append_off - offset;
238                         goto again;
239                 }
240         }
241
242         /*
243          * Ok, we can allocate out of this layer2 big-block.  Assume ownership
244          * of the layer for real.  At this point we've validated any
245          * reservation that might exist and can just ignore resv.
246          */
247         if (layer2->zone == 0) {
248                 /*
249                  * Assign the bigblock to our zone
250                  */
251                 hammer_modify_buffer(trans, buffer1,
252                                      layer1, sizeof(*layer1));
253                 --layer1->blocks_free;
254                 layer1->layer1_crc = crc32(layer1,
255                                            HAMMER_LAYER1_CRCSIZE);
256                 hammer_modify_buffer_done(buffer1);
257                 hammer_modify_buffer(trans, buffer2,
258                                      layer2, sizeof(*layer2));
259                 layer2->zone = zone;
260                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
261                 KKASSERT(layer2->append_off == 0);
262                 hammer_modify_volume_field(trans, trans->rootvol,
263                                            vol0_stat_freebigblocks);
264                 --root_volume->ondisk->vol0_stat_freebigblocks;
265                 hmp->copy_stat_freebigblocks =
266                         root_volume->ondisk->vol0_stat_freebigblocks;
267                 hammer_modify_volume_done(trans->rootvol);
268         } else {
269                 hammer_modify_buffer(trans, buffer2,
270                                      layer2, sizeof(*layer2));
271         }
272         KKASSERT(layer2->zone == zone);
273
274         layer2->bytes_free -= bytes;
275         KKASSERT(layer2->append_off <= offset);
276         layer2->append_off = offset + bytes;
277         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
278         hammer_modify_buffer_done(buffer2);
279         KKASSERT(layer2->bytes_free >= 0);
280
281         if (resv) {
282                 KKASSERT(resv->append_off <= offset);
283                 resv->append_off = offset + bytes;
284                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
285         }
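        /*
         * Clearing HAMMER_RESF_LAYER2FREE records that the big-block is
         * no longer entirely free, so hammer_blockmap_reserve_complete()
         * will not invalidate the underlying HAMMER buffers when the
         * reservation is released.
         */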
286
287         /*
288          * If we are allocating from the base of a new buffer we can avoid
289          * a disk read by calling hammer_bnew().
290          */
291         if ((next_offset & HAMMER_BUFMASK) == 0) {
292                 hammer_bnew_ext(trans->hmp, next_offset, bytes,
293                                 errorp, &buffer3);
294         }
295         result_offset = next_offset;
296
297         /*
298          * Process allocated result_offset
299          */
300         hammer_modify_volume(NULL, root_volume, NULL, 0);
301         blockmap->next_offset = next_offset + bytes;
302         hammer_modify_volume_done(root_volume);
303         hammer_unlock(&hmp->blkmap_lock);
304 failed:
305
306         /*
307          * Cleanup
308          */
309         if (buffer1)
310                 hammer_rel_buffer(buffer1, 0);
311         if (buffer2)
312                 hammer_rel_buffer(buffer2, 0);
313         if (buffer3)
314                 hammer_rel_buffer(buffer3, 0);
315
316         return(result_offset);
317 }
318
319 /*
320  * Frontend function - Reserve bytes in a zone.
321  *
322  * This code reserves bytes out of a blockmap without committing to any
323  * meta-data modifications, allowing the front-end to directly issue disk
324  * write I/O for large blocks of data.
325  *
326  * The backend later finalizes the reservation with hammer_blockmap_finalize()
327  * upon committing the related record.
328  */
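/*
 * Returns the reservation (with a reference held) and sets *zone_offp
 * to the reserved zone offset.  Returns NULL with *errorp set if the
 * zone is full or a buffer read fails.
 */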
329 hammer_reserve_t
330 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
331                         hammer_off_t *zone_offp, int *errorp)
332 {
333         hammer_volume_t root_volume;
334         hammer_blockmap_t blockmap;
335         hammer_blockmap_t freemap;
336         struct hammer_blockmap_layer1 *layer1;
337         struct hammer_blockmap_layer2 *layer2;
338         hammer_buffer_t buffer1 = NULL;
339         hammer_buffer_t buffer2 = NULL;
340         hammer_buffer_t buffer3 = NULL;
341         hammer_off_t tmp_offset;
342         hammer_off_t next_offset;
343         hammer_off_t layer1_offset;
344         hammer_off_t layer2_offset;
345         hammer_off_t base_off;
346         hammer_reserve_t resv;
347         hammer_reserve_t resx;
348         int loops = 0;
349         int offset;
350
351         /*
352          * Setup
353          */
354         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
355         root_volume = hammer_get_root_volume(hmp, errorp);
356         if (*errorp)
357                 return(NULL);
358         blockmap = &hmp->blockmap[zone];
359         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
360         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
361
362         /*
363          * Deal with alignment and buffer-boundary issues.
364          *
365          * Be careful, certain primary alignments are used below to allocate
366          * new blockmap blocks.
367          */
368         bytes = (bytes + 15) & ~15;
369         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
370
371         next_offset = blockmap->next_offset;
372 again:
373         resv = NULL;
374         /*
375          * Check for wrap
376          */
377         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
378                 if (++loops == 2) {
379                         *errorp = ENOSPC;
380                         goto failed;
381                 }
382                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
383         }
384
385         /*
386          * The allocation request may not cross a buffer boundary.  Special
387          * large allocations must not cross a large-block boundary.
388          */
389         tmp_offset = next_offset + bytes - 1;
390         if (bytes <= HAMMER_BUFSIZE) {
391                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
392                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
393                         goto again;
394                 }
395         } else {
396                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
397                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
398                         goto again;
399                 }
400         }
401         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
402
403         /*
404          * Dive layer 1.
405          */
406         layer1_offset = freemap->phys_offset +
407                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
408         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
409         if (*errorp)
410                 goto failed;
411
412         /*
413          * Check CRC.
414          */
415         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
416                 Debugger("CRC FAILED: LAYER1");
417         }
418
419         /*
420          * If we are at a big-block boundary and layer1 indicates no 
421          * free big-blocks, then we cannot allocate a new big-block in
422          * layer2, so skip to the next layer1 entry.
423          */
424         if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
425             layer1->blocks_free == 0) {
426                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
427                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
428                 goto again;
429         }
430         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
431
432         /*
433          * Dive layer 2, each entry represents a large-block.
434          */
435         layer2_offset = layer1->phys_offset +
436                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
437         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
438         if (*errorp)
439                 goto failed;
440
441         /*
442          * Check CRC if not allocating into uninitialized space (which we
443          * aren't when reserving space).
444          */
445         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
446                 Debugger("CRC FAILED: LAYER2");
447         }
448
449         /*
450          * Skip the layer if the zone is owned by someone other than us.
451          */
452         if (layer2->zone && layer2->zone != zone) {
453                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
454                 goto again;
455         }
456         if (offset < layer2->append_off) {
457                 next_offset += layer2->append_off - offset;
458                 goto again;
459         }
460
461         /*
462          * We need the lock from this point on.  We have to re-check zone
463          * ownership after acquiring the lock and also check for reservations.
464          */
465         hammer_lock_ex(&hmp->blkmap_lock);
466
467         if (layer2->zone && layer2->zone != zone) {
468                 hammer_unlock(&hmp->blkmap_lock);
469                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
470                 goto again;
471         }
472         if (offset < layer2->append_off) {
473                 hammer_unlock(&hmp->blkmap_lock);
474                 next_offset += layer2->append_off - offset;
475                 goto again;
476         }
477
478         /*
479          * The bigblock might be reserved by another zone.  If it is reserved
480          * by our zone we may have to move next_offset past the append_off.
481          */
482         base_off = (next_offset &
483                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
484                     HAMMER_ZONE_RAW_BUFFER;
485         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
486         if (resv) {
487                 if (resv->zone != zone) {
488                         hammer_unlock(&hmp->blkmap_lock);
489                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
490                                       ~HAMMER_LARGEBLOCK_MASK64;
491                         goto again;
492                 }
493                 if (offset < resv->append_off) {
494                         hammer_unlock(&hmp->blkmap_lock);
495                         next_offset += resv->append_off - offset;
496                         goto again;
497                 }
498                 ++resv->refs;
499                 resx = NULL;
500         } else {
501                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
502                                M_WAITOK | M_ZERO | M_USE_RESERVE);
503                 resx->refs = 1;
504                 resx->zone = zone;
505                 resx->zone_offset = base_off;
506                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
507                         resx->flags |= HAMMER_RESF_LAYER2FREE;
508                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
509                 KKASSERT(resv == NULL);
510                 resv = resx;
511                 ++hammer_count_reservations;
512         }
513         resv->append_off = offset + bytes;
514
515         /*
516          * If we are not reserving a whole buffer but are at the start of
517          * a new block, call hammer_bnew() to avoid a disk read.
518          *
519          * If we are reserving a whole buffer (or more), the caller will
520          * probably use a direct read, so do nothing.
521          */
522         if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
523                 hammer_bnew(hmp, next_offset, errorp, &buffer3);
524         }
525
526         /*
527          * Advance the zone's allocation iterator (blockmap->next_offset)
528          * past the space we just reserved.  The reservation is finalized
529          * later by the backend via hammer_blockmap_finalize().
530          */
531         blockmap->next_offset = next_offset + bytes;
532         hammer_unlock(&hmp->blkmap_lock);
533
534 failed:
535         if (buffer1)
536                 hammer_rel_buffer(buffer1, 0);
537         if (buffer2)
538                 hammer_rel_buffer(buffer2, 0);
539         if (buffer3)
540                 hammer_rel_buffer(buffer3, 0);
541         hammer_rel_volume(root_volume, 0);
542         *zone_offp = next_offset;
543
544         return(resv);
545 }
546
547 #if 0
548 /*
549  * Backend function - undo a portion of a reservation.
550  */
551 void
552 hammer_blockmap_reserve_undo(hammer_mount_t hmp, hammer_reserve_t resv,
553                          hammer_off_t zone_offset, int bytes)
554 {
555         resv->bytes_freed += bytes;
556 }
557
558 #endif
559
560 /*
561  * Dereference a reservation structure.  Upon the final release the
562  * underlying big-block is checked and if it is entirely free we delete
563  * any related HAMMER buffers to avoid potential conflicts with future
564  * reuse of the big-block.
565  */
566 void
567 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
568 {
569         hammer_off_t base_offset;
570
571         KKASSERT(resv->refs > 0);
572         KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
573                  HAMMER_ZONE_RAW_BUFFER);
574
575         /*
576          * Setting append_off to the max prevents any new allocations
577          * from occurring while we are trying to dispose of the reservation,
578          * allowing us to safely delete any related HAMMER buffers.
579          */
580         if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
581                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
582                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
583                 base_offset = resv->zone_offset & ~HAMMER_ZONE_RAW_BUFFER;
584                 base_offset = HAMMER_ZONE_ENCODE(base_offset, resv->zone);
585                 hammer_del_buffers(hmp, base_offset, resv->zone_offset,
586                                    HAMMER_LARGEBLOCK_SIZE);
587         }
588         if (--resv->refs == 0) {
589                 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
590                 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
591                 kfree(resv, hmp->m_misc);
592                 --hammer_count_reservations;
593         }
594 }
595
596 /*
597  * Prevent a potentially free big-block from being reused until after
598  * the related flushes have completely cycled, otherwise crash recovery
599  * could resurrect a data block that was already reused and overwritten.
600  *
601  * The caller re-checks the layer2 entry after this call and only
602  * resets it if the big-block is still completely free at that point.
603  */
604 static void
605 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_off_t base_offset,
606                         struct hammer_blockmap_layer2 *layer2)
607 {
608         hammer_reserve_t resv;
609
610         /*
611          * Allocate the reservation if necessary.
612          */
613 again:
614         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
615         if (resv == NULL) {
616                 resv = kmalloc(sizeof(*resv), hmp->m_misc,
617                                M_WAITOK | M_ZERO | M_USE_RESERVE);
618                 resv->zone_offset = base_offset;
619                 resv->refs = 0;
620                 /* XXX inherent lock until refs bumped later on */
621                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
622                         resv->flags |= HAMMER_RESF_LAYER2FREE;
623                 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
624                         kfree(resv, hmp->m_misc);
625                         goto again;
626                 }
627                 ++hammer_count_reservations;
628         }
629
630         /*
631          * Enter the reservation on the on-delay list, or move it if it
632          * is already on the list.
633          */
634         if (resv->flags & HAMMER_RESF_ONDELAY) {
635                 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
636                 resv->flush_group = hmp->flusher.next + 1;
637                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
638         } else {
639                 ++resv->refs;
640                 ++hmp->rsv_fromdelay;
641                 resv->flags |= HAMMER_RESF_ONDELAY;
642                 resv->flush_group = hmp->flusher.next + 1;
643                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
644         }
645 }
646
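/*
 * Take a reservation off the delay list and drop the reference that
 * hammer_reserve_setdelay() placed on it, allowing the underlying
 * big-block to be reused once the related flushes have cycled.
 */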
647 void
648 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
649 {
650         KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
651         resv->flags &= ~HAMMER_RESF_ONDELAY;
652         TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
653         --hmp->rsv_fromdelay;
654         hammer_blockmap_reserve_complete(hmp, resv);
655 }
656
657 /*
658  * Backend function - free (offset, bytes) in a zone.
659  *
660  * XXX error return
661  */
662 void
663 hammer_blockmap_free(hammer_transaction_t trans,
664                      hammer_off_t zone_offset, int bytes)
665 {
666         hammer_mount_t hmp;
667         hammer_volume_t root_volume;
668         hammer_blockmap_t blockmap;
669         hammer_blockmap_t freemap;
670         struct hammer_blockmap_layer1 *layer1;
671         struct hammer_blockmap_layer2 *layer2;
672         hammer_buffer_t buffer1 = NULL;
673         hammer_buffer_t buffer2 = NULL;
674         hammer_off_t layer1_offset;
675         hammer_off_t layer2_offset;
676         hammer_off_t base_off;
677         int error;
678         int zone;
679
680         if (bytes == 0)
681                 return;
682         hmp = trans->hmp;
683
684         /*
685          * Alignment
686          */
687         bytes = (bytes + 15) & ~15;
688         KKASSERT(bytes <= HAMMER_XBUFSIZE);
689         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) & 
690                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
691
692         /*
693          * Basic zone validation & locking
694          */
695         zone = HAMMER_ZONE_DECODE(zone_offset);
696         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
697         root_volume = trans->rootvol;
698         error = 0;
699
700         blockmap = &hmp->blockmap[zone];
701         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
702
703         /*
704          * Dive layer 1.
705          */
706         layer1_offset = freemap->phys_offset +
707                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
708         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
709         if (error)
710                 goto failed;
711         KKASSERT(layer1->phys_offset &&
712                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
713         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
714                 Debugger("CRC FAILED: LAYER1");
715         }
716
717         /*
718          * Dive layer 2, each entry represents a large-block.
719          */
720         layer2_offset = layer1->phys_offset +
721                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
722         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
723         if (error)
724                 goto failed;
725         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
726                 Debugger("CRC FAILED: LAYER2");
727         }
728
729         hammer_lock_ex(&hmp->blkmap_lock);
730
731         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
732
733         /*
734          * Free space previously allocated via blockmap_alloc().
735          */
736         KKASSERT(layer2->zone == zone);
737         layer2->bytes_free += bytes;
738         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
739
740         /*
741          * If a big-block becomes entirely free we must create a covering
742          * reservation to prevent premature reuse.  Note, however, that
743          * the big-block and/or reservation may still have an append_off
744          * that allows further (non-reused) allocations.
745          *
746          * Once the reservation has been made we re-check layer2 and if
747          * the big-block is still entirely free we reset the layer2 entry.
748          * The reservation will prevent premature reuse.
749          *
750          * NOTE: hammer_buffer's are only invalidated when the reservation
751          * is completed, if the layer2 entry is still completely free at
752          * that time.  Any allocations from the reservation that may have
753          * occurred in the meantime, or active references on the reservation
754          * from new pending allocations, will prevent the invalidation from
755          * occurring.
756          */
757         if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
758                 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
759
760                 hammer_reserve_setdelay(hmp, base_off, layer2);
761                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
762                         layer2->zone = 0;
763                         layer2->append_off = 0;
764                         hammer_modify_buffer(trans, buffer1,
765                                              layer1, sizeof(*layer1));
766                         ++layer1->blocks_free;
767                         layer1->layer1_crc = crc32(layer1,
768                                                    HAMMER_LAYER1_CRCSIZE);
769                         hammer_modify_buffer_done(buffer1);
770                         hammer_modify_volume_field(trans,
771                                         trans->rootvol,
772                                         vol0_stat_freebigblocks);
773                         ++root_volume->ondisk->vol0_stat_freebigblocks;
774                         hmp->copy_stat_freebigblocks =
775                            root_volume->ondisk->vol0_stat_freebigblocks;
776                         hammer_modify_volume_done(trans->rootvol);
777                 }
778         }
779         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
780         hammer_modify_buffer_done(buffer2);
781         hammer_unlock(&hmp->blkmap_lock);
782
783 failed:
784         if (buffer1)
785                 hammer_rel_buffer(buffer1, 0);
786         if (buffer2)
787                 hammer_rel_buffer(buffer2, 0);
788 }
789
790 /*
791  * Backend function - finalize (offset, bytes) in a zone.
792  *
793  * Allocate space that was previously reserved by the frontend.
794  */
795 int
796 hammer_blockmap_finalize(hammer_transaction_t trans,
797                          hammer_reserve_t resv,
798                          hammer_off_t zone_offset, int bytes)
799 {
800         hammer_mount_t hmp;
801         hammer_volume_t root_volume;
802         hammer_blockmap_t blockmap;
803         hammer_blockmap_t freemap;
804         struct hammer_blockmap_layer1 *layer1;
805         struct hammer_blockmap_layer2 *layer2;
806         hammer_buffer_t buffer1 = NULL;
807         hammer_buffer_t buffer2 = NULL;
808         hammer_off_t layer1_offset;
809         hammer_off_t layer2_offset;
810         int error;
811         int zone;
812         int offset;
813
814         if (bytes == 0)
815                 return(0);
816         hmp = trans->hmp;
817
818         /*
819          * Alignment
820          */
821         bytes = (bytes + 15) & ~15;
822         KKASSERT(bytes <= HAMMER_XBUFSIZE);
823
824         /*
825          * Basic zone validation & locking
826          */
827         zone = HAMMER_ZONE_DECODE(zone_offset);
828         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
829         root_volume = trans->rootvol;
830         error = 0;
831
832         blockmap = &hmp->blockmap[zone];
833         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
834
835         /*
836          * Dive layer 1.
837          */
838         layer1_offset = freemap->phys_offset +
839                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
840         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
841         if (error)
842                 goto failed;
843         KKASSERT(layer1->phys_offset &&
844                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
845         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
846                 Debugger("CRC FAILED: LAYER1");
847         }
848
849         /*
850          * Dive layer 2, each entry represents a large-block.
851          */
852         layer2_offset = layer1->phys_offset +
853                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
854         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
855         if (error)
856                 goto failed;
857         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
858                 Debugger("CRC FAILED: LAYER2");
859         }
860
861         hammer_lock_ex(&hmp->blkmap_lock);
862
863         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
864
865         /*
866          * Finalize some or all of the space covered by a current
867          * reservation.  An allocation in the same layer may have
868          * already assigned ownership.
869          */
870         if (layer2->zone == 0) {
871                 hammer_modify_buffer(trans, buffer1,
872                                      layer1, sizeof(*layer1));
873                 --layer1->blocks_free;
874                 layer1->layer1_crc = crc32(layer1,
875                                            HAMMER_LAYER1_CRCSIZE);
876                 hammer_modify_buffer_done(buffer1);
877                 layer2->zone = zone;
878                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
879                 KKASSERT(layer2->append_off == 0);
880                 hammer_modify_volume_field(trans,
881                                 trans->rootvol,
882                                 vol0_stat_freebigblocks);
883                 --root_volume->ondisk->vol0_stat_freebigblocks;
884                 hmp->copy_stat_freebigblocks =
885                    root_volume->ondisk->vol0_stat_freebigblocks;
886                 hammer_modify_volume_done(trans->rootvol);
887         }
888         if (layer2->zone != zone)
889                 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
890         KKASSERT(layer2->zone == zone);
891         layer2->bytes_free -= bytes;
892         if (resv)
893                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
894
895         /*
896          * Finalizations can occur out of order, or combined with allocations.
897          * append_off must be set to the highest allocated offset.
898          */
899         offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
900         if (layer2->append_off < offset)
901                 layer2->append_off = offset;
902
903         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
904         hammer_modify_buffer_done(buffer2);
905         hammer_unlock(&hmp->blkmap_lock);
906
907 failed:
908         if (buffer1)
909                 hammer_rel_buffer(buffer1, 0);
910         if (buffer2)
911                 hammer_rel_buffer(buffer2, 0);
912         return(error);
913 }
914
915 /*
916  * Return the number of free bytes in the big-block containing the
917  * specified blockmap offset.
918  */
919 int
920 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
921                         int *curp, int *errorp)
922 {
923         hammer_volume_t root_volume;
924         hammer_blockmap_t blockmap;
925         hammer_blockmap_t freemap;
926         struct hammer_blockmap_layer1 *layer1;
927         struct hammer_blockmap_layer2 *layer2;
928         hammer_buffer_t buffer = NULL;
929         hammer_off_t layer1_offset;
930         hammer_off_t layer2_offset;
931         int bytes;
932         int zone;
933
934         zone = HAMMER_ZONE_DECODE(zone_offset);
935         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
936         root_volume = hammer_get_root_volume(hmp, errorp);
937         if (*errorp) {
938                 *curp = 0;
939                 return(0);
940         }
941         blockmap = &hmp->blockmap[zone];
942         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
943
944         /*
945          * Dive layer 1.
946          */
947         layer1_offset = freemap->phys_offset +
948                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
949         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
950         if (*errorp) {
951                 bytes = 0;
952                 goto failed;
953         }
954         KKASSERT(layer1->phys_offset);
955         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
956                 Debugger("CRC FAILED: LAYER1");
957         }
958
959         /*
960          * Dive layer 2, each entry represents a large-block.
961          *
962          * (reuse buffer, layer1 pointer becomes invalid)
963          */
964         layer2_offset = layer1->phys_offset +
965                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
966         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
967         if (*errorp) {
968                 bytes = 0;
969                 goto failed;
970         }
971         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
972                 Debugger("CRC FAILED: LAYER2");
973         }
974         KKASSERT(layer2->zone == zone);
975
976         bytes = layer2->bytes_free;
977
978         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
979                 *curp = 0;
980         else
981                 *curp = 1;
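        /*
         * *curp is set to 1 only when the queried big-block is the one
         * the zone's allocation iterator (next_offset) currently sits in.
         */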
982 failed:
983         if (buffer)
984                 hammer_rel_buffer(buffer, 0);
985         hammer_rel_volume(root_volume, 0);
986         if (hammer_debug_general & 0x0800) {
987                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
988                         zone_offset, bytes);
989         }
990         return(bytes);
991 }
992
993
994 /*
995  * Lookup a blockmap offset.
996  */
997 hammer_off_t
998 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
999                        int *errorp)
1000 {
1001         hammer_volume_t root_volume;
1002         hammer_blockmap_t freemap;
1003         struct hammer_blockmap_layer1 *layer1;
1004         struct hammer_blockmap_layer2 *layer2;
1005         hammer_buffer_t buffer = NULL;
1006         hammer_off_t layer1_offset;
1007         hammer_off_t layer2_offset;
1008         hammer_off_t result_offset;
1009         hammer_off_t base_off;
1010         hammer_reserve_t resv;
1011         int zone;
1012
1013         /*
1014          * Calculate the zone-2 offset.
1015          */
1016         zone = HAMMER_ZONE_DECODE(zone_offset);
1017         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1018
1019         result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1020                         HAMMER_ZONE_RAW_BUFFER;
1021
1022         /*
1023          * We can actually stop here: normal blockmaps are now direct-mapped
1024          * onto the freemap and so represent zone-2 addresses.
1025          */
1026         if (hammer_verify_zone == 0) {
1027                 *errorp = 0;
1028                 return(result_offset);
1029         }
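        /*
         * Everything below only sanity-checks the translation against the
         * freemap and any covering reservation; it is skipped entirely
         * unless the hammer_verify_zone debug variable is enabled.
         */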
1030
1031         /*
1032          * Validate the allocation zone
1033          */
1034         root_volume = hammer_get_root_volume(hmp, errorp);
1035         if (*errorp)
1036                 return(0);
1037         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1038         KKASSERT(freemap->phys_offset != 0);
1039
1040         /*
1041          * Dive layer 1.
1042          */
1043         layer1_offset = freemap->phys_offset +
1044                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1045         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1046         if (*errorp)
1047                 goto failed;
1048         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1049         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1050                 Debugger("CRC FAILED: LAYER1");
1051         }
1052
1053         /*
1054          * Dive layer 2, each entry represents a large-block.
1055          */
1056         layer2_offset = layer1->phys_offset +
1057                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1058         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1059
1060         if (*errorp)
1061                 goto failed;
1062         if (layer2->zone == 0) {
1063                 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1064                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1065                                  base_off);
1066                 KKASSERT(resv && resv->zone == zone);
1067
1068         } else if (layer2->zone != zone) {
1069                 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1070                         layer2->zone, zone);
1071         }
1072         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1073                 Debugger("CRC FAILED: LAYER2");
1074         }
1075
1076 failed:
1077         if (buffer)
1078                 hammer_rel_buffer(buffer, 0);
1079         hammer_rel_volume(root_volume, 0);
1080         if (hammer_debug_general & 0x0800) {
1081                 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1082                         zone_offset, result_offset);
1083         }
1084         return(result_offset);
1085 }
1086
1087
1088 /*
1089  * Check space availability
1090  */
1091 int
1092 hammer_checkspace(hammer_mount_t hmp, int slop)
1093 {
1094         const int in_size = sizeof(struct hammer_inode_data) +
1095                             sizeof(union hammer_btree_elm);
1096         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1097         int64_t usedbytes;
1098
1099         usedbytes = hmp->rsv_inodes * in_size +
1100                     hmp->rsv_recs * rec_size +
1101                     hmp->rsv_databytes +
1102                     ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1103                     ((int64_t)hidirtybufspace << 2) +
1104                     (slop << HAMMER_LARGEBLOCK_BITS);
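        /*
         * usedbytes is a worst-case byte estimate of everything currently
         * in flight: reserved inodes and records (B-Tree footprint),
         * reserved data bytes, big-blocks parked on the reuse-delay list,
         * four times the dirty-buffer high water mark, plus the
         * caller-supplied slop expressed in big-blocks.  The filesystem
         * is considered full once this exceeds the cached free big-block
         * count.
         */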
1105
1106         hammer_count_extra_space_used = usedbytes;      /* debugging */
1107
1108         if (hmp->copy_stat_freebigblocks >=
1109             (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1110                 return(0);
1111         }
1112         return (ENOSPC);
1113 }
1114