/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

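/*
 * Overview (editor's summary, derived from the code below): free space is
 * tracked by a two-layer freemap.  A layer1 entry points at an array of
 * layer2 entries and caches a count of free big-blocks; each layer2 entry
 * describes one large-block (big-block), recording the owning zone, the
 * bytes still free, an append offset, and a CRC.  In-memory hammer_reserve
 * structures, kept in an RB tree keyed by the big-block's zone-2 offset,
 * cover big-blocks that the frontend has reserved or that must not be
 * reused until pending flushes complete.
 */
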
static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
                                    hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
                      int bytes, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
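        /*
         * (Worked example: a 100 byte request becomes (100 + 15) & ~15
         *  = 112 bytes; a request already on a 16-byte boundary is
         *  unchanged.)
         */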
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        next_offset = blockmap->next_offset;
again:
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a large-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }
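        /*
         * (The XOR tests above detect a boundary crossing: if the first
         *  and last byte of the request differ in any bit above the
         *  buffer or large-block mask they fall in different buffers, and
         *  next_offset is bumped to the base of the buffer containing the
         *  last byte, i.e. the start of the next one.)
         */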
        offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new bigblock in
         * layer2, so skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.  This can race against another thread that holds the
         * lock and is in the middle of modifying layer2.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The bigblock might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
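        /*
         * (base_off strips the zone bits and the intra-big-block offset
         *  from next_offset and re-encodes the result as a zone-2
         *  raw-buffer address, which is the key the reservation RB tree
         *  is indexed by.)
         */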
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
        }
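        /*
         * (If a reservation covers this big-block we take an extra ref on
         *  it here, while still holding the blockmap lock; the ref is
         *  dropped via hammer_blockmap_reserve_complete() further below
         *  once resv->append_off has been updated.)
         */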

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the bigblock to our zone
                 */
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * We hold the blockmap lock and should be the only ones
         * capable of modifying resv->append_off.  Track the allocation
         * as appropriate.
         */
        KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
        }
        result_offset = next_offset;

        /*
         * Process allocated result_offset
         */
        hammer_modify_volume(NULL, root_volume, NULL, 0);
        blockmap->next_offset = next_offset + bytes;
        hammer_modify_volume_done(root_volume);
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
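/*
 * Rough sketch of the intended life cycle (informal; actual callers live
 * elsewhere in HAMMER and handle arguments and errors more carefully):
 *
 *      resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *      ... frontend issues direct write I/O against zone_off ...
 *      hammer_blockmap_finalize(trans, resv, zone_off, bytes);  (backend)
 *      hammer_blockmap_reserve_complete(hmp, resv);             (drop ref)
 */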
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a large-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new bigblock in
         * layer2, so skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The bigblock might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
                        resx->flags |= HAMMER_RESF_LAYER2FREE;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew(hmp, next_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t base_offset;
        int error;

        KKASSERT(resv->refs > 0);
        KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
                 HAMMER_ZONE_RAW_BUFFER);

        /*
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the reservation,
         * allowing us to safely delete any related HAMMER buffers.
         *
         * If we are unable to clean out all related HAMMER buffers we
         * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_LARGEBLOCK_SIZE;
                base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
                base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_LARGEBLOCK_SIZE,
                                           0);
                if (error)
                        hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled; otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
                        int zone, struct hammer_blockmap_layer2 *layer2)
{
        hammer_reserve_t resv;

        /*
         * Allocate the reservation if necessary.
         *
         * NOTE: need lock in future around resv lookup/allocation and
         * the setdelay call, currently refs is not bumped until the call.
         */
again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
                resv->append_off = HAMMER_LARGEBLOCK_SIZE;

                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
        } else {
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
        hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
 */
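/*
 * (Presumably hmp->flusher.next is the flush group currently being built;
 *  tagging the reservation with flusher.next + 1 keeps the covered
 *  big-block locked out until the following group has fully flushed.)
 */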
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        } else {
                ++resv->refs;
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
}

void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_LARGEBLOCK_MASK64) == 0);
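        /*
         * (The assertion above verifies that the freed range does not
         *  cross a large-block boundary: the first and last byte of the
         *  range must agree in every bit above HAMMER_LARGEBLOCK_MASK64.)
         */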

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Free space previously allocated via blockmap_alloc().
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);

        /*
         * If a big-block becomes entirely free we must create a covering
         * reservation to prevent premature reuse.  Note, however, that
         * the big-block and/or reservation may still have an append_off
         * that allows further (non-reused) allocations.
         *
         * Once the reservation has been made we re-check layer2 and if
         * the big-block is still entirely free we reset the layer2 entry.
         * The reservation will prevent premature reuse.
         *
         * NOTE: hammer_buffer's are only invalidated when the reservation
         * is completed, if the layer2 entry is still completely free at
         * that time.  Any allocations from the reservation that may have
         * occurred in the meantime, or active references on the reservation
         * from new pending allocations, will prevent the invalidation from
         * occurring.
         */
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;

                hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;
        if (resv)
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
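        /*
         * (Example: if reserved records covering bytes 0-4095 and
         *  4096-8191 of the big-block finalize in reverse order,
         *  append_off still ends up at 8192 because it is only ever
         *  moved forward here.)
         */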
        offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return(error);
}

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp) {
                bytes = 0;
                goto failed;
        }
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         *
         * (reuse buffer, layer1 pointer becomes invalid)
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        if (*errorp) {
                bytes = 0;
                goto failed;
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

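        /*
         * (*curp is set to 1 if the zone's next_offset iterator currently
         *  points into this same big-block, 0 otherwise.)
         */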
        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        zone_offset, bytes);
        }
        return(bytes);
}

/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
                       int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
                        HAMMER_ZONE_RAW_BUFFER;
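        /*
         * (The translation keeps the intra-zone offset bits and simply
         *  replaces the zone field with zone-2, the raw buffer zone.
         *  For example, assuming the zone number occupies the top four
         *  bits of the offset, a zone-8 offset 0x8000000000001000 would
         *  map to 0x2000000000001000.)
         */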

        /*
         * We can actually stop here; normal blockmaps are now direct-mapped
         * onto the freemap and so represent zone-2 addresses.
         */
        if (hammer_verify_zone == 0) {
                *errorp = 0;
                return(result_offset);
        }

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp)
                goto failed;
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
                        panic("CRC FAILED: LAYER1");
                hammer_unlock(&hmp->blkmap_lock);
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        if (*errorp)
                goto failed;
        if (layer2->zone == 0) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);

        } else if (layer2->zone != zone) {
                panic("hammer_blockmap_lookup: bad zone %d/%d\n",
                        layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                hammer_lock_ex(&hmp->blkmap_lock);
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
                        panic("CRC FAILED: LAYER2");
                hammer_unlock(&hmp->blkmap_lock);
        }

failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        zone_offset, result_offset);
        }
        return(result_offset);
}

/*
 * Check space availability
 */
int
hammer_checkspace(hammer_mount_t hmp, int slop)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        int64_t usedbytes;

        usedbytes = hmp->rsv_inodes * in_size +
                    hmp->rsv_recs * rec_size +
                    hmp->rsv_databytes +
                    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
                    ((int64_t)hidirtybufspace << 2) +
                    (slop << HAMMER_LARGEBLOCK_BITS);
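        /*
         * (usedbytes is a byte-granularity estimate of outstanding
         *  reservations and dirty data; shifting it right by
         *  HAMMER_LARGEBLOCK_BITS converts it to big-blocks so it can be
         *  compared against the cached free big-block count below.)
         */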

        hammer_count_extra_space_used = usedbytes;      /* debugging */

        if (hmp->copy_stat_freebigblocks >=
            (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
                return(0);
        }
        return (ENOSPC);
}