1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44                                     hammer_off_t base_offset,
45                                     struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47
48 /*
49  * Reserved big-blocks red-black tree support
50  */
51 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
52              hammer_res_rb_compare, hammer_off_t, zone_offset);
53
54 static int
55 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
56 {
57         if (res1->zone_offset < res2->zone_offset)
58                 return(-1);
59         if (res1->zone_offset > res2->zone_offset)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * Allocate bytes from a zone
66  */
67 hammer_off_t
68 hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
69                       int bytes, int *errorp)
70 {
71         hammer_mount_t hmp;
72         hammer_volume_t root_volume;
73         hammer_blockmap_t blockmap;
74         hammer_blockmap_t freemap;
75         hammer_reserve_t resv;
76         struct hammer_blockmap_layer1 *layer1;
77         struct hammer_blockmap_layer2 *layer2;
78         hammer_buffer_t buffer1 = NULL;
79         hammer_buffer_t buffer2 = NULL;
80         hammer_buffer_t buffer3 = NULL;
81         hammer_off_t tmp_offset;
82         hammer_off_t next_offset;
83         hammer_off_t result_offset;
84         hammer_off_t layer1_offset;
85         hammer_off_t layer2_offset;
86         hammer_off_t base_off;
87         int loops = 0;
88         int offset;             /* offset within big-block */
89
90         hmp = trans->hmp;
91
92         /*
93          * Deal with alignment and buffer-boundary issues.
94          *
95          * Be careful, certain primary alignments are used below to allocate
96          * new blockmap blocks.
97          */
98         bytes = (bytes + 15) & ~15;
99         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
100         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
101
102         /*
103          * Setup
104          */
105         root_volume = trans->rootvol;
106         *errorp = 0;
107         blockmap = &hmp->blockmap[zone];
108         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
109         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
110
111         next_offset = blockmap->next_offset;
112 again:
113         /*
114          * Check for wrap
115          */
116         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
117                 if (++loops == 2) {
118                         result_offset = 0;
119                         *errorp = ENOSPC;
120                         goto failed;
121                 }
122                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
123         }
124
125         /*
126          * The allocation request may not cross a buffer boundary.  Special
127          * large allocations must not cross a large-block boundary.
128          */
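        /*
         * Note on the test below: tmp_offset addresses the last byte of the
         * proposed allocation.  XOR-ing the first and last byte offsets
         * leaves bits set above the buffer (or big-block) mask only when
         * the two bytes land in different buffers, i.e. when the request
         * would cross a boundary.  In that case next_offset is advanced to
         * the start of the buffer holding the last byte and the scan is
         * retried.
         */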
129         tmp_offset = next_offset + bytes - 1;
130         if (bytes <= HAMMER_BUFSIZE) {
131                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
132                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
133                         goto again;
134                 }
135         } else {
136                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
137                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
138                         goto again;
139                 }
140         }
141         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
142
143         /*
144          * Dive layer 1.
145          */
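        /*
         * The freemap is a two-layer radix tree: a single layer1 array is
         * indexed by the high bits of the offset and each layer1 entry
         * points at a layer2 array containing one entry per big-block.
         */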
146         layer1_offset = freemap->phys_offset +
147                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
148         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
149         if (*errorp) {
150                 result_offset = 0;
151                 goto failed;
152         }
153
154         /*
155          * Check CRC.
156          */
157         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
158                 hammer_lock_ex(&hmp->blkmap_lock);
159                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
160                         panic("CRC FAILED: LAYER1");
161                 hammer_unlock(&hmp->blkmap_lock);
162         }
163
164         /*
165          * If we are at a big-block boundary and layer1 indicates no
166          * free big-blocks, then we cannot allocate a new big-block in
167          * layer2, so skip to the next layer1 entry.
168          */
169         if (offset == 0 && layer1->blocks_free == 0) {
170                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
171                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
172                 goto again;
173         }
174         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
175
176         /*
177          * Dive layer 2, each entry represents a large-block.
178          */
179         layer2_offset = layer1->phys_offset +
180                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
181         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
182         if (*errorp) {
183                 result_offset = 0;
184                 goto failed;
185         }
186
187         /*
188          * Check CRC.  This can race another thread holding the lock
189          * and in the middle of modifying layer2.
190          */
191         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
192                 hammer_lock_ex(&hmp->blkmap_lock);
193                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
194                         panic("CRC FAILED: LAYER2");
195                 hammer_unlock(&hmp->blkmap_lock);
196         }
197
198         /*
199          * Skip this big-block if it is owned by a zone other than ours.
200          */
201         if (layer2->zone && layer2->zone != zone) {
202                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
203                 goto again;
204         }
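        /*
         * append_off is the first unused byte within the big-block.
         * Allocations are append-only, so if our candidate offset falls
         * below it we must skip forward.
         */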
205         if (offset < layer2->append_off) {
206                 next_offset += layer2->append_off - offset;
207                 goto again;
208         }
209
210         /*
211          * We need the lock from this point on.  We have to re-check zone
212          * ownership after acquiring the lock and also check for reservations.
213          */
214         hammer_lock_ex(&hmp->blkmap_lock);
215
216         if (layer2->zone && layer2->zone != zone) {
217                 hammer_unlock(&hmp->blkmap_lock);
218                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
219                 goto again;
220         }
221         if (offset < layer2->append_off) {
222                 hammer_unlock(&hmp->blkmap_lock);
223                 next_offset += layer2->append_off - offset;
224                 goto again;
225         }
226
227         /*
228          * The big-block might be reserved by another zone.  If it is reserved
229          * by our zone we may have to move next_offset past the append_off.
230          */
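        /*
         * base_off is the big-block's address expressed as a zone-2
         * (raw buffer) offset, which is the key used by the reservation
         * red-black tree.
         */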
231         base_off = (next_offset &
232                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | 
233                     HAMMER_ZONE_RAW_BUFFER;
234         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
235         if (resv) {
236                 if (resv->zone != zone) {
237                         hammer_unlock(&hmp->blkmap_lock);
238                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
239                                       ~HAMMER_LARGEBLOCK_MASK64;
240                         goto again;
241                 }
242                 if (offset < resv->append_off) {
243                         hammer_unlock(&hmp->blkmap_lock);
244                         next_offset += resv->append_off - offset;
245                         goto again;
246                 }
247         }
248
249         /*
250          * Ok, we can allocate out of this layer2 big-block.  Assume ownership
251          * of the layer for real.  At this point we've validated any
252          * reservation that might exist and can just ignore resv.
253          */
254         if (layer2->zone == 0) {
255                 /*
256                  * Assign the bigblock to our zone
257                  */
258                 hammer_modify_buffer(trans, buffer1,
259                                      layer1, sizeof(*layer1));
260                 --layer1->blocks_free;
261                 layer1->layer1_crc = crc32(layer1,
262                                            HAMMER_LAYER1_CRCSIZE);
263                 hammer_modify_buffer_done(buffer1);
264                 hammer_modify_buffer(trans, buffer2,
265                                      layer2, sizeof(*layer2));
266                 layer2->zone = zone;
267                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
268                 KKASSERT(layer2->append_off == 0);
269                 hammer_modify_volume_field(trans, trans->rootvol,
270                                            vol0_stat_freebigblocks);
271                 --root_volume->ondisk->vol0_stat_freebigblocks;
272                 hmp->copy_stat_freebigblocks =
273                         root_volume->ondisk->vol0_stat_freebigblocks;
274                 hammer_modify_volume_done(trans->rootvol);
275         } else {
276                 hammer_modify_buffer(trans, buffer2,
277                                      layer2, sizeof(*layer2));
278         }
279         KKASSERT(layer2->zone == zone);
280
281         layer2->bytes_free -= bytes;
282         KKASSERT(layer2->append_off <= offset);
283         layer2->append_off = offset + bytes;
284         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
285         hammer_modify_buffer_done(buffer2);
286         KKASSERT(layer2->bytes_free >= 0);
287
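        /*
         * If a frontend reservation covers this big-block, push its
         * append_off forward as well so new reservations cannot hand out
         * the space we just allocated, and note that the big-block is no
         * longer entirely free.
         */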
288         if (resv) {
289                 KKASSERT(resv->append_off <= offset);
290                 resv->append_off = offset + bytes;
291                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
292         }
293
294         /*
295          * If we are allocating from the base of a new buffer we can avoid
296          * a disk read by calling hammer_bnew_ext().
297          */
298         if ((next_offset & HAMMER_BUFMASK) == 0) {
299                 hammer_bnew_ext(trans->hmp, next_offset, bytes,
300                                 errorp, &buffer3);
301         }
302         result_offset = next_offset;
303
304         /*
305          * Process the allocated result_offset and advance the zone's iterator.
306          */
307         hammer_modify_volume(NULL, root_volume, NULL, 0);
308         blockmap->next_offset = next_offset + bytes;
309         hammer_modify_volume_done(root_volume);
310         hammer_unlock(&hmp->blkmap_lock);
311 failed:
312
313         /*
314          * Cleanup
315          */
316         if (buffer1)
317                 hammer_rel_buffer(buffer1, 0);
318         if (buffer2)
319                 hammer_rel_buffer(buffer2, 0);
320         if (buffer3)
321                 hammer_rel_buffer(buffer3, 0);
322
323         return(result_offset);
324 }
325
326 /*
327  * Frontend function - Reserve bytes in a zone.
328  *
329  * This code reserves bytes out of a blockmap without committing to any
330  * meta-data modifications, allowing the front-end to directly issue disk
331  * write I/O for large blocks of data.
332  *
333  * The backend later finalizes the reservation with hammer_blockmap_finalize()
334  * upon committing the related record.
335  */
336 hammer_reserve_t
337 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
338                         hammer_off_t *zone_offp, int *errorp)
339 {
340         hammer_volume_t root_volume;
341         hammer_blockmap_t blockmap;
342         hammer_blockmap_t freemap;
343         struct hammer_blockmap_layer1 *layer1;
344         struct hammer_blockmap_layer2 *layer2;
345         hammer_buffer_t buffer1 = NULL;
346         hammer_buffer_t buffer2 = NULL;
347         hammer_buffer_t buffer3 = NULL;
348         hammer_off_t tmp_offset;
349         hammer_off_t next_offset;
350         hammer_off_t layer1_offset;
351         hammer_off_t layer2_offset;
352         hammer_off_t base_off;
353         hammer_reserve_t resv;
354         hammer_reserve_t resx;
355         int loops = 0;
356         int offset;
357
358         /*
359          * Setup
360          */
361         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
362         root_volume = hammer_get_root_volume(hmp, errorp);
363         if (*errorp)
364                 return(NULL);
365         blockmap = &hmp->blockmap[zone];
366         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
367         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
368
369         /*
370          * Deal with alignment and buffer-boundary issues.
371          *
372          * Be careful, certain primary alignments are used below to allocate
373          * new blockmap blocks.
374          */
375         bytes = (bytes + 15) & ~15;
376         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
377
378         next_offset = blockmap->next_offset;
379 again:
380         resv = NULL;
381         /*
382          * Check for wrap
383          */
384         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
385                 if (++loops == 2) {
386                         *errorp = ENOSPC;
387                         goto failed;
388                 }
389                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
390         }
391
392         /*
393          * The allocation request may not cross a buffer boundary.  Special
394          * large allocations must not cross a large-block boundary.
395          */
396         tmp_offset = next_offset + bytes - 1;
397         if (bytes <= HAMMER_BUFSIZE) {
398                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
399                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
400                         goto again;
401                 }
402         } else {
403                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
404                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
405                         goto again;
406                 }
407         }
408         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
409
410         /*
411          * Dive layer 1.
412          */
413         layer1_offset = freemap->phys_offset +
414                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
415         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
416         if (*errorp)
417                 goto failed;
418
419         /*
420          * Check CRC.
421          */
422         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
423                 hammer_lock_ex(&hmp->blkmap_lock);
424                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
425                         panic("CRC FAILED: LAYER1");
426                 hammer_unlock(&hmp->blkmap_lock);
427         }
428
429         /*
430          * If we are at a big-block boundary and layer1 indicates no
431          * free big-blocks, then we cannot allocate a new big-block in
432          * layer2, so skip to the next layer1 entry.
433          */
434         if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
435             layer1->blocks_free == 0) {
436                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
437                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
438                 goto again;
439         }
440         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
441
442         /*
443          * Dive layer 2, each entry represents a large-block.
444          */
445         layer2_offset = layer1->phys_offset +
446                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
447         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
448         if (*errorp)
449                 goto failed;
450
451         /*
452          * Check CRC if not allocating into uninitialized space (which we
453          * aren't when reserving space).
454          */
455         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
456                 hammer_lock_ex(&hmp->blkmap_lock);
457                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
458                         panic("CRC FAILED: LAYER2");
459                 hammer_unlock(&hmp->blkmap_lock);
460         }
461
462         /*
463          * Skip this big-block if it is owned by a zone other than ours.
464          */
465         if (layer2->zone && layer2->zone != zone) {
466                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
467                 goto again;
468         }
469         if (offset < layer2->append_off) {
470                 next_offset += layer2->append_off - offset;
471                 goto again;
472         }
473
474         /*
475          * We need the lock from this point on.  We have to re-check zone
476          * ownership after acquiring the lock and also check for reservations.
477          */
478         hammer_lock_ex(&hmp->blkmap_lock);
479
480         if (layer2->zone && layer2->zone != zone) {
481                 hammer_unlock(&hmp->blkmap_lock);
482                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
483                 goto again;
484         }
485         if (offset < layer2->append_off) {
486                 hammer_unlock(&hmp->blkmap_lock);
487                 next_offset += layer2->append_off - offset;
488                 goto again;
489         }
490
491         /*
492          * The big-block might be reserved by another zone.  If it is reserved
493          * by our zone we may have to move next_offset past the append_off.
494          */
495         base_off = (next_offset &
496                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
497                     HAMMER_ZONE_RAW_BUFFER;
498         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
499         if (resv) {
500                 if (resv->zone != zone) {
501                         hammer_unlock(&hmp->blkmap_lock);
502                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
503                                       ~HAMMER_LARGEBLOCK_MASK64;
504                         goto again;
505                 }
506                 if (offset < resv->append_off) {
507                         hammer_unlock(&hmp->blkmap_lock);
508                         next_offset += resv->append_off - offset;
509                         goto again;
510                 }
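                /*
                 * An existing reservation already covers this big-block
                 * for our zone; just add a reference to it.
                 */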
511                 ++resv->refs;
512                 resx = NULL;
513         } else {
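                /*
                 * No reservation exists yet; create one keyed on the
                 * big-block's zone-2 address.  RB_INSERT returns NULL on a
                 * successful insert, hence the assertion below.
                 */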
514                 resx = kmalloc(sizeof(*resv), hmp->m_misc,
515                                M_WAITOK | M_ZERO | M_USE_RESERVE);
516                 resx->refs = 1;
517                 resx->zone = zone;
518                 resx->zone_offset = base_off;
519                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
520                         resx->flags |= HAMMER_RESF_LAYER2FREE;
521                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
522                 KKASSERT(resv == NULL);
523                 resv = resx;
524                 ++hammer_count_reservations;
525         }
526         resv->append_off = offset + bytes;
527
528         /*
529          * If we are not reserving a whole buffer but are at the start of
530          * a new block, call hammer_bnew() to avoid a disk read.
531          *
532          * If we are reserving a whole buffer (or more), the caller will
533          * probably use a direct read, so do nothing.
534          */
535         if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
536                 hammer_bnew(hmp, next_offset, errorp, &buffer3);
537         }
538
539         /*
540          * Adjust our iterator: advance the zone's next_offset past the
541          * space we just reserved.  The reservation (resv->append_off)
542          * covers this range until the backend finalizes it.
543          */
544         blockmap->next_offset = next_offset + bytes;
545         hammer_unlock(&hmp->blkmap_lock);
546
547 failed:
548         if (buffer1)
549                 hammer_rel_buffer(buffer1, 0);
550         if (buffer2)
551                 hammer_rel_buffer(buffer2, 0);
552         if (buffer3)
553                 hammer_rel_buffer(buffer3, 0);
554         hammer_rel_volume(root_volume, 0);
555         *zone_offp = next_offset;
556
557         return(resv);
558 }
559
560 #if 0
561 /*
562  * Backend function - undo a portion of a reservation.
563  */
564 void
565 hammer_blockmap_reserve_undo(hammer_mount_t hmp, hammer_reserve_t resv,
566                          hammer_off_t zone_offset, int bytes)
567 {
568         resv->bytes_freed += bytes;
569 }
570
571 #endif
572
573 /*
574  * Dereference a reservation structure.  Upon the final release the
575  * underlying big-block is checked and if it is entirely free we delete
576  * any related HAMMER buffers to avoid potential conflicts with future
577  * reuse of the big-block.
578  */
579 void
580 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
581 {
582         hammer_off_t base_offset;
583         int error;
584
585         KKASSERT(resv->refs > 0);
586         KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
587                  HAMMER_ZONE_RAW_BUFFER);
588
589         /*
590          * Setting append_off to the max prevents any new allocations
591          * from occurring while we are trying to dispose of the reservation,
592          * allowing us to safely delete any related HAMMER buffers.
593          *
594          * If we are unable to clean out all related HAMMER buffers we
595          * requeue the delay.
596          */
597         if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
598                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
599                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
600                 base_offset = resv->zone_offset & ~HAMMER_ZONE_RAW_BUFFER;
601                 base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
602                 error = hammer_del_buffers(hmp, base_offset,
603                                            resv->zone_offset,
604                                            HAMMER_LARGEBLOCK_SIZE,
605                                            0);
606                 if (error)
607                         hammer_reserve_setdelay(hmp, resv);
608         }
609         if (--resv->refs == 0) {
610                 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
611                 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
612                 kfree(resv, hmp->m_misc);
613                 --hammer_count_reservations;
614         }
615 }
616
617 /*
618  * Prevent a potentially free big-block from being reused until after
619  * the related flushes have completely cycled, otherwise crash recovery
620  * could resurrect a data block that was already reused and overwritten.
621  *
622  * The caller re-checks the layer2 entry's bytes_free after the
623  * reservation has been placed and only then resets the layer2 entry.
624  */
625 static void
626 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
627                         struct hammer_blockmap_layer2 *layer2)
628 {
629         hammer_reserve_t resv;
630
631         /*
632          * Allocate the reservation if necessary.
633          */
634 again:
635         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
636         if (resv == NULL) {
637                 resv = kmalloc(sizeof(*resv), hmp->m_misc,
638                                M_WAITOK | M_ZERO | M_USE_RESERVE);
639                 resv->zone_offset = base_offset;
640                 resv->refs = 0;
641                 /* XXX inherent lock until refs bumped later on */
642                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
643                         resv->flags |= HAMMER_RESF_LAYER2FREE;
644                 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
645                         kfree(resv, hmp->m_misc);
646                         goto again;
647                 }
648                 ++hammer_count_reservations;
649         }

        /*
         * Tie the reservation to the flusher's delay list so the big-block
         * is not reused until the related flush groups have cycled; this is
         * also where refs gets bumped for a newly created reservation.
         */
        hammer_reserve_setdelay(hmp, resv);
650 }
651
652 /*
653  * Enter the reservation on the on-delay list, or move it if it
654  * is already on the list.
655  */
656 static void
657 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
658 {
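        /*
         * If the reservation is already delayed just requeue it with an
         * updated flush group; otherwise take a reference, account for it
         * in rsv_fromdelay and queue it.  The flush group is set beyond the
         * flusher's current sequence so the big-block is not reused until
         * that group has completed.
         */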
659         if (resv->flags & HAMMER_RESF_ONDELAY) {
660                 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
661                 resv->flush_group = hmp->flusher.next + 1;
662                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
663         } else {
664                 ++resv->refs;
665                 ++hmp->rsv_fromdelay;
666                 resv->flags |= HAMMER_RESF_ONDELAY;
667                 resv->flush_group = hmp->flusher.next + 1;
668                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
669         }
670 }
671
672 void
673 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
674 {
675         KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
676         resv->flags &= ~HAMMER_RESF_ONDELAY;
677         TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
678         --hmp->rsv_fromdelay;
679         hammer_blockmap_reserve_complete(hmp, resv);
680 }
681
682 /*
683  * Backend function - free (offset, bytes) in a zone.
684  *
685  * XXX error return
686  */
687 void
688 hammer_blockmap_free(hammer_transaction_t trans,
689                      hammer_off_t zone_offset, int bytes)
690 {
691         hammer_mount_t hmp;
692         hammer_volume_t root_volume;
693         hammer_blockmap_t blockmap;
694         hammer_blockmap_t freemap;
695         struct hammer_blockmap_layer1 *layer1;
696         struct hammer_blockmap_layer2 *layer2;
697         hammer_buffer_t buffer1 = NULL;
698         hammer_buffer_t buffer2 = NULL;
699         hammer_off_t layer1_offset;
700         hammer_off_t layer2_offset;
701         hammer_off_t base_off;
702         int error;
703         int zone;
704
705         if (bytes == 0)
706                 return;
707         hmp = trans->hmp;
708
709         /*
710          * Alignment
711          */
712         bytes = (bytes + 15) & ~15;
713         KKASSERT(bytes <= HAMMER_XBUFSIZE);
714         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) & 
715                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
716
717         /*
718          * Basic zone validation & locking
719          */
720         zone = HAMMER_ZONE_DECODE(zone_offset);
721         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
722         root_volume = trans->rootvol;
723         error = 0;
724
725         blockmap = &hmp->blockmap[zone];
726         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
727
728         /*
729          * Dive layer 1.
730          */
731         layer1_offset = freemap->phys_offset +
732                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
733         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
734         if (error)
735                 goto failed;
736         KKASSERT(layer1->phys_offset &&
737                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
738         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
739                 hammer_lock_ex(&hmp->blkmap_lock);
740                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
741                         panic("CRC FAILED: LAYER1");
742                 hammer_unlock(&hmp->blkmap_lock);
743         }
744
745         /*
746          * Dive layer 2, each entry represents a large-block.
747          */
748         layer2_offset = layer1->phys_offset +
749                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
750         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
751         if (error)
752                 goto failed;
753         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
754                 hammer_lock_ex(&hmp->blkmap_lock);
755                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
756                         panic("CRC FAILED: LAYER2");
757                 hammer_unlock(&hmp->blkmap_lock);
758         }
759
760         hammer_lock_ex(&hmp->blkmap_lock);
761
762         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
763
764         /*
765          * Free space previously allocated via blockmap_alloc().
766          */
767         KKASSERT(layer2->zone == zone);
768         layer2->bytes_free += bytes;
769         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
770
771         /*
772          * If a big-block becomes entirely free we must create a covering
773          * reservation to prevent premature reuse.  Note, however, that
774          * the big-block and/or reservation may still have an append_off
775          * that allows further (non-reused) allocations.
776          *
777          * Once the reservation has been made we re-check layer2 and if
778          * the big-block is still entirely free we reset the layer2 entry.
779          * The reservation will prevent premature reuse.
780          *
781          * NOTE: hammer_buffer's are only invalidated when the reservation
782          * is completed, if the layer2 entry is still completely free at
783          * that time.  Any allocations from the reservation that may have
784          * occurred in the meantime, or active references on the reservation
785          * from new pending allocations, will prevent the invalidation from
786          * occurring.
787          */
788         if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
789                 base_off = (zone_offset &
                                (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                                HAMMER_ZONE_RAW_BUFFER;
790
791                 hammer_reserve_setdelay_offset(hmp, base_off, layer2);
792                 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
793                         layer2->zone = 0;
794                         layer2->append_off = 0;
795                         hammer_modify_buffer(trans, buffer1,
796                                              layer1, sizeof(*layer1));
797                         ++layer1->blocks_free;
798                         layer1->layer1_crc = crc32(layer1,
799                                                    HAMMER_LAYER1_CRCSIZE);
800                         hammer_modify_buffer_done(buffer1);
801                         hammer_modify_volume_field(trans,
802                                         trans->rootvol,
803                                         vol0_stat_freebigblocks);
804                         ++root_volume->ondisk->vol0_stat_freebigblocks;
805                         hmp->copy_stat_freebigblocks =
806                            root_volume->ondisk->vol0_stat_freebigblocks;
807                         hammer_modify_volume_done(trans->rootvol);
808                 }
809         }
810         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
811         hammer_modify_buffer_done(buffer2);
812         hammer_unlock(&hmp->blkmap_lock);
813
814 failed:
815         if (buffer1)
816                 hammer_rel_buffer(buffer1, 0);
817         if (buffer2)
818                 hammer_rel_buffer(buffer2, 0);
819 }
820
821 /*
822  * Backend function - finalize (offset, bytes) in a zone.
823  *
824  * Allocate space that was previously reserved by the frontend.
825  */
826 int
827 hammer_blockmap_finalize(hammer_transaction_t trans,
828                          hammer_reserve_t resv,
829                          hammer_off_t zone_offset, int bytes)
830 {
831         hammer_mount_t hmp;
832         hammer_volume_t root_volume;
833         hammer_blockmap_t blockmap;
834         hammer_blockmap_t freemap;
835         struct hammer_blockmap_layer1 *layer1;
836         struct hammer_blockmap_layer2 *layer2;
837         hammer_buffer_t buffer1 = NULL;
838         hammer_buffer_t buffer2 = NULL;
839         hammer_off_t layer1_offset;
840         hammer_off_t layer2_offset;
841         int error;
842         int zone;
843         int offset;
844
845         if (bytes == 0)
846                 return(0);
847         hmp = trans->hmp;
848
849         /*
850          * Alignment
851          */
852         bytes = (bytes + 15) & ~15;
853         KKASSERT(bytes <= HAMMER_XBUFSIZE);
854
855         /*
856          * Basic zone validation & locking
857          */
858         zone = HAMMER_ZONE_DECODE(zone_offset);
859         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
860         root_volume = trans->rootvol;
861         error = 0;
862
863         blockmap = &hmp->blockmap[zone];
864         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
865
866         /*
867          * Dive layer 1.
868          */
869         layer1_offset = freemap->phys_offset +
870                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
871         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
872         if (error)
873                 goto failed;
874         KKASSERT(layer1->phys_offset &&
875                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
876         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
877                 hammer_lock_ex(&hmp->blkmap_lock);
878                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
879                         panic("CRC FAILED: LAYER1");
880                 hammer_unlock(&hmp->blkmap_lock);
881         }
882
883         /*
884          * Dive layer 2, each entry represents a large-block.
885          */
886         layer2_offset = layer1->phys_offset +
887                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
888         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
889         if (error)
890                 goto failed;
891         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
892                 hammer_lock_ex(&hmp->blkmap_lock);
893                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
894                         panic("CRC FAILED: LAYER2");
895                 hammer_unlock(&hmp->blkmap_lock);
896         }
897
898         hammer_lock_ex(&hmp->blkmap_lock);
899
900         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
901
902         /*
903          * Finalize some or all of the space covered by a current
904          * reservation.  An allocation in the same layer may have
905          * already assigned ownership.
906          */
907         if (layer2->zone == 0) {
908                 hammer_modify_buffer(trans, buffer1,
909                                      layer1, sizeof(*layer1));
910                 --layer1->blocks_free;
911                 layer1->layer1_crc = crc32(layer1,
912                                            HAMMER_LAYER1_CRCSIZE);
913                 hammer_modify_buffer_done(buffer1);
914                 layer2->zone = zone;
915                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
916                 KKASSERT(layer2->append_off == 0);
917                 hammer_modify_volume_field(trans,
918                                 trans->rootvol,
919                                 vol0_stat_freebigblocks);
920                 --root_volume->ondisk->vol0_stat_freebigblocks;
921                 hmp->copy_stat_freebigblocks =
922                    root_volume->ondisk->vol0_stat_freebigblocks;
923                 hammer_modify_volume_done(trans->rootvol);
924         }
925         if (layer2->zone != zone)
926                 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
927         KKASSERT(layer2->zone == zone);
928         layer2->bytes_free -= bytes;
929         if (resv)
930                 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
931
932         /*
933          * Finalizations can occur out of order, or combined with allocations.
934          * append_off must be set to the highest allocated offset.
935          */
936         offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
937         if (layer2->append_off < offset)
938                 layer2->append_off = offset;
939
940         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
941         hammer_modify_buffer_done(buffer2);
942         hammer_unlock(&hmp->blkmap_lock);
943
944 failed:
945         if (buffer1)
946                 hammer_rel_buffer(buffer1, 0);
947         if (buffer2)
948                 hammer_rel_buffer(buffer2, 0);
949         return(error);
950 }
951
952 /*
953  * Return the number of free bytes in the big-block containing the
954  * specified blockmap offset.
955  */
956 int
957 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
958                         int *curp, int *errorp)
959 {
960         hammer_volume_t root_volume;
961         hammer_blockmap_t blockmap;
962         hammer_blockmap_t freemap;
963         struct hammer_blockmap_layer1 *layer1;
964         struct hammer_blockmap_layer2 *layer2;
965         hammer_buffer_t buffer = NULL;
966         hammer_off_t layer1_offset;
967         hammer_off_t layer2_offset;
968         int bytes;
969         int zone;
970
971         zone = HAMMER_ZONE_DECODE(zone_offset);
972         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
973         root_volume = hammer_get_root_volume(hmp, errorp);
974         if (*errorp) {
975                 *curp = 0;
976                 return(0);
977         }
978         blockmap = &hmp->blockmap[zone];
979         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
980
981         /*
982          * Dive layer 1.
983          */
984         layer1_offset = freemap->phys_offset +
985                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
986         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
987         if (*errorp) {
988                 bytes = 0;
989                 goto failed;
990         }
991         KKASSERT(layer1->phys_offset);
992         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
993                 hammer_lock_ex(&hmp->blkmap_lock);
994                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
995                         panic("CRC FAILED: LAYER1");
996                 hammer_unlock(&hmp->blkmap_lock);
997         }
998
999         /*
1000          * Dive layer 2, each entry represents a large-block.
1001          *
1002          * (reuse buffer, layer1 pointer becomes invalid)
1003          */
1004         layer2_offset = layer1->phys_offset +
1005                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1006         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1007         if (*errorp) {
1008                 bytes = 0;
1009                 goto failed;
1010         }
1011         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1012                 hammer_lock_ex(&hmp->blkmap_lock);
1013                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1014                         panic("CRC FAILED: LAYER2");
1015                 hammer_unlock(&hmp->blkmap_lock);
1016         }
1017         KKASSERT(layer2->zone == zone);
1018
1019         bytes = layer2->bytes_free;
1020
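        /*
         * *curp tells the caller whether the zone's allocation iterator
         * (next_offset) currently points into this big-block.
         */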
1021         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1022                 *curp = 0;
1023         else
1024                 *curp = 1;
1025 failed:
1026         if (buffer)
1027                 hammer_rel_buffer(buffer, 0);
1028         hammer_rel_volume(root_volume, 0);
1029         if (hammer_debug_general & 0x0800) {
1030                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1031                         zone_offset, bytes);
1032         }
1033         return(bytes);
1034 }
1035
1036
1037 /*
1038  * Lookup a blockmap offset.
1039  */
1040 hammer_off_t
1041 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1042                        int *errorp)
1043 {
1044         hammer_volume_t root_volume;
1045         hammer_blockmap_t freemap;
1046         struct hammer_blockmap_layer1 *layer1;
1047         struct hammer_blockmap_layer2 *layer2;
1048         hammer_buffer_t buffer = NULL;
1049         hammer_off_t layer1_offset;
1050         hammer_off_t layer2_offset;
1051         hammer_off_t result_offset;
1052         hammer_off_t base_off;
1053         hammer_reserve_t resv;
1054         int zone;
1055
1056         /*
1057          * Calculate the zone-2 offset.
1058          */
1059         zone = HAMMER_ZONE_DECODE(zone_offset);
1060         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1061
1062         result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1063                         HAMMER_ZONE_RAW_BUFFER;
1064
1065         /*
1066          * We can actually stop here; normal blockmaps are now direct-mapped
1067          * onto the freemap and so represent zone-2 addresses.
1068          */
1069         if (hammer_verify_zone == 0) {
1070                 *errorp = 0;
1071                 return(result_offset);
1072         }
1073
1074         /*
1075          * Validate the allocation zone
1076          */
1077         root_volume = hammer_get_root_volume(hmp, errorp);
1078         if (*errorp)
1079                 return(0);
1080         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1081         KKASSERT(freemap->phys_offset != 0);
1082
1083         /*
1084          * Dive layer 1.
1085          */
1086         layer1_offset = freemap->phys_offset +
1087                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1088         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1089         if (*errorp)
1090                 goto failed;
1091         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1092         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1093                 hammer_lock_ex(&hmp->blkmap_lock);
1094                 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1095                         panic("CRC FAILED: LAYER1");
1096                 hammer_unlock(&hmp->blkmap_lock);
1097         }
1098
1099         /*
1100          * Dive layer 2, each entry represents a large-block.
1101          */
1102         layer2_offset = layer1->phys_offset +
1103                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1104         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1105
1106         if (*errorp)
1107                 goto failed;
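        /*
         * A zero zone in layer2 means the big-block has not been finalized
         * by the backend yet; it must be covered by an in-memory
         * reservation belonging to the expected zone.
         */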
1108         if (layer2->zone == 0) {
1109                 base_off = (zone_offset &
                                 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                                 HAMMER_ZONE_RAW_BUFFER;
1110                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1111                                  base_off);
1112                 KKASSERT(resv && resv->zone == zone);
1113
1114         } else if (layer2->zone != zone) {
1115                 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1116                         layer2->zone, zone);
1117         }
1118         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1119                 hammer_lock_ex(&hmp->blkmap_lock);
1120                 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1121                         panic("CRC FAILED: LAYER2");
1122                 hammer_unlock(&hmp->blkmap_lock);
1123         }
1124
1125 failed:
1126         if (buffer)
1127                 hammer_rel_buffer(buffer, 0);
1128         hammer_rel_volume(root_volume, 0);
1129         if (hammer_debug_general & 0x0800) {
1130                 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1131                         zone_offset, result_offset);
1132         }
1133         return(result_offset);
1134 }
1135
1136
1137 /*
1138  * Check space availability
1139  */
1140 int
1141 hammer_checkspace(hammer_mount_t hmp, int slop)
1142 {
1143         const int in_size = sizeof(struct hammer_inode_data) +
1144                             sizeof(union hammer_btree_elm);
1145         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1146         int64_t usedbytes;
1147
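        /*
         * Estimate the worst case space required by outstanding
         * reservations: reserved inodes and records converted to bytes,
         * reserved data bytes, delayed big-block reservations, a multiple
         * of the dirty buffer limit, plus the caller-supplied slop in
         * big-block units.  There is enough space if the cached free
         * big-block count covers this estimate.
         */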
1148         usedbytes = hmp->rsv_inodes * in_size +
1149                     hmp->rsv_recs * rec_size +
1150                     hmp->rsv_databytes +
1151                     ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1152                     ((int64_t)hidirtybufspace << 2) +
1153                     (slop << HAMMER_LARGEBLOCK_BITS);
1154
1155         hammer_count_extra_space_used = usedbytes;      /* debugging */
1156
1157         if (hmp->copy_stat_freebigblocks >=
1158             (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1159                 return(0);
1160         }
1161         return (ENOSPC);
1162 }
1163