HAMMER 63/Many: IO Error handling features
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.26 2008/07/18 00:19:53 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static int hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
                        hammer_off_t zone2_offset);


/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}
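
/*
 * Note (a sketch, assuming the usual DragonFly <sys/tree.h> semantics):
 * RB_GENERATE2 also emits a keyed lookup that compares a hammer_off_t
 * key directly against the zone_offset field, so callers can search
 * the reservation tree without building a temporary node:
 *
 *      resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
 */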

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
                      int bytes, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
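
        /*
         * E.g. the 16-byte rounding above maps bytes = 1..16 -> 16,
         * 17..32 -> 32, and so on; the assertion verifies that the
         * rounded request still fits within HAMMER_XBUFSIZE.
         */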

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        next_offset = blockmap->next_offset;
again:
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }
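
        /*
         * Sketch of the zone arithmetic assumed here: a hammer_off_t
         * carries a 4-bit zone number in its top bits (covered by
         * HAMMER_OFF_ZONE_MASK), roughly
         *
         *      HAMMER_ZONE_ENCODE(zone, off) ~ ((hammer_off_t)(zone) << 60) | (off)
         *      HAMMER_ZONE_DECODE(off)       ~ (int)((off) >> 60)
         *
         * so HAMMER_ZONE_ENCODE(zone + 1, 0) is the first offset past
         * the end of our zone, which is what the wrap test above
         * compares against.
         */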

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a large-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }
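
        /*
         * Example of the XOR crossing test, assuming 16K buffers
         * (HAMMER_BUFMASK64 == 0x3fff): next_offset ending in 0x3ff0
         * with bytes = 32 gives tmp_offset ending in 0x400f.  The XOR
         * then has bits set above the buffer mask, i.e. the first and
         * last byte land in different buffers, so next_offset is
         * rounded up to the 0x4000 boundary and the loop retries.
         */
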
        offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
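
        /*
         * Worked example of the two-level radix math (a sketch, assuming
         * the usual constants: 16K buffers, 8MB large-blocks, 16-byte
         * layer2 entries): one layer2 buffer holds 1024 entries and thus
         * maps 1024 * 8MB = 8GB.  Each layer1 entry points at one such
         * layer2 buffer, so HAMMER_BLOCKMAP_LAYER1_OFFSET() scales
         * (offset / 8GB) by the layer1 entry size, and
         * HAMMER_BLOCKMAP_LAYER2_OFFSET() scales (offset % 8GB) / 8MB
         * by the layer2 entry size.
         */
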
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp) {
                result_offset = 0;
                goto failed;
        }

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is
         * reserved by our zone, we may have to move next_offset past
         * the append_off.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the big-block to our zone
                 */
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);

        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
        }
        result_offset = next_offset;

        /*
         * Process allocated result_offset
         */
        hammer_modify_volume(NULL, root_volume, NULL, 0);
        blockmap->next_offset = next_offset + bytes;
        hammer_modify_volume_done(root_volume);
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
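
/*
 * Example caller (a sketch only; error handling abbreviated): allocating
 * space for a new B-tree node and checking both the returned offset and
 * the error code:
 *
 *      hammer_off_t node_offset;
 *      int error;
 *
 *      node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *                                          sizeof(struct hammer_node_ondisk),
 *                                          &error);
 *      if (node_offset == 0)
 *              return(error);  (error holds ENOSPC or an I/O error)
 */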

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
again:
        resv = NULL;
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a large-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp)
                goto failed;

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        if (*errorp)
                goto failed;

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The big-block might be reserved by another zone.  If it is
         * reserved by our zone, we may have to move next_offset past
         * the append_off.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), M_HAMMER,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew(hmp, next_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator.  The layer1 and layer2 space beyond
         * next_offset is uninitialized, and next_offset must remain
         * big-block aligned with respect to that boundary.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}
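
/*
 * Example of the reservation life cycle (a sketch only; the real
 * frontend/backend plumbing and error handling are more involved):
 *
 *      hammer_reserve_t resv;
 *      hammer_off_t zone_off;
 *      int error;
 *
 *      resv = hammer_blockmap_reserve(hmp, HAMMER_ZONE_LARGE_DATA_INDEX,
 *                                     bytes, &zone_off, &error);
 *      if (resv) {
 *              ... frontend issues direct write I/O to zone_off ...
 *              ... backend commits the related record ...
 *              error = hammer_blockmap_finalize(trans, zone_off, bytes);
 *              hammer_blockmap_reserve_complete(hmp, resv);
 *      }
 */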

/*
 * Backend function - undo a portion of a reservation.
 */
void
hammer_blockmap_reserve_undo(hammer_reserve_t resv,
                         hammer_off_t zone_offset, int bytes)
{
        resv->bytes_freed += bytes;
}


/*
 * A record with a storage reservation calls this function when it is
 * being freed.  The storage may or may not have actually been allocated.
 *
 * This function removes the lock that prevented other entities from
 * allocating out of the storage or removing the zone assignment.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        hammer_off_t zone2_offset;

        KKASSERT(resv->refs > 0);
        if (--resv->refs == 0) {
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);

                zone2_offset = (resv->zone_offset & ~HAMMER_OFF_ZONE_MASK) |
                                HAMMER_ZONE_RAW_BUFFER;

                /*
                 * If we are releasing a zone and all of its reservations
                 * were undone we have to clean out all hammer and device
                 * buffers associated with the big-block.
                 *
                 * Any direct allocations will cause this test to fail
                 * (bytes_freed will never reach append_off), which is
                 * the behavior we desire.  Once the zone has been assigned
                 * to the big-block the only way to allocate from it in the
                 * future is if the reblocker can completely clean it out,
                 * and that will also properly call hammer_del_buffers().
                 *
                 * If we don't, we risk all sorts of buffer cache aliasing
                 * effects, including overlapping buffers with different
                 * sizes.
                 */
                if (resv->bytes_freed == resv->append_off) {
                        hammer_del_buffers(hmp, resv->zone_offset,
                                           zone2_offset,
                                           HAMMER_LARGEBLOCK_SIZE);
                }
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, M_HAMMER);
                --hammer_count_reservations;
        }
}

/*
 * This ensures that no data reallocations will take place at the specified
 * zone2_offset (pointing to the base of a big-block) for 2 flush cycles,
 * preventing deleted data space, which has no UNDO, from being reallocated
 * too quickly.
 */
static int
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
                        hammer_off_t zone2_offset)
{
        int error;

        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), M_HAMMER,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->refs = 1; /* ref for on-delay list */
                resv->zone_offset = zone2_offset;
                resv->append_off = HAMMER_LARGEBLOCK_SIZE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        error = EAGAIN;
                        kfree(resv, M_HAMMER);
                } else {
                        error = 0;
                        ++hammer_count_reservations;
                }
        } else if (resv->flags & HAMMER_RESF_ONDELAY) {
                --hmp->rsv_fromdelay;
                resv->flags &= ~HAMMER_RESF_ONDELAY;
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
                error = 0;
        } else {
                ++resv->refs;   /* ref for on-delay list */
                error = 0;
        }
        if (error == 0) {
                ++hmp->rsv_fromdelay;
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
        return(error);
}

void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        --hmp->rsv_fromdelay;
        hammer_blockmap_reserve_complete(hmp, resv);
}
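
/*
 * Example (a sketch, with hypothetical names): a reservation parked on
 * hmp->delay_list by hammer_reserve_setdelay() is released again once
 * its flush group has been committed, along the lines of:
 *
 *      while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL &&
 *             resv->flush_group <= completed_group) {
 *              hammer_reserve_clrdelay(hmp, resv);
 *      }
 *
 * where completed_group stands in for the flusher's count of the last
 * fully committed flush group.
 */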

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_reserve_t resv;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_LARGEBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Freeing previously allocated space
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;
again:
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                if (resv) {
                        /*
                         * Portions of this block have been reserved; do
                         * not free it.
                         *
                         * Make sure the reservation remains through
                         * the next flush cycle so potentially undoable
                         * data is not overwritten.
                         */
                        KKASSERT(resv->zone == zone);
                        hammer_reserve_setdelay(hmp, resv, base_off);
                } else if ((blockmap->next_offset ^ zone_offset) &
                            ~HAMMER_LARGEBLOCK_MASK64) {
                        /*
                         * Our iterator is not in the now-free big-block
                         * and we can release it.
                         *
                         * Make sure the reservation remains through
                         * the next flush cycle so potentially undoable
                         * data is not overwritten.
                         */
                        if (hammer_reserve_setdelay(hmp, NULL, base_off))
                                goto again;
                        KKASSERT(layer2->zone == zone);
                        /*
                         * XXX maybe incorporate this del call in the
                         * release code by setting base_offset, bytes_freed,
                         * etc.
                         */
                        hammer_del_buffers(hmp,
                                           zone_offset &
                                              ~HAMMER_LARGEBLOCK_MASK64,
                                           base_off,
                                           HAMMER_LARGEBLOCK_SIZE);
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return(0);
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        if (error)
                goto failed;
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free -= bytes;

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        return(error);
}

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp) {
                bytes = 0;
                goto failed;
        }
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         *
         * (reuse buffer, layer1 pointer becomes invalid)
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        if (*errorp) {
                bytes = 0;
                goto failed;
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        zone_offset, bytes);
        }
        return(bytes);
}


/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
                       int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
                        HAMMER_ZONE_RAW_BUFFER;

        /*
         * We can actually stop here, normal blockmaps are now direct-mapped
         * onto the freemap and so represent zone-2 addresses.
         */
        if (hammer_verify_zone == 0) {
                *errorp = 0;
                return(result_offset);
        }
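
        /*
         * E.g. a zone-10 (large-data) offset such as 0xa000000000100000
         * maps to the raw-buffer (zone-2) offset 0x2000000000100000:
         * only the 4-bit zone field in the top bits changes, the
         * remainder of the offset is carried through unchanged.
         */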

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        if (*errorp)
                goto failed;
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        if (*errorp)
                goto failed;
        if (layer2->zone == 0) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);
        } else if (layer2->zone != zone) {
                panic("hammer_blockmap_lookup: bad zone %d/%d\n",
                        layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

failed:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        zone_offset, result_offset);
        }
        return(result_offset);
}


/*
 * Check space availability
 */
int
hammer_checkspace(hammer_mount_t hmp, int slop)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        int64_t usedbytes;

        usedbytes = hmp->rsv_inodes * in_size +
                    hmp->rsv_recs * rec_size +
                    hmp->rsv_databytes +
                    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
                    ((int64_t)hidirtybufspace << 2) +
                    (slop << HAMMER_LARGEBLOCK_BITS);
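
        /*
         * Worked example, assuming 8MB big-blocks (HAMMER_LARGEBLOCK_BITS
         * == 23): rsv_fromdelay == 4 contributes 32MB and slop == 8
         * contributes 64MB; if usedbytes then totals ~100MB, the test
         * below requires 100MB >> 23 = 12 free big-blocks (the shift
         * truncates, i.e. rounds the requirement down).
         */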

        hammer_count_extra_space_used = usedbytes;      /* debugging */

        if (hmp->copy_stat_freebigblocks >=
            (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
                return(0);
        }
        return (ENOSPC);
}