1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static int hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
44                         hammer_off_t zone2_offset);
45
46
47 /*
48  * Reserved big-blocks red-black tree support
49  */
50 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
51              hammer_res_rb_compare, hammer_off_t, zone_offset);
52
53 static int
54 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
55 {
56         if (res1->zone_offset < res2->zone_offset)
57                 return(-1);
58         if (res1->zone_offset > res2->zone_offset)
59                 return(1);
60         return(0);
61 }
62
63 /*
64  * Allocate bytes from a zone
65  */
66 hammer_off_t
67 hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
68                       int bytes, int *errorp)
69 {
70         hammer_mount_t hmp;
71         hammer_volume_t root_volume;
72         hammer_blockmap_t blockmap;
73         hammer_blockmap_t freemap;
74         hammer_reserve_t resv;
75         struct hammer_blockmap_layer1 *layer1;
76         struct hammer_blockmap_layer2 *layer2;
77         hammer_buffer_t buffer1 = NULL;
78         hammer_buffer_t buffer2 = NULL;
79         hammer_buffer_t buffer3 = NULL;
80         hammer_off_t tmp_offset;
81         hammer_off_t next_offset;
82         hammer_off_t result_offset;
83         hammer_off_t layer1_offset;
84         hammer_off_t layer2_offset;
85         hammer_off_t base_off;
86         int loops = 0;
87         int offset;             /* offset within big-block */
88
89         hmp = trans->hmp;
90
91         /*
92          * Deal with alignment and buffer-boundary issues.
93          *
94          * Be careful, certain primary alignments are used below to allocate
95          * new blockmap blocks.
96          */
97         bytes = (bytes + 15) & ~15;
98         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
99         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
100
101         /*
102          * Setup
103          */
104         root_volume = trans->rootvol;
105         *errorp = 0;
106         blockmap = &hmp->blockmap[zone];
107         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
108         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
109
110         next_offset = blockmap->next_offset;
111 again:
112         /*
113          * Check for wrap
114          */
115         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
116                 if (++loops == 2) {
117                         result_offset = 0;
118                         *errorp = ENOSPC;
119                         goto failed;
120                 }
121                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
122         }
123
124         /*
125          * The allocation request may not cross a buffer boundary.  Special
126          * large allocations must not cross a large-block boundary.
127          */
128         tmp_offset = next_offset + bytes - 1;
129         if (bytes <= HAMMER_BUFSIZE) {
130                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
131                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
132                         goto again;
133                 }
134         } else {
135                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
136                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
137                         goto again;
138                 }
139         }
140         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
141
142         /*
143          * Dive layer 1.
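             * (The freemap is two levels deep: each layer1 entry covers a
             *  range of big-blocks and points at an array of layer2 entries,
             *  one per big-block.)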
144          */
145         layer1_offset = freemap->phys_offset +
146                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
147         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
148         if (*errorp) {
149                 result_offset = 0;
150                 goto failed;
151         }
152
153         /*
154          * Check CRC.
155          */
156         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
157                 Debugger("CRC FAILED: LAYER1");
158         }
159
160         /*
161          * If we are at a big-block boundary and layer1 indicates no
162          * free big-blocks, then we cannot allocate a new big-block in
163          * layer2, so skip to the next layer1 entry.
164          */
165         if (offset == 0 && layer1->blocks_free == 0) {
166                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
167                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
168                 goto again;
169         }
170         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
171
172         /*
173          * Dive layer 2, each entry represents a large-block.
174          */
175         layer2_offset = layer1->phys_offset +
176                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
177         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
178         if (*errorp) {
179                 result_offset = 0;
180                 goto failed;
181         }
182
183         /*
184          * Check CRC.
185          */
186         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
187                 Debugger("CRC FAILED: LAYER2");
188         }
189
190         /*
191          * Skip the layer if the zone is owned by someone other then us.
192          */
193         if (layer2->zone && layer2->zone != zone) {
194                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
195                 goto again;
196         }
197         if (offset < layer2->append_off) {
198                 next_offset += layer2->append_off - offset;
199                 goto again;
200         }
201
202         /*
203          * We need the lock from this point on.  We have to re-check zone
204          * ownership after acquiring the lock and also check for reservations.
205          */
206         hammer_lock_ex(&hmp->blkmap_lock);
207
208         if (layer2->zone && layer2->zone != zone) {
209                 hammer_unlock(&hmp->blkmap_lock);
210                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
211                 goto again;
212         }
213         if (offset < layer2->append_off) {
214                 hammer_unlock(&hmp->blkmap_lock);
215                 next_offset += layer2->append_off - offset;
216                 goto again;
217         }
218
219         /*
220          * The bigblock might be reserved by another zone.  If it is reserved
221          * by our zone we may have to move next_offset past the append_off.
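             * (Reservations are tracked in hmp->rb_resv_root, keyed by the
             *  zone-2 raw-buffer base address of the big-block, which is what
             *  base_off computes below.)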
222          */
223         base_off = (next_offset &
224                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | 
225                     HAMMER_ZONE_RAW_BUFFER;
226         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
227         if (resv) {
228                 if (resv->zone != zone) {
229                         hammer_unlock(&hmp->blkmap_lock);
230                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
231                                       ~HAMMER_LARGEBLOCK_MASK64;
232                         goto again;
233                 }
234                 if (offset < resv->append_off) {
235                         hammer_unlock(&hmp->blkmap_lock);
236                         next_offset += resv->append_off - offset;
237                         goto again;
238                 }
239         }
240
241         /*
242          * Ok, we can allocate out of this layer2 big-block.  Assume ownership
243          * of the layer for real.  At this point we've validated any
244          * reservation that might exist and can just ignore resv.
245          */
246         if (layer2->zone == 0) {
247                 /*
248                  * Assign the bigblock to our zone
249                  */
250                 hammer_modify_buffer(trans, buffer1,
251                                      layer1, sizeof(*layer1));
252                 --layer1->blocks_free;
253                 layer1->layer1_crc = crc32(layer1,
254                                            HAMMER_LAYER1_CRCSIZE);
255                 hammer_modify_buffer_done(buffer1);
256                 hammer_modify_buffer(trans, buffer2,
257                                      layer2, sizeof(*layer2));
258                 layer2->zone = zone;
259                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
260                 KKASSERT(layer2->append_off == 0);
261                 hammer_modify_volume_field(trans, trans->rootvol,
262                                            vol0_stat_freebigblocks);
263                 --root_volume->ondisk->vol0_stat_freebigblocks;
264                 hmp->copy_stat_freebigblocks =
265                         root_volume->ondisk->vol0_stat_freebigblocks;
266                 hammer_modify_volume_done(trans->rootvol);
267         } else {
268                 hammer_modify_buffer(trans, buffer2,
269                                      layer2, sizeof(*layer2));
270         }
271         KKASSERT(layer2->zone == zone);
272
273         layer2->bytes_free -= bytes;
274         KKASSERT(layer2->append_off <= offset);
275         layer2->append_off = offset + bytes;
276         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
277         hammer_modify_buffer_done(buffer2);
278         KKASSERT(layer2->bytes_free >= 0);
279
280         if (resv) {
281                 KKASSERT(resv->append_off <= offset);
282                 resv->append_off = offset + bytes;
283         }
284
285         /*
286          * If we are allocating from the base of a new buffer we can avoid
287          * a disk read by calling hammer_bnew_ext().
288          */
289         if ((next_offset & HAMMER_BUFMASK) == 0) {
290                 hammer_bnew_ext(trans->hmp, next_offset, bytes,
291                                 errorp, &buffer3);
292         }
293         result_offset = next_offset;
294
295         /*
296          * Process allocated result_offset
297          */
298         hammer_modify_volume(NULL, root_volume, NULL, 0);
299         blockmap->next_offset = next_offset + bytes;
300         hammer_modify_volume_done(root_volume);
301         hammer_unlock(&hmp->blkmap_lock);
302 failed:
303
304         /*
305          * Cleanup
306          */
307         if (buffer1)
308                 hammer_rel_buffer(buffer1, 0);
309         if (buffer2)
310                 hammer_rel_buffer(buffer2, 0);
311         if (buffer3)
312                 hammer_rel_buffer(buffer3, 0);
313
314         return(result_offset);
315 }
316
317 /*
318  * Frontend function - Reserve bytes in a zone.
319  *
320  * This code reserves bytes out of a blockmap without committing to any
321  * meta-data modifications, allowing the front-end to directly issue disk
322  * write I/O for large blocks of data.
323  *
324  * The backend later finalizes the reservation with hammer_blockmap_finalize()
325  * upon committing the related record.
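     *
     * Roughly, the intended flow is (a sketch, not verbatim caller code):
     *
     *    resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
     *    ... front-end issues direct write I/O against zone_off ...
     *    hammer_blockmap_finalize(trans, zone_off, bytes);  (back-end commit)
     *    hammer_blockmap_reserve_complete(hmp, resv);  (when the record
     *    holding the reservation is eventually freed)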
326  */
327 hammer_reserve_t
328 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
329                         hammer_off_t *zone_offp, int *errorp)
330 {
331         hammer_volume_t root_volume;
332         hammer_blockmap_t blockmap;
333         hammer_blockmap_t freemap;
334         struct hammer_blockmap_layer1 *layer1;
335         struct hammer_blockmap_layer2 *layer2;
336         hammer_buffer_t buffer1 = NULL;
337         hammer_buffer_t buffer2 = NULL;
338         hammer_buffer_t buffer3 = NULL;
339         hammer_off_t tmp_offset;
340         hammer_off_t next_offset;
341         hammer_off_t layer1_offset;
342         hammer_off_t layer2_offset;
343         hammer_off_t base_off;
344         hammer_reserve_t resv;
345         hammer_reserve_t resx;
346         int loops = 0;
347         int offset;
348
349         /*
350          * Setup
351          */
352         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
353         root_volume = hammer_get_root_volume(hmp, errorp);
354         if (*errorp)
355                 return(NULL);
356         blockmap = &hmp->blockmap[zone];
357         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
358         KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
359
360         /*
361          * Deal with alignment and buffer-boundary issues.
362          *
363          * Be careful, certain primary alignments are used below to allocate
364          * new blockmap blocks.
365          */
366         bytes = (bytes + 15) & ~15;
367         KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
368
369         next_offset = blockmap->next_offset;
370 again:
371         resv = NULL;
372         /*
373          * Check for wrap
374          */
375         if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
376                 if (++loops == 2) {
377                         *errorp = ENOSPC;
378                         goto failed;
379                 }
380                 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
381         }
382
383         /*
384          * The allocation request may not cross a buffer boundary.  Special
385          * large allocations must not cross a large-block boundary.
386          */
387         tmp_offset = next_offset + bytes - 1;
388         if (bytes <= HAMMER_BUFSIZE) {
389                 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
390                         next_offset = tmp_offset & ~HAMMER_BUFMASK64;
391                         goto again;
392                 }
393         } else {
394                 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
395                         next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
396                         goto again;
397                 }
398         }
399         offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
400
401         /*
402          * Dive layer 1.
403          */
404         layer1_offset = freemap->phys_offset +
405                         HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
406         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
407         if (*errorp)
408                 goto failed;
409
410         /*
411          * Check CRC.
412          */
413         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
414                 Debugger("CRC FAILED: LAYER1");
415         }
416
417         /*
418          * If we are at a big-block boundary and layer1 indicates no
419          * free big-blocks, then we cannot allocate a new big-block in
420          * layer2, so skip to the next layer1 entry.
421          */
422         if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
423             layer1->blocks_free == 0) {
424                 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
425                               ~HAMMER_BLOCKMAP_LAYER2_MASK;
426                 goto again;
427         }
428         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
429
430         /*
431          * Dive layer 2, each entry represents a large-block.
432          */
433         layer2_offset = layer1->phys_offset +
434                         HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
435         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
436         if (*errorp)
437                 goto failed;
438
439         /*
440          * Check CRC if not allocating into uninitialized space (which we
441          * aren't when reserving space).
442          */
443         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
444                 Debugger("CRC FAILED: LAYER2");
445         }
446
447         /*
448          * Skip the layer if the zone is owned by someone other then us.
449          */
450         if (layer2->zone && layer2->zone != zone) {
451                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
452                 goto again;
453         }
454         if (offset < layer2->append_off) {
455                 next_offset += layer2->append_off - offset;
456                 goto again;
457         }
458
459         /*
460          * We need the lock from this point on.  We have to re-check zone
461          * ownership after acquiring the lock and also check for reservations.
462          */
463         hammer_lock_ex(&hmp->blkmap_lock);
464
465         if (layer2->zone && layer2->zone != zone) {
466                 hammer_unlock(&hmp->blkmap_lock);
467                 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
468                 goto again;
469         }
470         if (offset < layer2->append_off) {
471                 hammer_unlock(&hmp->blkmap_lock);
472                 next_offset += layer2->append_off - offset;
473                 goto again;
474         }
475
476         /*
477          * The bigblock might be reserved by another zone.  If it is reserved
478          * by our zone we may have to move next_offset past the append_off.
479          */
480         base_off = (next_offset &
481                     (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
482                     HAMMER_ZONE_RAW_BUFFER;
483         resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
484         if (resv) {
485                 if (resv->zone != zone) {
486                         hammer_unlock(&hmp->blkmap_lock);
487                         next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
488                                       ~HAMMER_LARGEBLOCK_MASK64;
489                         goto again;
490                 }
491                 if (offset < resv->append_off) {
492                         hammer_unlock(&hmp->blkmap_lock);
493                         next_offset += resv->append_off - offset;
494                         goto again;
495                 }
496                 ++resv->refs;
497                 resx = NULL;
498         } else {
499                 resx = kmalloc(sizeof(*resv), M_HAMMER,
500                                M_WAITOK | M_ZERO | M_USE_RESERVE);
501                 resx->refs = 1;
502                 resx->zone = zone;
503                 resx->zone_offset = base_off;
504                 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
505                 KKASSERT(resv == NULL);
506                 resv = resx;
507                 ++hammer_count_reservations;
508         }
509         resv->append_off = offset + bytes;
510
511         /*
512          * If we are not reserving a whole buffer but are at the start of
513          * a new block, call hammer_bnew() to avoid a disk read.
514          *
515          * If we are reserving a whole buffer (or more), the caller will
516          * probably use a direct read, so do nothing.
517          */
518         if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
519                 hammer_bnew(hmp, next_offset, errorp, &buffer3);
520         }
521
522         /*
523          * Adjust our iterator and alloc_offset.  The layer1 and layer2
524          * space beyond alloc_offset is uninitialized.  alloc_offset must
525          * be big-block aligned.
526          */
527         blockmap->next_offset = next_offset + bytes;
528         hammer_unlock(&hmp->blkmap_lock);
529
530 failed:
531         if (buffer1)
532                 hammer_rel_buffer(buffer1, 0);
533         if (buffer2)
534                 hammer_rel_buffer(buffer2, 0);
535         if (buffer3)
536                 hammer_rel_buffer(buffer3, 0);
537         hammer_rel_volume(root_volume, 0);
538         *zone_offp = next_offset;
539
540         return(resv);
541 }
542
543 /*
544  * Backend function - undo a portion of a reservation.
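     *
     * The freed byte count is only accumulated here;
     * hammer_blockmap_reserve_complete() later compares bytes_freed against
     * append_off to decide whether the big-block's buffers can be thrown away.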
545  */
546 void
547 hammer_blockmap_reserve_undo(hammer_reserve_t resv,
548                          hammer_off_t zone_offset, int bytes)
549 {
550         resv->bytes_freed += bytes;
551 }
552
553
554 /*
555  * A record with a storage reservation calls this function when it is
556  * being freed.  The storage may or may not have actually been allocated.
557  *
558  * This function removes the lock that prevented other entities from
559  * allocating out of the storage or removing the zone assignment.
560  */
561 void
562 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
563 {
564         hammer_off_t zone2_offset;
565
566         KKASSERT(resv->refs > 0);
567         if (--resv->refs == 0) {
568                 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
569
570                 zone2_offset = (resv->zone_offset & ~HAMMER_OFF_ZONE_MASK) |
571                                 HAMMER_ZONE_RAW_BUFFER;
572
573                 /*
574                  * If we are releasing a zone and all of its reservations
575                  * were undone we have to clean out all hammer and device
576                  * buffers associated with the big block.  We do this
577                  * primarily because the large-block may be reallocated
578                  * from non-large-data to large-data or vice versa, resulting
579                  * in a different mix of 16K and 64K buffer cache buffers.
580                  * XXX - this isn't fun and needs to be redone.
581                  *
582                  * Any direct allocations will cause this test to fail
583                  * (bytes_freed will never reach append_off), which is
584                  * the behavior we desire.  Once the zone has been assigned
585                  * to the big-block the only way to allocate from it in the
586                  * future is if the reblocker can completely clean it out,
587                  * and that will also properly call hammer_del_buffers().
588                  *
589                  * If we don't we risk all sorts of buffer cache aliasing
590                  * effects, including overlapping buffers with different
591                  * sizes.
592                  */
593                 if (resv->bytes_freed == resv->append_off) {
594                         hammer_del_buffers(hmp, resv->zone_offset,
595                                            zone2_offset,
596                                            HAMMER_LARGEBLOCK_SIZE);
597                 }
598                 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
599                 kfree(resv, M_HAMMER);
600                 --hammer_count_reservations;
601         }
602 }
603
604 /*
605  * This ensures that no data reallocations will take place at the specified
606  * zone2_offset (pointing to the base of a bigblock) for 2 flush cycles,
607  * preventing deleted data space, which has no UNDO, from being reallocated 
608  * too quickly.
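     *
     * Returns EAGAIN if a freshly allocated reservation could not be inserted
     * because an entry already exists at that offset, in which case the caller
     * simply retries the lookup.  Otherwise the reservation is (re)queued on
     * hmp->delay_list for a future flush group and 0 is returned.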
609  */
610 static int
611 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
612                         hammer_off_t zone2_offset)
613 {
614         int error;
615
616         if (resv == NULL) {
617                 resv = kmalloc(sizeof(*resv), M_HAMMER,
618                                M_WAITOK | M_ZERO | M_USE_RESERVE);
619                 resv->refs = 1; /* ref for on-delay list */
620                 resv->zone_offset = zone2_offset;
621                 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
622                 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
623                         error = EAGAIN;
624                         kfree(resv, M_HAMMER);
625                 } else {
626                         error = 0;
627                         ++hammer_count_reservations;
628                 }
629         } else if (resv->flags & HAMMER_RESF_ONDELAY) {
630                 --hmp->rsv_fromdelay;
631                 resv->flags &= ~HAMMER_RESF_ONDELAY;
632                 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
633                 resv->flush_group = hmp->flusher.next + 1;
634                 error = 0;
635         } else {
636                 ++resv->refs;   /* ref for on-delay list */
637                 error = 0;
638         }
639         if (error == 0) {
640                 ++hmp->rsv_fromdelay;
641                 resv->flags |= HAMMER_RESF_ONDELAY;
642                 resv->flush_group = hmp->flusher.next + 1;
643                 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
644         }
645         return(error);
646 }
647
648 void
649 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
650 {
651         KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
652         resv->flags &= ~HAMMER_RESF_ONDELAY;
653         TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
654         --hmp->rsv_fromdelay;
655         hammer_blockmap_reserve_complete(hmp, resv);
656 }
657
658 /*
659  * Backend function - free (offset, bytes) in a zone.
660  *
661  * XXX error return
662  */
663 void
664 hammer_blockmap_free(hammer_transaction_t trans,
665                      hammer_off_t zone_offset, int bytes)
666 {
667         hammer_mount_t hmp;
668         hammer_volume_t root_volume;
669         hammer_reserve_t resv;
670         hammer_blockmap_t blockmap;
671         hammer_blockmap_t freemap;
672         struct hammer_blockmap_layer1 *layer1;
673         struct hammer_blockmap_layer2 *layer2;
674         hammer_buffer_t buffer1 = NULL;
675         hammer_buffer_t buffer2 = NULL;
676         hammer_off_t layer1_offset;
677         hammer_off_t layer2_offset;
678         hammer_off_t base_off;
679         int error;
680         int zone;
681
682         if (bytes == 0)
683                 return;
684         hmp = trans->hmp;
685
686         /*
687          * Alignment
688          */
689         bytes = (bytes + 15) & ~15;
690         KKASSERT(bytes <= HAMMER_XBUFSIZE);
691         KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) & 
692                   ~HAMMER_LARGEBLOCK_MASK64) == 0);
693
694         /*
695          * Basic zone validation & locking
696          */
697         zone = HAMMER_ZONE_DECODE(zone_offset);
698         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
699         root_volume = trans->rootvol;
700         error = 0;
701
702         blockmap = &hmp->blockmap[zone];
703         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
704
705         /*
706          * Dive layer 1.
707          */
708         layer1_offset = freemap->phys_offset +
709                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
710         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
711         if (error)
712                 goto failed;
713         KKASSERT(layer1->phys_offset &&
714                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
715         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
716                 Debugger("CRC FAILED: LAYER1");
717         }
718
719         /*
720          * Dive layer 2, each entry represents a large-block.
721          */
722         layer2_offset = layer1->phys_offset +
723                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
724         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
725         if (error)
726                 goto failed;
727         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
728                 Debugger("CRC FAILED: LAYER2");
729         }
730
731         hammer_lock_ex(&hmp->blkmap_lock);
732
733         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
734
735         /*
736          * Freeing previously allocated space
737          */
738         KKASSERT(layer2->zone == zone);
739         layer2->bytes_free += bytes;
740         KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
741         if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
742                 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
743 again:
744                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
745                                  base_off);
746                 if (resv) {
747                         /*
748                          * Portions of this block have been reserved; do
749                          * not free it.
750                          *
751                          * Make sure the reservation remains through
752                          * the next flush cycle so potentially undoable
753                          * data is not overwritten.
754                          */
755                         KKASSERT(resv->zone == zone);
756                         hammer_reserve_setdelay(hmp, resv, base_off);
757                 } else if ((blockmap->next_offset ^ zone_offset) &
758                             ~HAMMER_LARGEBLOCK_MASK64) {
759                         /*
760                          * Our iterator is not in the now-free big-block
761                          * and we can release it.
762                          *
763                          * Make sure the reservation remains through
764                          * the next flush cycle so potentially undoable
765                          * data is not overwritten.
766                          */
767                         if (hammer_reserve_setdelay(hmp, NULL, base_off))
768                                 goto again;
769                         KKASSERT(layer2->zone == zone);
770                         /*
771                          * XXX maybe incorporate this del call in the
772                          * release code by setting base_offset, bytes_freed,
773                          * etc.
774                          */
775                         hammer_del_buffers(hmp,
776                                            zone_offset &
777                                               ~HAMMER_LARGEBLOCK_MASK64,
778                                            base_off,
779                                            HAMMER_LARGEBLOCK_SIZE);
780                         layer2->zone = 0;
781                         layer2->append_off = 0;
782                         hammer_modify_buffer(trans, buffer1,
783                                              layer1, sizeof(*layer1));
784                         ++layer1->blocks_free;
785                         layer1->layer1_crc = crc32(layer1,
786                                                    HAMMER_LAYER1_CRCSIZE);
787                         hammer_modify_buffer_done(buffer1);
788                         hammer_modify_volume_field(trans,
789                                         trans->rootvol,
790                                         vol0_stat_freebigblocks);
791                         ++root_volume->ondisk->vol0_stat_freebigblocks;
792                         hmp->copy_stat_freebigblocks =
793                            root_volume->ondisk->vol0_stat_freebigblocks;
794                         hammer_modify_volume_done(trans->rootvol);
795                 }
796         }
797
798         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
799         hammer_modify_buffer_done(buffer2);
800         hammer_unlock(&hmp->blkmap_lock);
801
802 failed:
803         if (buffer1)
804                 hammer_rel_buffer(buffer1, 0);
805         if (buffer2)
806                 hammer_rel_buffer(buffer2, 0);
807 }
808
809 /*
810  * Backend function - finalize (offset, bytes) in a zone.
811  *
812  * Allocate space that was previously reserved by the frontend.
813  */
814 int
815 hammer_blockmap_finalize(hammer_transaction_t trans,
816                          hammer_off_t zone_offset, int bytes)
817 {
818         hammer_mount_t hmp;
819         hammer_volume_t root_volume;
820         hammer_blockmap_t blockmap;
821         hammer_blockmap_t freemap;
822         struct hammer_blockmap_layer1 *layer1;
823         struct hammer_blockmap_layer2 *layer2;
824         hammer_buffer_t buffer1 = NULL;
825         hammer_buffer_t buffer2 = NULL;
826         hammer_off_t layer1_offset;
827         hammer_off_t layer2_offset;
828         int error;
829         int zone;
830         int offset;
831
832         if (bytes == 0)
833                 return(0);
834         hmp = trans->hmp;
835
836         /*
837          * Alignment
838          */
839         bytes = (bytes + 15) & ~15;
840         KKASSERT(bytes <= HAMMER_XBUFSIZE);
841
842         /*
843          * Basic zone validation & locking
844          */
845         zone = HAMMER_ZONE_DECODE(zone_offset);
846         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
847         root_volume = trans->rootvol;
848         error = 0;
849
850         blockmap = &hmp->blockmap[zone];
851         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
852
853         /*
854          * Dive layer 1.
855          */
856         layer1_offset = freemap->phys_offset +
857                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
858         layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
859         if (error)
860                 goto failed;
861         KKASSERT(layer1->phys_offset &&
862                  layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
863         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
864                 Debugger("CRC FAILED: LAYER1");
865         }
866
867         /*
868          * Dive layer 2, each entry represents a large-block.
869          */
870         layer2_offset = layer1->phys_offset +
871                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
872         layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
873         if (error)
874                 goto failed;
875         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
876                 Debugger("CRC FAILED: LAYER2");
877         }
878
879         hammer_lock_ex(&hmp->blkmap_lock);
880
881         hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
882
883         /*
884          * Finalize some or all of the space covered by a current
885          * reservation.  An allocation in the same layer may have
886          * already assigned ownership.
887          */
888         if (layer2->zone == 0) {
889                 hammer_modify_buffer(trans, buffer1,
890                                      layer1, sizeof(*layer1));
891                 --layer1->blocks_free;
892                 layer1->layer1_crc = crc32(layer1,
893                                            HAMMER_LAYER1_CRCSIZE);
894                 hammer_modify_buffer_done(buffer1);
895                 layer2->zone = zone;
896                 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
897                 KKASSERT(layer2->append_off == 0);
898                 hammer_modify_volume_field(trans,
899                                 trans->rootvol,
900                                 vol0_stat_freebigblocks);
901                 --root_volume->ondisk->vol0_stat_freebigblocks;
902                 hmp->copy_stat_freebigblocks =
903                    root_volume->ondisk->vol0_stat_freebigblocks;
904                 hammer_modify_volume_done(trans->rootvol);
905         }
906         if (layer2->zone != zone)
907                 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
908         KKASSERT(layer2->zone == zone);
909         layer2->bytes_free -= bytes;
910
911         /*
912          * Finalizations can occur out of order, or combined with allocations.
913          * append_off must be set to the highest allocated offset.
914          */
915         offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
916         if (layer2->append_off < offset)
917                 layer2->append_off = offset;
918
919         layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
920         hammer_modify_buffer_done(buffer2);
921         hammer_unlock(&hmp->blkmap_lock);
922
923 failed:
924         if (buffer1)
925                 hammer_rel_buffer(buffer1, 0);
926         if (buffer2)
927                 hammer_rel_buffer(buffer2, 0);
928         return(error);
929 }
930
931 /*
932  * Return the number of free bytes in the big-block containing the
933  * specified blockmap offset.
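     *
     * *curp is set to 1 if the zone's allocation iterator (next_offset)
     * currently points into the same big-block, otherwise 0.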
934  */
935 int
936 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
937                         int *curp, int *errorp)
938 {
939         hammer_volume_t root_volume;
940         hammer_blockmap_t blockmap;
941         hammer_blockmap_t freemap;
942         struct hammer_blockmap_layer1 *layer1;
943         struct hammer_blockmap_layer2 *layer2;
944         hammer_buffer_t buffer = NULL;
945         hammer_off_t layer1_offset;
946         hammer_off_t layer2_offset;
947         int bytes;
948         int zone;
949
950         zone = HAMMER_ZONE_DECODE(zone_offset);
951         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
952         root_volume = hammer_get_root_volume(hmp, errorp);
953         if (*errorp) {
954                 *curp = 0;
955                 return(0);
956         }
957         blockmap = &hmp->blockmap[zone];
958         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
959
960         /*
961          * Dive layer 1.
962          */
963         layer1_offset = freemap->phys_offset +
964                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
965         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
966         if (*errorp) {
967                 bytes = 0;
968                 goto failed;
969         }
970         KKASSERT(layer1->phys_offset);
971         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
972                 Debugger("CRC FAILED: LAYER1");
973         }
974
975         /*
976          * Dive layer 2, each entry represents a large-block.
977          *
978          * (reuse buffer, layer1 pointer becomes invalid)
979          */
980         layer2_offset = layer1->phys_offset +
981                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
982         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
983         if (*errorp) {
984                 bytes = 0;
985                 goto failed;
986         }
987         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
988                 Debugger("CRC FAILED: LAYER2");
989         }
990         KKASSERT(layer2->zone == zone);
991
992         bytes = layer2->bytes_free;
993
994         if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
995                 *curp = 0;
996         else
997                 *curp = 1;
998 failed:
999         if (buffer)
1000                 hammer_rel_buffer(buffer, 0);
1001         hammer_rel_volume(root_volume, 0);
1002         if (hammer_debug_general & 0x0800) {
1003                 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1004                         zone_offset, bytes);
1005         }
1006         return(bytes);
1007 }
1008
1009
1010 /*
1011  * Lookup a blockmap offset.
1012  */
1013 hammer_off_t
1014 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1015                        int *errorp)
1016 {
1017         hammer_volume_t root_volume;
1018         hammer_blockmap_t freemap;
1019         struct hammer_blockmap_layer1 *layer1;
1020         struct hammer_blockmap_layer2 *layer2;
1021         hammer_buffer_t buffer = NULL;
1022         hammer_off_t layer1_offset;
1023         hammer_off_t layer2_offset;
1024         hammer_off_t result_offset;
1025         hammer_off_t base_off;
1026         hammer_reserve_t resv;
1027         int zone;
1028
1029         /*
1030          * Calculate the zone-2 offset.
1031          */
1032         zone = HAMMER_ZONE_DECODE(zone_offset);
1033         KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1034
1035         result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1036                         HAMMER_ZONE_RAW_BUFFER;
1037
1038         /*
1039          * We can actually stop here, normal blockmaps are now direct-mapped
1040          * onto the freemap and so represent zone-2 addresses.
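      * The remainder of this function is a consistency check against the
      * freemap which only runs when the hammer_verify_zone debugging knob
      * is enabled.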
1041          */
1042         if (hammer_verify_zone == 0) {
1043                 *errorp = 0;
1044                 return(result_offset);
1045         }
1046
1047         /*
1048          * Validate the allocation zone
1049          */
1050         root_volume = hammer_get_root_volume(hmp, errorp);
1051         if (*errorp)
1052                 return(0);
1053         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1054         KKASSERT(freemap->phys_offset != 0);
1055
1056         /*
1057          * Dive layer 1.
1058          */
1059         layer1_offset = freemap->phys_offset +
1060                         HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1061         layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1062         if (*errorp)
1063                 goto failed;
1064         KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1065         if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1066                 Debugger("CRC FAILED: LAYER1");
1067         }
1068
1069         /*
1070          * Dive layer 2, each entry represents a large-block.
1071          */
1072         layer2_offset = layer1->phys_offset +
1073                         HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1074         layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1075
1076         if (*errorp)
1077                 goto failed;
1078         if (layer2->zone == 0) {
1079                 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1080                 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1081                                  base_off);
1082                 KKASSERT(resv && resv->zone == zone);
1083
1084         } else if (layer2->zone != zone) {
1085                 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1086                         layer2->zone, zone);
1087         }
1088         if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1089                 Debugger("CRC FAILED: LAYER2");
1090         }
1091
1092 failed:
1093         if (buffer)
1094                 hammer_rel_buffer(buffer, 0);
1095         hammer_rel_volume(root_volume, 0);
1096         if (hammer_debug_general & 0x0800) {
1097                 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1098                         zone_offset, result_offset);
1099         }
1100         return(result_offset);
1101 }
1102
1103
1104 /*
1105  * Check space availability
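      *
      * This is an estimate: worst-case space for reserved inodes, records and
      * data is summed with delayed-reuse big-blocks, a dirty buffer-cache
      * allowance and the caller-supplied slop, and the result (in big-blocks)
      * must be covered by the cached free big-block count or ENOSPC is
      * returned.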
1106  */
1107 int
1108 hammer_checkspace(hammer_mount_t hmp, int slop)
1109 {
1110         const int in_size = sizeof(struct hammer_inode_data) +
1111                             sizeof(union hammer_btree_elm);
1112         const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1113         int64_t usedbytes;
1114
1115         usedbytes = hmp->rsv_inodes * in_size +
1116                     hmp->rsv_recs * rec_size +
1117                     hmp->rsv_databytes +
1118                     ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1119                     ((int64_t)hidirtybufspace << 2) +
1120                     (slop << HAMMER_LARGEBLOCK_BITS);
1121
1122         hammer_count_extra_space_used = usedbytes;      /* debugging */
1123
1124         if (hmp->copy_stat_freebigblocks >=
1125             (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1126                 return(0);
1127         }
1128         return (ENOSPC);
1129 }
1130