HAMMER 56C/Many: Performance tuning - MEDIA STRUCTURES CHANGED!
dragonfly.git: sys/vfs/hammer/hammer_blockmap.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.20 2008/06/20 05:38:26 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
                      int bytes, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        hammer_lock_ex(&hmp->blkmap_lock);
        next_offset = blockmap->next_offset;

again:
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto done;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.
         * Allocations larger than a buffer must not cross a large-block
         * boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        /*
         * Complex junk follows.  The next_offset is an ephemeral pointer;
         * it can point anywhere, really, so we have to check that we can
         * actually allocate at this point.
         *
         * If we own the zone but have just entered it, the easiest thing
         * to do is skip it.  We could adjust according to
         * layer2->append_off but it isn't really worth doing.
         *
         * If someone else owns the zone we must skip it.
         */
        if (layer2->zone == zone) {
                if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        goto again;
                }
        } else if (layer2->zone) {
                next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                              ~HAMMER_LARGEBLOCK_MASK64;
                goto again;
        }

        /*
         * Now check to see if someone has reserved the big-block.  Again,
         * if we are at the beginning of it then the reservation was not
         * under our control and we must skip it.  Same if someone else owns
         * the reservation.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        goto again;
                }
                if (resv->zone != zone) {
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the bigblock to our zone
                 */
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                KKASSERT(layer2->append_off <=
                         ((int)next_offset & HAMMER_LARGEBLOCK_MASK));
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        layer2->bytes_free -= bytes;
        layer2->append_off = ((int)next_offset & HAMMER_LARGEBLOCK_MASK) +
                             bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
        }
        result_offset = next_offset;

        /*
         * Process allocated result_offset
         */
done:
        hammer_modify_volume(NULL, root_volume, NULL, 0);
        if (result_offset) {
                if (result_offset == next_offset) {
                        blockmap->next_offset = next_offset + bytes;
                } else {
                        blockmap->next_offset = next_offset;
                }
        } else {
                blockmap->next_offset = next_offset;
        }
        hammer_modify_volume_done(root_volume);
        hammer_unlock(&hmp->blkmap_lock);

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
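
/*
 * Illustrative sketch, not part of the original source: roughly how a
 * backend caller might use hammer_blockmap_alloc().  The helper name,
 * the element size, and the error handling are assumptions chosen for
 * illustration only.
 */
#if 0
static hammer_off_t
example_alloc_btree_space(hammer_transaction_t trans, int *errorp)
{
        hammer_off_t offset;

        /* bytes is rounded up to 16 and must not exceed HAMMER_XBUFSIZE */
        offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
                                       64, errorp);
        if (*errorp)
                return(0);      /* typically ENOSPC after a double wrap */
        return(offset);
}
#endif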

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        hammer_lock_ex(&hmp->blkmap_lock);
        next_offset = blockmap->next_offset;
again:
        resv = NULL;

        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto done;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.
         * Allocations larger than a buffer must not cross a large-block
         * boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, then we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.  A reservation never allocates into uninitialized
         * space, so the layer2 entry must already be valid.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        /*
         * Complex junk follows.  The next_offset is an ephemeral pointer;
         * it can point anywhere, really, so we have to check that we can
         * actually allocate at this point.
         *
         * If we own the zone but have just entered it, the easiest thing
         * to do is skip it.  We could adjust according to
         * layer2->append_off but it isn't really worth doing.
         *
         * If someone else owns the zone we must skip it.
         */
        if (layer2->zone == zone) {
                if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        goto again;
                }
        } else if (layer2->zone) {
                next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                              ~HAMMER_LARGEBLOCK_MASK64;
                goto again;
        }

        /*
         * Now check to see if someone has reserved the big-block.  Again,
         * if we are at the beginning of it then the reservation was not
         * under our control and we must skip it.  Same if someone else owns
         * the reservation.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0) {
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        goto again;
                }
                if (resv->zone != zone) {
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }

        /*
         * The reservation code does not modify layer2->bytes_free, it
         * simply adjusts next_offset.
         */
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * Make the zone-2 reservation.
         */
        if (resv) {
                ++resv->refs;
                KKASSERT(resv->zone == zone);
        } else {
                resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO);
                resv->refs = 1;
                resv->zone = zone;
                resv->zone_offset = base_off;
                resx = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                KKASSERT(resx == NULL);
                ++hammer_count_reservations;
        }

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew(hmp, next_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator.  On success the iterator is advanced past
         * the reservation; on failure it is left where the scan stopped
         * so a later call can resume from there.
         */
done:
        if (resv) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                blockmap->next_offset = next_offset + bytes;
                hammer_modify_volume_done(root_volume);
        } else if (blockmap->next_offset != next_offset) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                blockmap->next_offset = next_offset;
                hammer_modify_volume_done(root_volume);
        }

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        hammer_unlock(&hmp->blkmap_lock);
        *zone_offp = next_offset;

        return(resv);
}
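
/*
 * Illustrative sketch, not part of the original source: the intended
 * pairing of hammer_blockmap_reserve() with
 * hammer_blockmap_reserve_complete().  The zone, size, and control flow
 * below are assumptions for illustration only.
 */
#if 0
static void
example_reserve_lifecycle(hammer_mount_t hmp)
{
        hammer_reserve_t resv;
        hammer_off_t zone_off;
        int error;

        resv = hammer_blockmap_reserve(hmp, HAMMER_ZONE_LARGE_DATA_INDEX,
                                       16384, &zone_off, &error);
        if (resv == NULL)
                return;         /* error holds ENOSPC or an I/O error */

        /* ... frontend issues its direct write I/O against zone_off ... */

        /*
         * Drop our reference.  The backend will have called
         * hammer_blockmap_finalize() on commit to make the space real.
         */
        hammer_blockmap_reserve_complete(hmp, resv);
}
#endif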

/*
 * A record with a storage reservation calls this function when it is
 * being freed.  The storage may or may not have actually been allocated.
 *
 * This function removes the lock that prevented other entities from
 * allocating out of the storage or removing the zone assignment.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->refs > 0);
        if (--resv->refs == 0) {
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, M_HAMMER);
                --hammer_count_reservations;
        }
}

/*
 * This ensures that no data reallocations will take place at the specified
 * zone2_offset (pointing to the base of a bigblock) for 2 flush cycles,
 * preventing deleted data space, which has no UNDO, from being reallocated
 * too quickly.
 */
void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
                        hammer_off_t zone2_offset)
{
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO);
                resv->refs = 1; /* ref for on-delay list */
                resv->zone_offset = zone2_offset;
                RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                ++hammer_count_reservations;
        } else if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
        } else {
                ++resv->refs;   /* ref for on-delay list */
        }
        resv->flags |= HAMMER_RESF_ONDELAY;
        resv->flush_group = hmp->flusher.next + 1;
        TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
}

void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_reserve_t resv;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_LARGEBLOCK_MASK64) == 0);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;
        hammer_lock_ex(&hmp->blkmap_lock);

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        KKASSERT(error == 0);
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        KKASSERT(error == 0);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Freeing previously allocated space
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 &
                             ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                if (resv) {
                        /*
                         * Portions of this block have been reserved, do
                         * not free it.
                         *
                         * Make sure the reservation remains through
                         * the next flush cycle so potentially undoable
                         * data is not overwritten.
                         */
                        KKASSERT(resv->zone == zone);
                        hammer_reserve_setdelay(hmp, resv, base_off);
                } else if ((blockmap->next_offset ^ zone_offset) &
                            ~HAMMER_LARGEBLOCK_MASK64) {
                        /*
                         * Our iterator is not in the now-free big-block
                         * and we can release it.
                         *
                         * Make sure the reservation remains through
                         * the next flush cycle so potentially undoable
                         * data is not overwritten.
                         */
                        hammer_reserve_setdelay(hmp, resv, base_off);
                        KKASSERT(layer2->zone == zone);
                        hammer_del_buffers(hmp,
                                           zone_offset &
                                              ~HAMMER_LARGEBLOCK_MASK64,
                                           base_off,
                                           HAMMER_LARGEBLOCK_SIZE);
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}
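
/*
 * Illustrative note, not part of the original source: the base_off
 * computation used above strips the offset-within-big-block bits and the
 * zone bits from a blockmap offset, then re-encodes the result as a zone-2
 * (raw buffer) address keyed into the reservation tree.  With 8MB
 * large-blocks and hypothetical offsets chosen for illustration:
 *
 *      zone_offset = 0xA000000001234567   (zone 10)
 *      base_off    = 0x2000000001000000   (zone 2, big-block base)
 */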

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
void
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int append_off;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;
        hammer_lock_ex(&hmp->blkmap_lock);

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        KKASSERT(error == 0);
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        KKASSERT(error == 0);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free -= bytes;

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        append_off = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
        if (layer2->append_off < append_off)
                layer2->append_off = append_off;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}
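
/*
 * Illustrative note, not part of the original source: because records can
 * finalize out of order, append_off is a high-water mark rather than a
 * running total.  With hypothetical values, finalizing 1024 bytes at
 * big-block-relative offset 4096 sets append_off to 5120; a later
 * finalization of 1024 bytes at offset 0 computes 1024, which is smaller,
 * so append_off stays at 5120.
 */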

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        zone_offset, bytes);
        }
        return(bytes);
}

/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
                       int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
                        HAMMER_ZONE_RAW_BUFFER;

        /*
         * We can actually stop here, normal blockmaps are now direct-mapped
         * onto the freemap and so represent zone-2 addresses.
         */
        if (hammer_verify_zone == 0) {
                *errorp = 0;
                return(result_offset);
        }

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        KKASSERT(*errorp == 0);
        if (layer2->zone == 0) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 &
                             ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);
        } else if (layer2->zone != zone) {
                panic("hammer_blockmap_lookup: bad zone %d/%d\n",
                        layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        zone_offset, result_offset);
        }
        return(result_offset);
}
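
/*
 * Illustrative note, not part of the original source: with blockmaps
 * direct-mapped onto the freemap, the lookup translation is a pure bit
 * operation that only rewrites the 4-bit zone field, e.g. with a
 * hypothetical zone-10 offset:
 *
 *      0xA000000001234567 -> 0x2000000001234567
 */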

/*
 * Check space availability
 */
int
hammer_checkspace(hammer_mount_t hmp)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        const int blkconv = HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE;
        const int limit_inodes = HAMMER_LARGEBLOCK_SIZE / in_size;
        const int limit_recs = HAMMER_LARGEBLOCK_SIZE / rec_size;
        int usedbigblocks;

        /*
         * Quick and very dirty, not even using the right units (bigblocks
         * vs 16K buffers), but this catches almost everything.
         */
        if (hmp->copy_stat_freebigblocks >= hmp->rsv_databufs + 8 &&
            hmp->rsv_inodes < limit_inodes &&
            hmp->rsv_recs < limit_recs &&
            hmp->rsv_databytes < HAMMER_LARGEBLOCK_SIZE) {
                return(0);
        }

        /*
         * Do a more involved check
         */
        usedbigblocks = (hmp->rsv_inodes * in_size / HAMMER_LARGEBLOCK_SIZE) +
                        (hmp->rsv_recs * rec_size / HAMMER_LARGEBLOCK_SIZE) +
                        hmp->rsv_databufs / blkconv + 6;
        if (hmp->copy_stat_freebigblocks >= usedbigblocks)
                return(0);
        return(ENOSPC);
}
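
/*
 * Illustrative note, not part of the original source: assuming 8MB
 * large-blocks and the 16K buffers the comment above mentions, blkconv
 * works out to 8MB / 16KB = 512, so rsv_databufs / blkconv converts
 * reserved buffers into an estimated big-block count; the +6 appears to
 * be head-room for meta-data.
 */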