HAMMER 59G/Many: Stabilization pass (low memory issues)
[dragonfly.git] sys/vfs/hammer/hammer_blockmap.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.21 2008/07/01 02:08:58 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
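/*
 * Structural note: the freemap is a two-layer radix structure.  Each
 * layer1 entry counts the free big-blocks under it and points at an
 * array of layer2 entries; each layer2 entry describes one large-block
 * (owning zone, bytes_free, append_off, CRC).  Allocation iterates
 * blockmap->next_offset linearly through the zone, diving layer1 and
 * then layer2 for each candidate offset.
 */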
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static int hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
                        hammer_off_t zone2_offset);


/*
 * Reserved big-blocks red-black tree support
 */
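/*
 * RB_GENERATE2 additionally emits an RB_LOOKUP variant keyed directly on
 * zone_offset (a big-block's zone-2 base address), which the paths below
 * use to find the reservation covering a given big-block.
 */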
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
             hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
        if (res1->zone_offset < res2->zone_offset)
                return(-1);
        if (res1->zone_offset > res2->zone_offset)
                return(1);
        return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
                      int bytes, int *errorp)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        hammer_reserve_t resv;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t result_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */

        hmp = trans->hmp;

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
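        /*
         * e.g. a 100 byte request rounds up to 112 bytes:
         * (100 + 15) & ~15 == 112, keeping allocations 16-byte aligned.
         */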

        /*
         * Setup
         */
        root_volume = trans->rootvol;
        *errorp = 0;
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        next_offset = blockmap->next_offset;
again:
        /*
         * Check for wrap
         */
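        /*
         * next_offset reaching the base of the next zone means we ran off
         * the end of our zone; wrap back to the zone base.  Hitting this
         * a second time (loops == 2) means a full scan found no space.
         */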
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        result_offset = 0;
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a large-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }
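        /*
         * The XOR test above is non-zero only when the first and last
         * byte of the request fall in different buffers (or large-blocks);
         * in that case next_offset is advanced to the boundary containing
         * the last byte and the scan is retried.
         */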
        offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if (offset == 0 && layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The bigblock might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
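        /*
         * base_off strips the intra-block offset and the zone bits and
         * re-encodes the result as a zone-2 (raw buffer) address; the
         * reservation tree is keyed on a big-block's zone-2 base address.
         */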
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
        }

        /*
         * Ok, we can allocate out of this layer2 big-block.  Assume ownership
         * of the layer for real.  At this point we've validated any
         * reservation that might exist and can just ignore resv.
         */
        if (layer2->zone == 0) {
                /*
                 * Assign the bigblock to our zone
                 */
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                        root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        } else {
                hammer_modify_buffer(trans, buffer2,
                                     layer2, sizeof(*layer2));
        }
        KKASSERT(layer2->zone == zone);

        layer2->bytes_free -= bytes;
        KKASSERT(layer2->append_off <= offset);
        layer2->append_off = offset + bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);

        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
        }

        /*
         * If we are allocating from the base of a new buffer we can avoid
         * a disk read by calling hammer_bnew().
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew_ext(trans->hmp, next_offset, bytes,
                                errorp, &buffer3);
        }
        result_offset = next_offset;

        /*
         * Process allocated result_offset
         */
        hammer_modify_volume(NULL, root_volume, NULL, 0);
        blockmap->next_offset = next_offset + bytes;
        hammer_modify_volume_done(root_volume);
        hammer_unlock(&hmp->blkmap_lock);
failed:

        /*
         * Cleanup
         */
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);

        return(result_offset);
}
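/*
 * Usage sketch (illustrative only, not part of this file): a backend
 * caller allocating B-tree element space might do
 *
 *      int error;
 *      hammer_off_t off;
 *
 *      off = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *                                  sizeof(union hammer_btree_elm), &error);
 *
 * receiving a zone-encoded blockmap offset, while the frontend instead
 * pairs hammer_blockmap_reserve() with a later hammer_blockmap_finalize()
 * from the backend.
 */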

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
                        hammer_off_t *zone_offp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        hammer_reserve_t resx;
        int loops = 0;
        int offset;

        /*
         * Setup
         */
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(NULL);
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

        next_offset = blockmap->next_offset;
        resv = NULL;
again:
        /*
         * Check for wrap
         */
        if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
                if (++loops == 2) {
                        *errorp = ENOSPC;
                        goto failed;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.  Special
         * large allocations must not cross a large-block boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if (bytes <= HAMMER_BUFSIZE) {
                if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                        next_offset = tmp_offset & ~HAMMER_BUFMASK64;
                        goto again;
                }
        } else {
                if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
                        next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
        }
        offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * If we are at a big-block boundary and layer1 indicates no
         * free big-blocks, we cannot allocate a new big-block in
         * layer2; skip to the next layer1 entry.
         */
        if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
            layer1->blocks_free == 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC if not allocating into uninitialized space (which we
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        /*
         * Skip the layer if the zone is owned by someone other than us.
         */
        if (layer2->zone && layer2->zone != zone) {
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
         */
        hammer_lock_ex(&hmp->blkmap_lock);

        if (layer2->zone && layer2->zone != zone) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
                goto again;
        }
        if (offset < layer2->append_off) {
                hammer_unlock(&hmp->blkmap_lock);
                next_offset += layer2->append_off - offset;
                goto again;
        }

        /*
         * The bigblock might be reserved by another zone.  If it is reserved
         * by our zone we may have to move next_offset past the append_off.
         */
        base_off = (next_offset &
                    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
                    HAMMER_ZONE_RAW_BUFFER;
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
        if (resv) {
                if (resv->zone != zone) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
                                      ~HAMMER_LARGEBLOCK_MASK64;
                        goto again;
                }
                if (offset < resv->append_off) {
                        hammer_unlock(&hmp->blkmap_lock);
                        next_offset += resv->append_off - offset;
                        goto again;
                }
                ++resv->refs;
                resx = NULL;
        } else {
                resx = kmalloc(sizeof(*resv), M_HAMMER,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
                resx->zone_offset = base_off;
                resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
                KKASSERT(resv == NULL);
                resv = resx;
                ++hammer_count_reservations;
        }
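        /*
         * Note: RB_INSERT returns NULL on success; a collision is not
         * possible here because we hold blkmap_lock and just looked the
         * key up.  The increment of hammer_count_reservations balances
         * the decrement in hammer_blockmap_reserve_complete().
         */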
        resv->append_off = offset + bytes;

        /*
         * If we are not reserving a whole buffer but are at the start of
         * a new block, call hammer_bnew() to avoid a disk read.
         *
         * If we are reserving a whole buffer (or more), the caller will
         * probably use a direct read, so do nothing.
         */
        if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
                hammer_bnew(hmp, next_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator.  The layer1 and layer2 space beyond
         * next_offset is uninitialized.
         */
        blockmap->next_offset = next_offset + bytes;
        hammer_unlock(&hmp->blkmap_lock);

failed:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        *zone_offp = next_offset;

        return(resv);
}

/*
 * A record with a storage reservation calls this function when it is
 * being freed.  The storage may or may not have actually been allocated.
 *
 * This function removes the lock that prevented other entities from
 * allocating out of the storage or removing the zone assignment.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->refs > 0);
        if (--resv->refs == 0) {
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
                kfree(resv, M_HAMMER);
                --hammer_count_reservations;
        }
}

/*
 * This ensures that no data reallocations will take place at the specified
 * zone2_offset (pointing to the base of a bigblock) for 2 flush cycles,
 * preventing deleted data space, which has no UNDO, from being reallocated
 * too quickly.
 */
static int
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
                        hammer_off_t zone2_offset)
{
        int error;

        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), M_HAMMER,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resv->refs = 1; /* ref for on-delay list */
                resv->zone_offset = zone2_offset;
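                /*
                 * Setting append_off to the full big-block size makes
                 * the allocators treat the block as completely used, so
                 * nothing can be appended while the delay is in effect.
                 */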
                resv->append_off = HAMMER_LARGEBLOCK_SIZE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
                        error = EAGAIN;
                        kfree(resv, M_HAMMER);
                } else {
                        error = 0;
                        ++hammer_count_reservations;
                }
        } else if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
                error = 0;
        } else {
                ++resv->refs;   /* ref for on-delay list */
                error = 0;
        }
        if (error == 0) {
                resv->flags |= HAMMER_RESF_ONDELAY;
                resv->flush_group = hmp->flusher.next + 1;
                TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
        }
        return(error);
}

void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
        KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
        resv->flags &= ~HAMMER_RESF_ONDELAY;
        TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
        hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_reserve_t resv;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t base_off;
        int error;
        int zone;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);
        KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
                  ~HAMMER_LARGEBLOCK_MASK64) == 0);
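        /*
         * The XOR assertion above verifies that the freed range lies
         * entirely within a single large-block.
         */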

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        KKASSERT(error == 0);
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        KKASSERT(error == 0);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Freeing previously allocated space
         */
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 &
                             ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;
again:
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                if (resv) {
                        /*
                         * Portions of this block have been reserved, do
                         * not free it.
                         *
                         * Make sure the reservation remains through
                         * the next flush cycle so potentially undoable
                         * data is not overwritten.
                         */
                        KKASSERT(resv->zone == zone);
                        hammer_reserve_setdelay(hmp, resv, base_off);
                } else if ((blockmap->next_offset ^ zone_offset) &
                            ~HAMMER_LARGEBLOCK_MASK64) {
                        /*
                         * Our iterator is not in the now-free big-block
                         * and we can release it.
                         *
                         * Make sure the reservation remains through
                         * the next flush cycle so potentially undoable
                         * data is not overwritten.
                         */
                        if (hammer_reserve_setdelay(hmp, resv, base_off))
                                goto again;
                        KKASSERT(layer2->zone == zone);
                        hammer_del_buffers(hmp,
                                           zone_offset &
                                              ~HAMMER_LARGEBLOCK_MASK64,
                                           base_off,
                                           HAMMER_LARGEBLOCK_SIZE);
                        layer2->zone = 0;
                        layer2->append_off = 0;
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_volume_field(trans,
                                        trans->rootvol,
                                        vol0_stat_freebigblocks);
                        ++root_volume->ondisk->vol0_stat_freebigblocks;
                        hmp->copy_stat_freebigblocks =
                           root_volume->ondisk->vol0_stat_freebigblocks;
                        hammer_modify_volume_done(trans->rootvol);
                }
        }

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
void
hammer_blockmap_finalize(hammer_transaction_t trans,
                         hammer_off_t zone_offset, int bytes)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;
        int offset;

        if (bytes == 0)
                return;
        hmp = trans->hmp;

        /*
         * Alignment
         */
        bytes = (bytes + 15) & ~15;
        KKASSERT(bytes <= HAMMER_XBUFSIZE);

        /*
         * Basic zone validation & locking
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = trans->rootvol;
        error = 0;

        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
        KKASSERT(error == 0);
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
        KKASSERT(error == 0);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        hammer_lock_ex(&hmp->blkmap_lock);

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

        /*
         * Finalize some or all of the space covered by a current
         * reservation.  An allocation in the same layer may have
         * already assigned ownership.
         */
        if (layer2->zone == 0) {
                hammer_modify_buffer(trans, buffer1,
                                     layer1, sizeof(*layer1));
                --layer1->blocks_free;
                layer1->layer1_crc = crc32(layer1,
                                           HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                layer2->zone = zone;
                KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
                KKASSERT(layer2->append_off == 0);
                hammer_modify_volume_field(trans,
                                trans->rootvol,
                                vol0_stat_freebigblocks);
                --root_volume->ondisk->vol0_stat_freebigblocks;
                hmp->copy_stat_freebigblocks =
                   root_volume->ondisk->vol0_stat_freebigblocks;
                hammer_modify_volume_done(trans->rootvol);
        }
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
        layer2->bytes_free -= bytes;

        /*
         * Finalizations can occur out of order, or combined with allocations.
         * append_off must be set to the highest allocated offset.
         */
        offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
        if (layer2->append_off < offset)
                layer2->append_off = offset;

        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        hammer_unlock(&hmp->blkmap_lock);

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
}

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t blockmap;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        blockmap = &hmp->blockmap[zone];
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }
        KKASSERT(layer2->zone == zone);

        bytes = layer2->bytes_free;

        if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
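        /*
         * *curp reports whether blockmap->next_offset currently points
         * into this big-block, i.e. whether the allocator is still
         * appending to it.
         */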
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        zone_offset, bytes);
        }
        return(bytes);
}

/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
                       int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t freemap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        hammer_off_t base_off;
        hammer_reserve_t resv;
        int zone;

        /*
         * Calculate the zone-2 offset.
         */
        zone = HAMMER_ZONE_DECODE(zone_offset);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

        result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
                        HAMMER_ZONE_RAW_BUFFER;

        /*
         * We can actually stop here, normal blockmaps are now direct-mapped
         * onto the freemap and so represent zone-2 addresses.
         */
        if (hammer_verify_zone == 0) {
                *errorp = 0;
                return(result_offset);
        }
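        /*
         * Verification path (hammer_verify_zone != 0): walk the freemap
         * layers and assert that the big-block is either owned by this
         * zone or covered by a reservation belonging to it.
         */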

        /*
         * Validate the allocation zone
         */
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(freemap->phys_offset != 0);

        /*
         * Dive layer 1.
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        KKASSERT(*errorp == 0);
        if (layer2->zone == 0) {
                base_off = (zone_offset &
                            (~HAMMER_LARGEBLOCK_MASK64 &
                             ~HAMMER_OFF_ZONE_MASK)) |
                            HAMMER_ZONE_RAW_BUFFER;
                resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
                                 base_off);
                KKASSERT(resv && resv->zone == zone);
        } else if (layer2->zone != zone) {
                panic("hammer_blockmap_lookup: bad zone %d/%d\n",
                        layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        zone_offset, result_offset);
        }
        return(result_offset);
}

/*
 * Check space availability
 */
int
hammer_checkspace(hammer_mount_t hmp)
{
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
        const int rec_size = (sizeof(union hammer_btree_elm) * 2);
        const int blkconv = HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE;
        const int limit_inodes = HAMMER_LARGEBLOCK_SIZE / in_size;
        const int limit_recs = HAMMER_LARGEBLOCK_SIZE / rec_size;
        int usedbigblocks;
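        /*
         * Note: with the usual 8MB large-block and 16K buffer sizes,
         * blkconv is 512, so rsv_databufs / blkconv below converts
         * reserved 16K buffers into an estimated big-block count.
         */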

        /*
         * Quick and very dirty, not even using the right units (bigblocks
         * vs 16K buffers), but this catches almost everything.
         */
        if (hmp->copy_stat_freebigblocks >= hmp->rsv_databufs + 8 &&
            hmp->rsv_inodes < limit_inodes &&
            hmp->rsv_recs < limit_recs &&
            hmp->rsv_databytes < HAMMER_LARGEBLOCK_SIZE) {
                return(0);
        }

        /*
         * Do a more involved check
         */
        usedbigblocks = (hmp->rsv_inodes * in_size / HAMMER_LARGEBLOCK_SIZE) +
                        (hmp->rsv_recs * rec_size / HAMMER_LARGEBLOCK_SIZE) +
                        hmp->rsv_databufs / blkconv + 6;
        if (hmp->copy_stat_freebigblocks >= usedbigblocks)
                return(0);
        return (ENOSPC);
}