/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.15 2008/06/07 07:41:51 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
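/*
 * In broad strokes, a zone-X blockmap offset is translated to a physical
 * (zone-2 raw buffer) offset in two steps: the offset indexes a layer1
 * entry in the zone's root blockmap, the layer1 entry points at an array
 * of layer2 entries, and the selected layer2 entry supplies the big-block
 * backing that portion of the zone.  The low bits of the offset locate
 * the data within the big-block.
 */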
#include "hammer.h"

static hammer_off_t hammer_find_hole(hammer_mount_t hmp,
                                   hammer_holes_t holes, int bytes);
static void hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
                                   hammer_off_t offset, int bytes);
static void hammer_clean_holes(hammer_mount_t hmp, hammer_holes_t holes,
                                   hammer_off_t offset);

/*
 * Allocate a big-block from the freemap and stuff it into the blockmap
 * at layer1/layer2.
 */
static void
hammer_blockmap_llalloc(hammer_transaction_t trans,
                hammer_off_t zone_offset, int *errorp,
                hammer_buffer_t buffer1, hammer_blockmap_layer1_t layer1,
                hammer_buffer_t buffer2, hammer_blockmap_layer2_t layer2)
{
        hammer_off_t zone2_offset;

        zone2_offset = hammer_freemap_alloc(trans, zone_offset, errorp);
        if (*errorp)
                return;
        hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
        KKASSERT(layer1->blocks_free);
        --layer1->blocks_free;
        layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
        hammer_modify_buffer_done(buffer1);
        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        bzero(layer2, sizeof(*layer2));
        layer2->u.phys_offset = zone2_offset;
        layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
}


/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
                      int bytes, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t bigblock_offset;
        int loops = 0;
        int skip_amount;
        int used_hole;

        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(trans->hmp, errorp);
        if (*errorp)
                return(0);
        rootmap = &trans->hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
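        /*
         * (For example, a 13 byte request is rounded up to 16 bytes, the
         *  next multiple of 8.)
         */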
        bytes = (bytes + 7) & ~7;
        KKASSERT(bytes > 0 && bytes <= HAMMER_BUFSIZE);

        lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);

        /*
         * Try to use a known-free hole, otherwise append.
         */
        next_offset = hammer_find_hole(trans->hmp, &trans->hmp->holes[zone],
                                       bytes);
        if (next_offset == 0) {
                next_offset = rootmap->next_offset;
                used_hole = 0;
        } else {
                used_hole = 1;
        }

again:
        /*
         * The allocation request may not cross a buffer boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                skip_amount = HAMMER_BUFSIZE -
                              ((int)next_offset & HAMMER_BUFMASK);
                hammer_add_hole(trans->hmp, &trans->hmp->holes[zone],
                                next_offset, skip_amount);
                next_offset = tmp_offset & ~HAMMER_BUFMASK64;
        }
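        /*
         * (For example, with 16K buffers a 512 byte request starting 256
         *  bytes shy of a buffer boundary would cross it; the remaining
         *  256 bytes of the old buffer are recorded as a hole and the
         *  request moves to the start of the next buffer.)
         */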

        /*
         * Dive layer 1.  If we are starting a new layer 1 entry,
         * allocate a layer 2 block for it.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);
        KKASSERT(next_offset <= rootmap->alloc_offset);

        /*
         * Check CRC if not allocating into uninitialized space
         */
        if ((next_offset != rootmap->alloc_offset) ||
            (next_offset & HAMMER_BLOCKMAP_LAYER2_MASK)) {
                if (layer1->layer1_crc != crc32(layer1,
                                                HAMMER_LAYER1_CRCSIZE)) {
                        Debugger("CRC FAILED: LAYER1");
                }
        }

        /*
         * Allocate layer2 backing store in layer1 if necessary.  next_offset
         * can skip to a bigblock boundary but alloc_offset is at least
         * bigblock-aligned so that's ok.
         */
        if ((next_offset == rootmap->alloc_offset &&
            (next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0) ||
            layer1->phys_offset == HAMMER_BLOCKMAP_FREE
        ) {
                KKASSERT((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0);
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                bzero(layer1, sizeof(*layer1));
                layer1->phys_offset =
                        hammer_freemap_alloc(trans, next_offset, errorp);
                layer1->blocks_free = HAMMER_BLOCKMAP_RADIX2;
                layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
                hammer_modify_buffer_done(buffer1);
                KKASSERT(*errorp == 0);
        }
        KKASSERT(layer1->phys_offset);

        /*
         * If layer1 indicates no free blocks in layer2 and our alloc_offset
         * is not in layer2, skip layer2 entirely.
         */
        if (layer1->blocks_free == 0 &&
            ((next_offset ^ rootmap->alloc_offset) & ~HAMMER_BLOCKMAP_LAYER2_MASK) != 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2_MASK) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (next_offset >= trans->hmp->zone_limits[zone]) {
                        hkprintf("blockmap wrap1\n");
                        next_offset = HAMMER_ZONE_ENCODE(zone, 0);
                        if (++loops == 2) {     /* XXX poor-man's */
                                next_offset = 0;
                                *errorp = ENOSPC;
                                goto done;
                        }
                }
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(trans->hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC if not allocating into uninitialized space
         */
        if (next_offset != rootmap->alloc_offset ||
            (next_offset & HAMMER_LARGEBLOCK_MASK64)) {
                if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                        Debugger("CRC FAILED: LAYER2");
                }
        }

        if ((next_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
                /*
                 * We are at the beginning of a new bigblock
                 */
                if (next_offset == rootmap->alloc_offset ||
                    layer2->u.phys_offset == HAMMER_BLOCKMAP_FREE) {
                        /*
                         * Allocate the bigblock in layer2 if diving into
                         * uninitialized space or if the block was previously
                         * freed.
                         */
                        hammer_blockmap_llalloc(trans,
                                                next_offset, errorp,
                                                buffer1, layer1,
                                                buffer2, layer2);
                        KKASSERT(layer2->u.phys_offset != HAMMER_BLOCKMAP_FREE);
                } else if (layer2->bytes_free != HAMMER_LARGEBLOCK_SIZE) {
                        /*
                         * We have encountered a block that is already
                         * partially allocated.  We must skip this block.
                         */
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        if (next_offset >= trans->hmp->zone_limits[zone]) {
                                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
                                hkprintf("blockmap wrap2\n");
                                if (++loops == 2) {     /* XXX poor-man's */
                                        next_offset = 0;
                                        *errorp = ENOSPC;
                                        goto done;
                                }
                        }
                        goto again;
                }
        } else {
                /*
                 * We are appending within a bigblock.  It is possible that
                 * the blockmap has been marked completely free via a prior
                 * pruning operation.  We no longer reset the append index
                 * for that case because it compromises the UNDO by allowing
                 * data overwrites.
                 */
                /*
                KKASSERT(layer2->u.phys_offset != HAMMER_BLOCKMAP_FREE);
                */
        }

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        layer2->bytes_free -= bytes;
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * If the buffer was completely free we do not have to read it from
         * disk; call hammer_bnew() to instantiate it.
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                bigblock_offset = layer2->u.phys_offset +
                                  (next_offset & HAMMER_LARGEBLOCK_MASK64);
                hammer_bnew(trans->hmp, bigblock_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        if (used_hole == 0) {
                hammer_modify_volume(trans, root_volume, NULL, 0);
                rootmap->next_offset = next_offset + bytes;
                if (rootmap->alloc_offset < rootmap->next_offset) {
                        rootmap->alloc_offset =
                            (rootmap->next_offset + HAMMER_LARGEBLOCK_MASK) &
                            ~HAMMER_LARGEBLOCK_MASK64;
                }
                hammer_modify_volume_done(root_volume);
        }
done:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);
        return(next_offset);
}

/*
 * Front-end blockmap reservation
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to issue disk write I/O
 * for large blocks of data without having to queue the BIOs to the back-end.
 * If the reservation winds up not being used, for example due to a crash,
 * the reblocker should eventually come along and clean it up.
 *
 * This code will attempt to assign free big-blocks to the blockmap to
 * accommodate the request.
 *
 * If we return 0 a reservation was not possible and the caller must queue
 * the I/O to the backend.
 */
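/*
 * A hypothetical caller, sketched only to illustrate the contract above
 * (the names and error handling are illustrative, not taken from the
 * actual front-end code):
 *
 *	resv_offset = hammer_blockmap_reserve(hmp, zone, bytes, &error);
 *	if (resv_offset == 0) {
 *		(reservation failed, queue the BIO to the back-end)
 *	} else {
 *		(issue direct write I/O against the reserved space)
 *	}
 */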
hammer_off_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t bigblock_offset;
        int loops = 0;
        int skip_amount;

        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        rootmap = &hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 7) & ~7;
        KKASSERT(bytes > 0 && bytes <= HAMMER_BUFSIZE);

        lockmgr(&hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);

        /*
         * Starting zoneX offset.  The reservation code always wraps at the
         * alloc_offset (the allocation code is allowed to go through to the
         * limit).
         */
        next_offset = rootmap->next_offset;
again:
        if (next_offset >= rootmap->alloc_offset) {
                if (++loops == 2) {     /* XXX poor-man's */
                        next_offset = 0;
                        *errorp = ENOSPC;
                        goto done;
                }
                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
        }

        /*
         * The allocation request may not cross a buffer boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                skip_amount = HAMMER_BUFSIZE -
                              ((int)next_offset & HAMMER_BUFMASK);
                hammer_add_hole(hmp, &hmp->holes[zone],
                                next_offset, skip_amount);
                next_offset = tmp_offset & ~HAMMER_BUFMASK64;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);
        KKASSERT(next_offset <= rootmap->alloc_offset);

        /*
         * Check CRC if not allocating into uninitialized space
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }
        KKASSERT(layer1->phys_offset);

        /*
         * If layer1 indicates no free blocks in layer2 and our alloc_offset
         * is not in layer2, skip layer2 entirely.
         */
        if (layer1->blocks_free == 0 &&
            ((next_offset ^ rootmap->alloc_offset) & ~HAMMER_BLOCKMAP_LAYER2_MASK) != 0) {
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2_MASK) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        /*
         * Check CRC if not allocating into uninitialized space
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        if ((next_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
                /*
                 * We are at the beginning of a new bigblock
                 */
                if (layer2->u.phys_offset == HAMMER_BLOCKMAP_FREE) {
                        struct hammer_transaction trans;

                        hammer_start_transaction(&trans, hmp);
                        if (hammer_sync_lock_sh_try(&trans) == 0) {
                                hammer_blockmap_llalloc(&trans,
                                                        next_offset, errorp,
                                                        buffer1, layer1,
                                                        buffer2, layer2);
                                hammer_sync_unlock(&trans);
                        } else {
                                hkprintf("e");
                                hammer_sync_lock_sh(&trans);
                                hammer_blockmap_llalloc(&trans,
                                                        next_offset, errorp,
                                                        buffer1, layer1,
                                                        buffer2, layer2);
                                hammer_sync_unlock(&trans);
                                /* *errorp = EDEADLK; */
                        }
                        hammer_done_transaction(&trans);
                        if (layer2->u.phys_offset == HAMMER_BLOCKMAP_FREE) {
                                next_offset = 0;
                                goto done;
                        }
                } else if (layer2->bytes_free != HAMMER_LARGEBLOCK_SIZE) {
                        /*
                         * We have encountered a block that is already
                         * partially allocated.  We must skip this block.
                         */
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        goto again;
                }
        } else {
                /*
                 * We are appending within a bigblock.  It is possible that
                 * the blockmap has been marked completely free via a prior
                 * pruning operation.  We no longer reset the append index
                 * for that case because it compromises the UNDO by allowing
                 * data overwrites.
                 */
                KKASSERT(layer2->u.phys_offset != HAMMER_BLOCKMAP_FREE);
                KKASSERT(layer2->bytes_free >= HAMMER_LARGEBLOCK_SIZE - (int)(next_offset & HAMMER_LARGEBLOCK_MASK64));
        }

        /*
         * The reservation code does not modify layer2->bytes_free; it
         * simply adjusts next_offset.
         */
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * Reservations are used for direct I/O, so make sure there is no
         * zone-2 bp cached in the device layer.
         */
        bigblock_offset = layer2->u.phys_offset +
                          (next_offset & HAMMER_LARGEBLOCK_MASK64);
        hammer_binval(hmp, bigblock_offset);

        /*
         * Adjust our iterator.  The reservation path never advances
         * alloc_offset; the layer1 and layer2 space beyond alloc_offset
         * remains uninitialized.
         */
        rootmap->next_offset = next_offset + bytes;
done:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        lockmgr(&hmp->blockmap_lock, LK_RELEASE);
        return(next_offset);
}

/*
 * Free (offset,bytes) in a zone.
 *
 * If bytes is negative we are actually allocating previously reserved
 * space in the zone.
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t bmap_off, int bytes)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;

        if (bytes >= 0) {
                bytes = (bytes + 7) & ~7;
                KKASSERT(bytes <= HAMMER_BUFSIZE);
                KKASSERT(((bmap_off ^ (bmap_off + (bytes - 1))) &
                          ~HAMMER_LARGEBLOCK_MASK64) == 0);
        } else {
                KKASSERT(bytes >= -HAMMER_BUFSIZE);
        }
        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(trans->hmp, &error);
        if (error)
                return;

        lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);

        rootmap = &trans->hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_free: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
                goto done;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(trans->hmp, layer1_offset, &error, &buffer1);
        KKASSERT(error == 0);
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(trans->hmp, layer2_offset, &error, &buffer2);
        KKASSERT(error == 0);
        KKASSERT(layer2->u.phys_offset);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);

        /*
         * If the big-block is free, return it to the free pool.  The layer2
         * infrastructure is left intact even if the entire layer2 becomes
         * free.
         *
         * At the moment if our iterator is in a bigblock that becomes
         * wholly free, we have to leave the block allocated and we cannot
         * reset the iterator because there may be UNDOs on-disk that
         * reference areas of that block and we cannot overwrite those areas.
         */
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                if ((rootmap->next_offset ^ bmap_off) &
                    ~HAMMER_LARGEBLOCK_MASK64) {
                        /*
                         * Our iterator is not in the now-free big-block
                         * and we can release it.
                         */
                        hammer_clean_holes(trans->hmp,
                                           &trans->hmp->holes[zone],
                                           bmap_off);
                        hammer_freemap_free(trans, layer2->u.phys_offset,
                                            bmap_off, &error);
                        hammer_clrxlate_buffer(trans->hmp,
                                               layer2->u.phys_offset);
                        layer2->u.phys_offset = HAMMER_BLOCKMAP_FREE;

                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
#if 0
                        /*
                         * This commented out code would release the layer2
                         * bigblock.  We do not want to do this, at least
                         * not right now.
                         *
                         * This also may be incomplete.
                         */
                        if (layer1->blocks_free == HAMMER_BLOCKMAP_RADIX2) {
                                hammer_freemap_free(
                                        trans, layer1->phys_offset,
                                        bmap_off & ~HAMMER_BLOCKMAP_LAYER2_MASK,
                                        &error);
                                layer1->phys_offset = HAMMER_BLOCKMAP_FREE;
                        }
#endif
                        layer1->layer1_crc = crc32(layer1,
                                                   HAMMER_LAYER1_CRCSIZE);
                        hammer_modify_buffer_done(buffer1);
                } else {
#if 0
                        /*
                         * This commented out code would reset the iterator,
                         * which we cannot do at the moment as it could cause
                         * new allocations to overwrite deleted data still
                         * subject to undo on reboot.
                         */
                        hammer_modify_volume(trans, root_volume,
                                             NULL, 0);
                        rootmap->next_offset &= ~HAMMER_LARGEBLOCK_MASK64;
                        hammer_modify_volume_done(root_volume);
#endif
                }
        }
        layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
        hammer_modify_buffer_done(buffer2);
done:
        lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);
}

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t bmap_off,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        rootmap = &hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_getfree: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
                bytes = 0;
                *curp = 0;
                goto done;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer2->u.phys_offset);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        bytes = layer2->bytes_free;

        if ((rootmap->next_offset ^ bmap_off) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
done:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        bmap_off, bytes);
        }
        return(bytes);
}


/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        int zone;

        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        rootmap = &hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
                result_offset = 0;
                goto done;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER1");
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        KKASSERT(*errorp == 0);
        KKASSERT(layer2->u.phys_offset);
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
                Debugger("CRC FAILED: LAYER2");
        }

        result_offset = layer2->u.phys_offset +
                        (bmap_off & HAMMER_LARGEBLOCK_MASK64);
done:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        bmap_off, result_offset);
        }
        return(result_offset);
}

/************************************************************************
 *                  IN-CORE TRACKING OF ALLOCATION HOLES                *
 ************************************************************************
 *
 * This is a temporary shim in need of a more permanent solution.
 *
 * As we allocate space, holes are created due to having to align to a new
 * 16K buffer when an allocation would otherwise cross the buffer boundary.
 * These holes are recorded here and used to fulfill smaller requests as
 * much as possible.  Only a limited number of holes are recorded and these
 * functions operate somewhat like a heuristic, where information is allowed
 * to be thrown away.
 */
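/*
 * The bookkeeping structures are declared in hammer.h; the sketch below
 * is an assumption reconstructed from how they are used in this file and
 * shows only the fields this file touches:
 *
 *	struct hammer_hole {
 *		TAILQ_ENTRY(hammer_hole) entry;	  (list linkage)
 *		hammer_off_t	offset;		  (zone-N offset of the hole)
 *		int		bytes;		  (size of the hole in bytes)
 *	};
 *
 *	struct hammer_holes {
 *		TAILQ_HEAD(, hammer_hole) list;	  (recorded holes, oldest first)
 *		int	count;			  (number of recorded holes)
 *	};
 */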

void
hammer_init_holes(hammer_mount_t hmp, hammer_holes_t holes)
{
        TAILQ_INIT(&holes->list);
        holes->count = 0;
}

void
hammer_free_holes(hammer_mount_t hmp, hammer_holes_t holes)
{
        hammer_hole_t hole;

        while ((hole = TAILQ_FIRST(&holes->list)) != NULL) {
                TAILQ_REMOVE(&holes->list, hole, entry);
                kfree(hole, M_HAMMER);
        }
}

/*
 * Attempt to locate a hole with sufficient free space to accommodate the
 * requested allocation.  Return the offset or 0 if no hole could be found.
 */
static hammer_off_t
hammer_find_hole(hammer_mount_t hmp, hammer_holes_t holes, int bytes)
{
        hammer_hole_t hole;
        hammer_off_t result_off = 0;

        TAILQ_FOREACH(hole, &holes->list, entry) {
                if (bytes <= hole->bytes) {
                        result_off = hole->offset;
                        hole->offset += bytes;
                        hole->bytes -= bytes;
                        break;
                }
        }
        return(result_off);
}

/*
 * If a newly created hole is reasonably sized then record it.  We only
 * keep track of a limited number of holes.  Lost holes are recovered by
 * reblocking.
 *
 * offset is a zone-N offset.
 */
static void
hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
                hammer_off_t offset, int bytes)
{
        hammer_hole_t hole;

        if (bytes <= 128)
                return;

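        /*
         * Once the per-zone limit is reached, recycle the oldest recorded
         * hole instead of allocating a new one.
         */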
        if (holes->count < HAMMER_MAX_HOLES) {
                hole = kmalloc(sizeof(*hole), M_HAMMER, M_WAITOK);
                ++holes->count;
        } else {
                hole = TAILQ_FIRST(&holes->list);
                TAILQ_REMOVE(&holes->list, hole, entry);
        }
        TAILQ_INSERT_TAIL(&holes->list, hole, entry);
        hole->offset = offset;
        hole->bytes = bytes;
}

/*
 * Clean out any holes cached for the bigblock we are about to release back
 * to the free pool.
 */
static void
hammer_clean_holes(hammer_mount_t hmp, hammer_holes_t holes,
                   hammer_off_t offset)
{
        hammer_hole_t hole;

        offset &= ~HAMMER_LARGEBLOCK_MASK64;

restart:
        TAILQ_FOREACH(hole, &holes->list, entry) {
                if ((hole->offset & ~HAMMER_LARGEBLOCK_MASK64) == offset) {
                        TAILQ_REMOVE(&holes->list, hole, entry);
                        kfree(hole, M_HAMMER);
                        goto restart;
                }
        }
}