hammer2 - freemap part 3 - group by allocation size
[dragonfly.git] sys/vfs/hammer2/hammer2_freemap.c
/*
 * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>

#include "hammer2.h"

#define USE_SIMPLE_ALLOC        0

#if !USE_SIMPLE_ALLOC

static int hammer2_freemap_try_alloc(hammer2_trans_t *trans,
                        hammer2_chain_t **parentp, hammer2_blockref_t *bref,
                        int radix, hammer2_off_t bpref, hammer2_off_t *bnext);
static int hammer2_freemap_iterate(hammer2_trans_t *trans,
                        hammer2_chain_t **parentp, hammer2_chain_t **chainp,
                        hammer2_off_t bpref, hammer2_off_t *bnextp);

#endif

static __inline
int
hammer2_freemapradix(int radix)
{
        return(radix);
}

/*
 * Calculate the device offset for the specified FREEMAP_NODE or FREEMAP_LEAF
 * bref.  Return a combined media offset and physical size radix.  Freemap
 * chains use fixed storage offsets in the 4MB reserved area at the
 * beginning of each 2GB zone.
 *
 * Rotate between four possibilities.  Theoretically this means we have three
 * good freemaps in case of a crash, which we can use as a base for the fixup
 * scan at mount-time.
 */
#define H2FMBASE(key, radix)    ((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
#define H2FMSHIFT(radix)        ((hammer2_off_t)1 << (radix))
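
/*
 * Illustrative note (not from the original source): H2FMSHIFT(radix) is
 * simply the size of a 2^radix byte span, and H2FMBASE(key, radix) rounds
 * a media key down to the start of that span.  For example, using the
 * 2MB-per-leaf and 2GB-per-zone sizes referenced in this file,
 * H2FMBASE(key, HAMMER2_FREEMAP_LEVEL0_RADIX) yields the base of the 2MB
 * range covered by a level 0 leaf, while
 * H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) yields the base of the
 * enclosing 2GB zone.
 */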

static
int
hammer2_freemap_reserve(hammer2_mount_t *hmp, hammer2_blockref_t *bref,
                        int radix)
{
        hammer2_off_t off;
        size_t bytes;

        /*
         * Physical allocation size -> radix.  Typically either 256 for
         * a level 0 freemap leaf or 65536 for a level N freemap node.
         *
         * NOTE: A 256 byte bitmap represents 256 x 8 x 1024 = 2MB of storage.
         *       Do not use hammer2_allocsize() here as it has a min cap.
         */
        bytes = 1 << radix;

        /*
         * Adjust by HAMMER2_ZONE_FREEMAP_{A,B,C,D} using the existing
         * offset as a basis.  Start in zone A if previously unallocated.
         */
        if ((bref->data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) {
                off = HAMMER2_ZONE_FREEMAP_A;
        } else {
                off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX &
                      (((hammer2_off_t)1 << HAMMER2_FREEMAP_LEVEL1_RADIX) - 1);
                off = off / HAMMER2_PBUFSIZE;
                KKASSERT(off >= HAMMER2_ZONE_FREEMAP_A);
                KKASSERT(off < HAMMER2_ZONE_FREEMAP_D + 8);

                if (off >= HAMMER2_ZONE_FREEMAP_D)
                        off = HAMMER2_ZONE_FREEMAP_A;
                else if (off >= HAMMER2_ZONE_FREEMAP_C)
                        off = HAMMER2_ZONE_FREEMAP_D;
                else if (off >= HAMMER2_ZONE_FREEMAP_B)
                        off = HAMMER2_ZONE_FREEMAP_C;
                else
                        off = HAMMER2_ZONE_FREEMAP_B;
        }
        off = off * HAMMER2_PBUFSIZE;
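
        /*
         * Illustrative note (not from the original source): the selection
         * above walks the reserved slots in the order A -> B -> C -> D and
         * then wraps back to A, so a copy that previously lived in the
         * HAMMER2_ZONE_FREEMAP_B slot is written to the C slot next.  Up to
         * three older copies therefore remain intact for the mount-time
         * fixup scan described above.
         */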

        /*
         * Calculate the block offset of the reserved block.  This will
         * point into the 4MB reserved area at the base of the appropriate
         * 2GB zone, once added to the FREEMAP_x selection above.
         */
        switch(bref->keybits) {
        /* case HAMMER2_FREEMAP_LEVEL5_RADIX: not applicable */
        case HAMMER2_FREEMAP_LEVEL4_RADIX:      /* 2EB */
                KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
                KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
                off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL4_RADIX) +
                       HAMMER2_ZONEFM_LEVEL4 * HAMMER2_PBUFSIZE;
                break;
        case HAMMER2_FREEMAP_LEVEL3_RADIX:      /* 2PB */
                KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
                KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
                off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL3_RADIX) +
                       HAMMER2_ZONEFM_LEVEL3 * HAMMER2_PBUFSIZE;
                break;
        case HAMMER2_FREEMAP_LEVEL2_RADIX:      /* 2TB */
                KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
                KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
                off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL2_RADIX) +
                       HAMMER2_ZONEFM_LEVEL2 * HAMMER2_PBUFSIZE;
                break;
        case HAMMER2_FREEMAP_LEVEL1_RADIX:      /* 2GB */
                KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
                KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
                off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
                       HAMMER2_ZONEFM_LEVEL1 * HAMMER2_PBUFSIZE;
                break;
        case HAMMER2_FREEMAP_LEVEL0_RADIX:      /* 2MB (256 byte bitmap) */
                /*
                 * Terminal bitmap, start with 2GB base, then offset by
                 * 256 bytes of device offset per 2MB of logical space
                 * (8 bits per byte, 1024 byte allocation chunking).
                 */
                KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF);
                KKASSERT(bytes == HAMMER2_FREEMAP_LEVEL0_PSIZE);
                off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
                       HAMMER2_ZONEFM_LEVEL0 * HAMMER2_PBUFSIZE;

                off += ((bref->key >> HAMMER2_FREEMAP_LEVEL0_RADIX) &
                        ((1 << (HAMMER2_FREEMAP_LEVEL1_RADIX -
                               HAMMER2_FREEMAP_LEVEL0_RADIX)) - 1)) *
                        HAMMER2_FREEMAP_LEVEL0_PSIZE;
                break;
        default:
                panic("freemap: bad radix(2) %p %d\n", bref, bref->keybits);
                /* NOT REACHED */
                break;
        }
        bref->data_off = off | radix;
        return (0);
}
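
/*
 * Illustrative note (not from the original source): for a level 0 leaf the
 * reserved slot is derived from the leaf's key.  With the 2MB-per-leaf and
 * 2GB-per-zone figures quoted above, each zone holds 2GB / 2MB = 1024
 * leaves, and the expression
 *
 *      ((key >> LEVEL0_RADIX) & ((1 << (LEVEL1_RADIX - LEVEL0_RADIX)) - 1))
 *
 * selects which of those 1024 consecutive 256-byte bitmaps inside the
 * zone's reserved area belongs to the leaf (1024 x 256 = 256KB of leaf
 * bitmaps per freemap copy).
 */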

/*
 * Allocate media space, returning a combined data offset and radix.
 * THIS ROUTINE IS USED FOR DEBUGGING ONLY.
 *
 * This version of the routine is ONLY usable for testing and debug
 * purposes and only if the filesystem never instantiated an actual
 * freemap.  It uses the initial allocation iterator that newfs_hammer2
 * used to build the filesystem to allocate new space and is not capable
 * of dealing with freed space.
 */
#if USE_SIMPLE_ALLOC

static
int
hammer2_freemap_simple_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref,
                             int radix)
{
        hammer2_off_t data_off;
        hammer2_off_t data_next;
        hammer2_freecache_t *fc;
        /*struct buf *bp;*/
        size_t bytes;
        int fctype;

        bytes = (size_t)(1 << radix);
        KKASSERT(bytes >= HAMMER2_MIN_ALLOC &&
                 bytes <= HAMMER2_MAX_ALLOC);

        /*
         * Must not be used if the filesystem is using a real freemap.
         */
        KKASSERT(hmp->voldata.freemap_blockset.blockref[0].data_off == 0);

        switch(bref->type) {
        case HAMMER2_BREF_TYPE_INODE:
                fctype = HAMMER2_FREECACHE_INODE;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
                fctype = HAMMER2_FREECACHE_INODE;
                break;
        case HAMMER2_BREF_TYPE_DATA:
                fctype = HAMMER2_FREECACHE_DATA;
                break;
        default:
                fctype = HAMMER2_FREECACHE_DATA;
                break;
        }

        if (radix <= HAMMER2_MAX_RADIX)
                fc = &hmp->freecache[fctype][radix];
        else
                fc = NULL;

        lockmgr(&hmp->alloclk, LK_EXCLUSIVE);
        if (fc && fc->single) {
                /*
                 * Allocate from our single-block cache.
                 */
                data_off = fc->single;
                fc->single = 0;
        } else if (fc && fc->bulk) {
                /*
                 * Allocate from our packing cache.
                 */
                data_off = fc->bulk;
                fc->bulk += bytes;
                if ((fc->bulk & HAMMER2_SEGMASK) == 0)
                        fc->bulk = 0;
        } else {
                /*
                 * Allocate from the allocation iterator using a SEGSIZE
                 * aligned block and reload the packing cache if possible.
                 *
                 * Skip reserved areas at the beginning of each zone.
                 */
                hammer2_voldata_lock(hmp);
                data_off = hmp->voldata.allocator_beg;
                data_off = (data_off + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64;
                if ((data_off & HAMMER2_ZONE_MASK64) < HAMMER2_ZONE_SEG) {
                        KKASSERT((data_off & HAMMER2_ZONE_MASK64) == 0);
                        data_off += HAMMER2_ZONE_SEG64;
                }
                data_next = data_off + bytes;

                if ((data_next & HAMMER2_SEGMASK) == 0) {
                        hmp->voldata.allocator_beg = data_next;
                } else {
                        KKASSERT(radix <= HAMMER2_MAX_RADIX);
                        hmp->voldata.allocator_beg =
                                        (data_next + HAMMER2_SEGMASK64) &
                                        ~HAMMER2_SEGMASK64;
                        fc->bulk = data_next;
                }
                hammer2_voldata_unlock(hmp, 1);
        }
        lockmgr(&hmp->alloclk, LK_RELEASE);

        bref->data_off = data_off | radix;
        return (0);
}
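
/*
 * Illustrative note (not from the original source): the iterator path above
 * rounds allocator_beg up to a SEGSIZE boundary with
 * (data_off + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64 and, if the result
 * lands inside the reserved segment at the start of a zone, bumps it past
 * that segment by HAMMER2_ZONE_SEG64.  When the allocation does not end
 * exactly on a SEGSIZE boundary, the remainder of the segment is parked in
 * fc->bulk so later allocations of the same type and radix can pack into it.
 */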

#endif

/*
 * Normal freemap allocator
 *
 * Use available hints to allocate space using the freemap.  Create missing
 * freemap infrastructure on-the-fly as needed (including marking initial
 * allocations using the iterator as allocated, instantiating new 2GB zones,
 * and dealing with the end-of-media edge case).
 *
 * ip and bpref are only used as a heuristic to determine locality of
 * reference.  bref->key may also be used heuristically.
 */
int
hammer2_freemap_alloc(hammer2_trans_t *trans,
                      hammer2_blockref_t *bref, size_t bytes)
{
        hammer2_mount_t *hmp = trans->hmp;
        hammer2_chain_t *parent;
        hammer2_off_t bpref;
        hammer2_off_t bnext;
        int freemap_radix;
        int radix;
        int error;

        /*
         * Validate the allocation size.  It must be a power of 2.
         */
        radix = hammer2_getradix(bytes);
        KKASSERT((size_t)1 << radix == bytes);

        /*
         * Freemap elements are assigned from the reserve area.
         * Note that FREEMAP_LEVEL0_PSIZE is 256 bytes, which is
         * allowed for this case.
         */
        if (bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
            bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
                return(hammer2_freemap_reserve(hmp, bref, radix));
        }
#if USE_SIMPLE_ALLOC
        return (hammer2_freemap_simple_alloc(hmp, bref, radix));
#else

        KKASSERT(bytes >= HAMMER2_MIN_ALLOC &&
                 bytes <= HAMMER2_MAX_ALLOC);

        /*
         * Calculate the starting point for our allocation search.
         *
         * Each freemap leaf is dedicated to a specific freemap_radix.
         * The freemap_radix can be more fine-grained than the device buffer
         * radix, which results in inodes being grouped together in their
         * own segment, terminal-data (16K or less) and initial indirect
         * blocks being grouped together, and then full-indirect and full-data
         * blocks (64K) being grouped together.
         *
         * The single most important aspect of this is the inode grouping
         * because that is what allows 'find' and 'ls' and other filesystem
         * topology operations to run fast.
         */
        freemap_radix = hammer2_freemapradix(radix);
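        /*
         * Illustrative note (not from the original source): in this
         * revision hammer2_freemapradix() is the identity function (see
         * its definition near the top of this file), so each leaf is
         * effectively keyed by the raw allocation radix; the grouping
         * described above comes from the per-radix leaves and from the
         * hmp->heur_freemap[] hint indexed by freemap_radix below.
         */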
#if 0
        if (bref->data_off & ~HAMMER2_OFF_MASK_RADIX)
                bpref = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
        else if (trans->tmp_bpref)
                bpref = trans->tmp_bpref;
        else if (trans->tmp_ip)
                bpref = trans->tmp_ip->chain->bref.data_off;
        else
#endif
        KKASSERT(radix >= 0 && radix <= HAMMER2_MAX_RADIX);
        bpref = hmp->heur_freemap[freemap_radix];

        /*
         * Make sure bpref is in-bounds.  It's ok if bpref covers a zone's
         * reserved area, the try code will iterate past it.
         */
        if (bpref > hmp->voldata.volu_size)
                bpref = hmp->voldata.volu_size - 1;

        /*
         * Iterate the freemap looking for free space before and after.
         */
        parent = &hmp->fchain;
        hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
        error = EAGAIN;
        bnext = bpref;

        while (error == EAGAIN) {
                error = hammer2_freemap_try_alloc(trans, &parent, bref,
                                                  radix, bpref, &bnext);
        }
        hmp->heur_freemap[freemap_radix] = bnext;
        hammer2_chain_unlock(parent);

        return (error);
#endif
}
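
#if 0
/*
 * Illustrative sketch (not part of the original source): how a caller
 * might decode the combined offset/radix returned in bref->data_off.
 * The low bits carry the size radix and the remaining bits carry the
 * media offset, mirroring the decode performed by the disabled
 * hammer2_freemap_free() at the bottom of this file.
 */
static void
hammer2_freemap_example_decode(hammer2_blockref_t *bref)
{
        hammer2_off_t data_off;
        int radix;

        radix = (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
        data_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
        kprintf("allocated %d bytes at %016jx\n",
                1 << radix, (intmax_t)data_off);
}
#endif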

#if !USE_SIMPLE_ALLOC

static __inline
int
countbits(uint64_t *data)
{
        int i;
        int r = 0;
        uint64_t v;

        for (i = 0; i < 32; ++i) {
                v = data[i];
                while (v) {
                        if (v & 1)
                                ++r;
                        v >>= 1;
                }
        }
        return(r);
}

/*
 * Attempt to allocate (1 << radix) bytes from the freemap at bnext.
 * Return 0 on success with the bref appropriately updated, non-zero
 * on failure.  Updates bnextp and returns EAGAIN to continue the
 * iteration.
 *
 * This function will create missing freemap infrastructure as well as
 * properly initialize reserved areas as already having been allocated.
 */
static int
hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                          hammer2_blockref_t *bref, int radix,
                          hammer2_off_t bpref, hammer2_off_t *bnextp)
{
        hammer2_mount_t *hmp = trans->hmp;
        hammer2_off_t l0mask;
        hammer2_off_t l0size;
        hammer2_chain_t *chain;
        hammer2_off_t key;
        hammer2_off_t tmp;
        size_t bytes;
        uint64_t mask;
        uint64_t tmp_mask;
        uint64_t *data;
        int error = 0;
        int bits;
        int index;
        int count;
        int subindex;
        int freemap_radix;
        int devblk_radix;

        /*
         * Calculate the number of bytes being allocated, the number
         * of contiguous bits of bitmap being allocated, and the bitmap
         * mask.
         *
         * WARNING! A shift count of 64 is undefined and cpu hardware may
         *          mask it down to 0, blowing up the mask calculation,
         *          hence the special case for bits == 64.
         */
        bytes = (size_t)1 << radix;
        bits = 1 << (radix - HAMMER2_MIN_RADIX);
        mask = (bits == 64) ? (uint64_t)-1 : (((uint64_t)1 << bits) - 1);
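
        /*
         * Illustrative examples (not from the original source), assuming
         * the 1KB-per-bit granularity described later in this function:
         * a 1KB request uses bits = 1 and mask = 0x1, a 16KB request uses
         * bits = 16 and mask = 0xffff, and a 64KB request uses bits = 64,
         * where the special case above yields an all-ones mask covering an
         * entire bitmap word.
         */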

        devblk_radix = hammer2_devblkradix(radix);
        freemap_radix = hammer2_freemapradix(radix);

        /*
         * Lookup the level0 freemap chain, creating and initializing one
         * if necessary.  Intermediate levels will be created automatically
         * when necessary by hammer2_chain_create().
         */
        key = H2FMBASE(*bnextp, HAMMER2_FREEMAP_LEVEL0_RADIX);
        l0mask = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX) - 1;
        l0size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);

        chain = hammer2_chain_lookup(parentp, key, key + l0mask,
                                     HAMMER2_LOOKUP_FREEMAP |
                                     HAMMER2_LOOKUP_ALWAYS |
                                     HAMMER2_LOOKUP_MATCHIND/*XXX*/);
        if (chain == NULL) {
                /*
                 * Create the missing leaf, be sure to initialize
                 * the auxiliary freemap tracking information in
                 * the bref.check.freemap structure.
                 */
#if 0
                kprintf("freemap create L0 @ %016jx bpref %016jx\n",
                        key, bpref);
#endif
                error = hammer2_chain_create(trans, parentp, &chain,
                                     key, HAMMER2_FREEMAP_LEVEL0_RADIX,
                                     HAMMER2_BREF_TYPE_FREEMAP_LEAF,
                                     HAMMER2_FREEMAP_LEVEL0_PSIZE);
                if (error == 0) {
                        hammer2_chain_modify(trans, &chain, 0);
                        bzero(chain->data->bmdata.array,
                              HAMMER2_FREEMAP_LEVEL0_PSIZE);
                        chain->bref.check.freemap.biggest =
                                        HAMMER2_FREEMAP_LEVEL0_RADIX;
                        chain->bref.check.freemap.avail = l0size;
                        chain->bref.check.freemap.radix = freemap_radix;

                        /*
                         * Preset bitmap for existing static allocations.
                         * 64-bit-align so we don't have to worry about
                         * endian for the memset().
                         */
                        tmp = (hmp->voldata.allocator_beg +
                               HAMMER2_MAX_ALLOC - 1) &
                              ~(hammer2_off_t)(HAMMER2_MAX_ALLOC - 1);
                        if (key < tmp) {
                                if (key + l0size <= tmp) {
                                        memset(chain->data->bmdata.array, -1,
                                               l0size / HAMMER2_MIN_ALLOC / 8);
                                        chain->bref.check.freemap.avail = 0;
                                } else {
                                        count = (tmp - key) / HAMMER2_MIN_ALLOC;
                                        kprintf("Init L0 BASE %d\n", count);
                                        memset(chain->data->bmdata.array, -1,
                                               count / 8);
                                        chain->bref.check.freemap.avail -=
                                                count * HAMMER2_MIN_ALLOC;
                                }
                        }

                        /*
                         * Preset bitmap for reserved area.  Calculate
                         * 2GB base.
                         */
                        tmp = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX);
                        if (key - tmp < HAMMER2_ZONE_SEG) {
                                memset(chain->data->bmdata.array, -1,
                                       l0size / HAMMER2_MIN_ALLOC / 8);
                                chain->bref.check.freemap.avail = 0;
                        }

                        /*
                         * Preset bitmap for end of media
                         */
                        if (key >= trans->hmp->voldata.volu_size) {
                                memset(chain->data->bmdata.array, -1,
                                       l0size / HAMMER2_MIN_ALLOC / 8);
                                chain->bref.check.freemap.avail = 0;
                        }
                }
        } else if (chain->bref.check.freemap.biggest < radix) {
                /*
                 * Already flagged as not having enough space
                 */
                error = ENOSPC;
        } else if (chain->bref.check.freemap.radix != freemap_radix) {
                /*
                 * Wrong cluster radix, cannot allocate from this leaf.
                 */
                error = ENOSPC;
        } else {
                /*
                 * Modify existing chain to setup for adjustment.
                 */
                hammer2_chain_modify(trans, &chain, 0);
        }
        if (error)
                goto skip;

        /*
         * Calculate mask and count.  Each bit represents 1KB and (currently)
         * the maximum allocation is 65536 bytes.  Allocations are always
         * natively aligned.
         */
        count = HAMMER2_FREEMAP_LEVEL0_PSIZE / sizeof(uint64_t); /* 32 */
        data = &chain->data->bmdata.array[0];
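
        /*
         * Illustrative arithmetic (not from the original source): the
         * 256-byte level 0 bitmap is treated as 32 uint64_t words.  With
         * 1 bit per 1KB, each word covers 64KB and the full leaf covers
         * 32 x 64KB = 2MB, matching the 2MB-per-leaf figure used
         * throughout this file.
         */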

        tmp_mask = 0; /* avoid compiler warnings */
        subindex = 0; /* avoid compiler warnings */

        /*
         * Allocate data and meta-data from the beginning and inodes
         * from the end.
         */
        for (index = 0; index < count; ++index) {
                if (data[index] == (uint64_t)-1) /* all allocated */
                        continue;
                tmp_mask = mask;                 /* iterate */
                for (subindex = 0; subindex < 64; subindex += bits) {
                        if ((data[index] & tmp_mask) == 0)
                                break;
                        tmp_mask <<= bits;
                }
                if (subindex != 64) {
                        key += HAMMER2_MIN_ALLOC * 64 * index;
                        key += HAMMER2_MIN_ALLOC * subindex;
                        break;
                }
        }
        if (index == count)
                error = ENOSPC;
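
        /*
         * Illustrative example (not from the original source), assuming the
         * 1KB-per-bit granularity noted above: for a 4KB allocation
         * (bits = 4) the scan walks each 64-bit word from word 0 upward and
         * slides a 0xf mask through it in 4-bit steps; if the first free
         * slot is found at word 3, sub-position 8, the key is advanced by
         * 3 x 64KB + 8 x 1KB from the leaf base, which becomes the media
         * offset returned below.
         */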

skip:
        if (error == 0) {
                /*
                 * Assert validity.  Must be beyond the static allocator used
                 * by newfs_hammer2 (and thus also beyond the aux area),
                 * must not go past the volume size, and must not be in the
                 * reserved segment area for a zone.
                 */
                int prebuf;

                KKASSERT(key >= hmp->voldata.allocator_beg &&
                         key + bytes <= hmp->voldata.volu_size);
                KKASSERT((key & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG);

                /*
                 * Modify the chain and set the bitmap appropriately.
                 *
                 * For smaller allocations try to avoid a read-before-write
                 * by priming the buffer cache buffer.  The caller handles
                 * read-avoidance for larger allocations (or more properly,
                 * when the chain is locked).
                 */
                prebuf = 0;
                hammer2_chain_modify(trans, &chain, 0);
                data = &chain->data->bmdata.array[0];
                if (radix != devblk_radix) {
                        uint64_t iomask;
                        int iobmradix = HAMMER2_MINIORADIX - HAMMER2_MIN_RADIX;
                        int ioindex;
                        int iobmskip = 1 << iobmradix;

                        iomask = ((uint64_t)1 << iobmskip) - 1;
                        for (ioindex = 0; ioindex < 64; ioindex += iobmskip) {
                                if (tmp_mask & iomask) {
                                        if ((data[index] & iomask) == 0)
                                                prebuf = 1;
                                        break;
                                }
                                iomask <<= iobmskip;
                        }
                }

                KKASSERT((data[index] & tmp_mask) == 0);
                data[index] |= tmp_mask;

                /*
                 * We return the allocated space in bref->data_off.
                 */
                *bnextp = key;
                bref->data_off = key | radix;

                if (prebuf) {
                        struct buf *bp;
                        hammer2_off_t pbase;
                        hammer2_off_t csize;
                        hammer2_off_t cmask;

                        csize = (hammer2_off_t)1 << devblk_radix;
                        cmask = csize - 1;
                        pbase = key & ~cmask;

                        bp = getblk(hmp->devvp, pbase, csize,
                                    GETBLK_NOWAIT, 0);
                        if (bp) {
                                if ((bp->b_flags & B_CACHE) == 0)
                                        vfs_bio_clrbuf(bp);
                                bqrelse(bp);
                        }
                }
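
                /*
                 * Illustrative note (not from the original source): the
                 * prebuf path primes the buffer cache for the device block
                 * backing a brand-new allocation.  Because no other bits
                 * were previously set in that block's portion of the
                 * bitmap, its prior contents are irrelevant, so getblk()
                 * plus vfs_bio_clrbuf() hands back a zeroed buffer without
                 * issuing a device read; GETBLK_NOWAIT simply skips the
                 * optimization if the buffer cannot be obtained immediately.
                 */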

#if 0
                kprintf("alloc cp=%p %016jx %016jx using %016jx chain->data %d\n",
                        chain,
                        bref->key, bref->data_off, chain->bref.data_off,
                        countbits(data));
#endif
        } else if (error == ENOSPC) {
                /*
                 * Return EAGAIN with next iteration in *bnextp, or
                 * return ENOSPC if the allocation map has been exhausted.
                 */
                if (chain->bref.check.freemap.biggest > radix)
                        chain->bref.check.freemap.biggest = radix;
                error = hammer2_freemap_iterate(trans, parentp, &chain,
                                                bpref, bnextp);
        }

        /*
         * Cleanup
         */
        if (chain)
                hammer2_chain_unlock(chain);
        return (error);
}

static int
hammer2_freemap_iterate(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                        hammer2_chain_t **chainp,
                        hammer2_off_t bpref, hammer2_off_t *bnextp)
{
        *bnextp += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
        if (*bnextp >= trans->hmp->voldata.volu_size)
                return (ENOSPC);
        return(EAGAIN);
}
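
/*
 * Illustrative note (not from the original source): each EAGAIN pass from
 * hammer2_freemap_try_alloc() advances *bnextp by one level 0 leaf span
 * (H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX), i.e. the 2MB figure used above),
 * so the while (error == EAGAIN) loop in hammer2_freemap_alloc() walks the
 * freemap leaf by leaf until an allocation succeeds or *bnextp reaches the
 * end of the volume and ENOSPC is returned.
 */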

#endif

#if 0

void
hammer2_freemap_free(hammer2_mount_t *hmp, hammer2_off_t data_off, int type)
{
        hammer2_freecache_t *fc;
        int radix;
        int fctype;

        switch(type) {
        case HAMMER2_BREF_TYPE_INODE:
                fctype = HAMMER2_FREECACHE_INODE;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
                fctype = HAMMER2_FREECACHE_INODE;
                break;
        case HAMMER2_BREF_TYPE_DATA:
                fctype = HAMMER2_FREECACHE_DATA;
                break;
        default:
                fctype = HAMMER2_FREECACHE_DATA;
                break;
        }
        radix = (int)data_off & HAMMER2_OFF_MASK_RADIX;
        data_off &= ~HAMMER2_OFF_MASK_RADIX;
        if (radix >= HAMMER2_MAX_RADIX)
                return;

        fc = &hmp->freecache[fctype][radix];
        if (fc->single == 0) {
                lockmgr(&hmp->alloclk, LK_EXCLUSIVE);
                fc->single = data_off;
                lockmgr(&hmp->alloclk, LK_RELEASE);
        }
}

#endif

#if 0
/*
 * Allocate media space, returning a combined data offset and radix.
 * Also return the related (device) buffer cache buffer.
 */
hammer2_off_t
hammer2_freemap_alloc_bp(hammer2_mount_t *hmp, size_t bytes, struct buf **bpp)
{
}

#endif