Commit | Line | Data |
---|---|---|
5c23d7f1 | 1 | /* |
68b321c1 | 2 | * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved. |
5c23d7f1 MD |
3 | * |
4 | * This code is derived from software contributed to The DragonFly Project | |
5 | * by Matthew Dillon <dillon@dragonflybsd.org> | |
6 | * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * | |
12 | * 1. Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | |
14 | * 2. Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in | |
16 | * the documentation and/or other materials provided with the | |
17 | * distribution. | |
18 | * 3. Neither the name of The DragonFly Project nor the names of its | |
19 | * contributors may be used to endorse or promote products derived | |
20 | * from this software without specific, prior written permission. | |
21 | * | |
22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
25 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
26 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
27 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
28 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
29 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
30 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
31 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
32 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
33 | * SUCH DAMAGE. | |
34 | */ | |
35 | #include <sys/param.h> | |
36 | #include <sys/systm.h> | |
37 | #include <sys/kernel.h> | |
5c23d7f1 | 38 | #include <sys/proc.h> |
5c23d7f1 | 39 | #include <sys/mount.h> |
5c23d7f1 MD |
40 | |
41 | #include "hammer2.h" | |
42 | ||
8138a154 MD |
43 | #define FREEMAP_DEBUG 0 |
44 | ||
91abd410 MD |
45 | struct hammer2_fiterate { |
46 | hammer2_off_t bpref; | |
47 | hammer2_off_t bnext; | |
48 | int loops; | |
65cacacf | 49 | int relaxed; |
91abd410 MD |
50 | }; |
51 | ||
52 | typedef struct hammer2_fiterate hammer2_fiterate_t; | |
53 | ||
c603b86b | 54 | static int hammer2_freemap_try_alloc(hammer2_chain_t **parentp, |
e2163f5b MD |
55 | hammer2_blockref_t *bref, int radix, |
56 | hammer2_fiterate_t *iter, hammer2_tid_t mtid); | |
c603b86b | 57 | static void hammer2_freemap_init(hammer2_dev_t *hmp, |
a5913bdf | 58 | hammer2_key_t key, hammer2_chain_t *chain); |
c603b86b | 59 | static int hammer2_bmap_alloc(hammer2_dev_t *hmp, |
91abd410 | 60 | hammer2_bmap_data_t *bmap, uint16_t class, |
aff72996 | 61 | int n, int sub_key, int radix, hammer2_key_t *basep); |
c603b86b MD |
62 | static int hammer2_freemap_iterate(hammer2_chain_t **parentp, |
63 | hammer2_chain_t **chainp, | |
91abd410 | 64 | hammer2_fiterate_t *iter); |
1a7cfe5a | 65 | |
1a7cfe5a MD |
66 | /* |
67 | * Calculate the device offset for the specified FREEMAP_NODE or FREEMAP_LEAF | |
68 | * bref. Return a combined media offset and physical size radix. Freemap | |
69 | * chains use fixed storage offsets in the 4MB reserved area at the | |
641a6901 | 70 | * beginning of each 1GB zone. |
1a7cfe5a | 71 | * |
641a6901 | 72 | * Rotate between eight possibilities. Theoretically this means we have seven |
1a7cfe5a MD |
73 | * good freemaps in case of a crash which we can use as a base for the fixup |
74 | * scan at mount-time. | |
75 | */ | |
1a7cfe5a MD |
76 | static |
77 | int | |
c603b86b | 78 | hammer2_freemap_reserve(hammer2_chain_t *chain, int radix) |
1a7cfe5a | 79 | { |
623d43d4 | 80 | hammer2_blockref_t *bref = &chain->bref; |
1a7cfe5a | 81 | hammer2_off_t off; |
a3fd5153 | 82 | int index; |
5cebbe36 | 83 | int index_inc; |
1a7cfe5a MD |
84 | size_t bytes; |
85 | ||
86 | /* | |
5cebbe36 | 87 | * Physical allocation size. |
1a7cfe5a | 88 | */ |
5cebbe36 | 89 | bytes = (size_t)1 << radix; |
1a7cfe5a MD |
90 | |
91 | /* | |
a71db85d MD |
92 | * Calculate block selection index 0..7 of current block. If this |
93 | * is the first allocation of the block (verses a modification of an | |
94 | * existing block), we use index 0, otherwise we use the next rotating | |
95 | * index. | |
1a7cfe5a | 96 | */ |
3148f677 MD |
97 | if ((bref->data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) { |
98 | index = 0; | |
1a7cfe5a | 99 | } else { |
3148f677 | 100 | off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX & |
cef11e5a | 101 | HAMMER2_SEGMASK; |
3148f677 MD |
102 | off = off / HAMMER2_PBUFSIZE; |
103 | KKASSERT(off >= HAMMER2_ZONE_FREEMAP_00 && | |
104 | off < HAMMER2_ZONE_FREEMAP_END); | |
5031809d MD |
105 | index = (int)(off - HAMMER2_ZONE_FREEMAP_00) / |
106 | HAMMER2_ZONE_FREEMAP_INC; | |
3148f677 MD |
107 | KKASSERT(index >= 0 && index < HAMMER2_NFREEMAPS); |
108 | if (++index == HAMMER2_NFREEMAPS) | |
a71db85d | 109 | index = 0; |
623d43d4 | 110 | } |
1a7cfe5a | 111 | |
1a7cfe5a MD |
112 | /* |
113 | * Calculate the block offset of the reserved block. This will | |
114 | * point into the 4MB reserved area at the base of the appropriate | |
115 | * 2GB zone, once added to the FREEMAP_x selection above. | |
116 | */ | |
5cebbe36 MD |
117 | index_inc = index * HAMMER2_ZONE_FREEMAP_INC; |
118 | ||
1a7cfe5a | 119 | switch(bref->keybits) { |
5cebbe36 | 120 | /* case HAMMER2_FREEMAP_LEVEL6_RADIX: not applicable */ |
18a3521f | 121 | case HAMMER2_FREEMAP_LEVEL5_RADIX: /* 4EB */ |
5cebbe36 MD |
122 | KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE); |
123 | KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); | |
124 | off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL5_RADIX) + | |
125 | (index_inc + HAMMER2_ZONE_FREEMAP_00 + | |
126 | HAMMER2_ZONEFM_LEVEL5) * HAMMER2_PBUFSIZE; | |
127 | break; | |
18a3521f | 128 | case HAMMER2_FREEMAP_LEVEL4_RADIX: /* 16PB */ |
1a7cfe5a MD |
129 | KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE); |
130 | KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); | |
a3fd5153 | 131 | off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL4_RADIX) + |
5cebbe36 | 132 | (index_inc + HAMMER2_ZONE_FREEMAP_00 + |
ba8a9be0 | 133 | HAMMER2_ZONEFM_LEVEL4) * HAMMER2_PBUFSIZE; |
1a7cfe5a | 134 | break; |
18a3521f | 135 | case HAMMER2_FREEMAP_LEVEL3_RADIX: /* 64TB */ |
1a7cfe5a MD |
136 | KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE); |
137 | KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); | |
a3fd5153 | 138 | off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL3_RADIX) + |
5cebbe36 | 139 | (index_inc + HAMMER2_ZONE_FREEMAP_00 + |
ba8a9be0 | 140 | HAMMER2_ZONEFM_LEVEL3) * HAMMER2_PBUFSIZE; |
1a7cfe5a | 141 | break; |
18a3521f | 142 | case HAMMER2_FREEMAP_LEVEL2_RADIX: /* 256GB */ |
1a7cfe5a MD |
143 | KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE); |
144 | KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); | |
a3fd5153 | 145 | off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL2_RADIX) + |
5cebbe36 | 146 | (index_inc + HAMMER2_ZONE_FREEMAP_00 + |
ba8a9be0 | 147 | HAMMER2_ZONEFM_LEVEL2) * HAMMER2_PBUFSIZE; |
1a7cfe5a | 148 | break; |
18a3521f | 149 | case HAMMER2_FREEMAP_LEVEL1_RADIX: /* 1GB */ |
93f3933a | 150 | KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF); |
1a7cfe5a | 151 | KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); |
a3fd5153 | 152 | off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL1_RADIX) + |
5cebbe36 | 153 | (index_inc + HAMMER2_ZONE_FREEMAP_00 + |
ba8a9be0 | 154 | HAMMER2_ZONEFM_LEVEL1) * HAMMER2_PBUFSIZE; |
1a7cfe5a | 155 | break; |
1a7cfe5a MD |
156 | default: |
157 | panic("freemap: bad radix(2) %p %d\n", bref, bref->keybits); | |
158 | /* NOT REACHED */ | |
a3fd5153 | 159 | off = (hammer2_off_t)-1; |
1a7cfe5a MD |
160 | break; |
161 | } | |
162 | bref->data_off = off | radix; | |
8138a154 MD |
163 | #if FREEMAP_DEBUG |
164 | kprintf("FREEMAP BLOCK TYPE %d %016jx/%d DATA_OFF=%016jx\n", | |
165 | bref->type, bref->key, bref->keybits, bref->data_off); | |
623d43d4 | 166 | #endif |
1a7cfe5a MD |
167 | return (0); |
168 | } | |
169 | ||
1a7cfe5a MD |
170 | /* |
171 | * Normal freemap allocator | |
172 | * | |
173 | * Use available hints to allocate space using the freemap. Create missing | |
174 | * freemap infrastructure on-the-fly as needed (including marking initial | |
175 | * allocations using the iterator as allocated, instantiating new 2GB zones, | |
176 | * and dealing with the end-of-media edge case). | |
177 | * | |
8d920968 | 178 | * bpref is only used as a heuristic to determine locality of reference. |
da0cdd33 MD |
179 | * |
180 | * This function is a NOP if bytes is 0. | |
1a7cfe5a MD |
181 | */ |
182 | int | |
c603b86b | 183 | hammer2_freemap_alloc(hammer2_chain_t *chain, size_t bytes) |
1a7cfe5a | 184 | { |
506bd6d1 | 185 | hammer2_dev_t *hmp = chain->hmp; |
623d43d4 | 186 | hammer2_blockref_t *bref = &chain->bref; |
1a7cfe5a | 187 | hammer2_chain_t *parent; |
e2163f5b | 188 | hammer2_tid_t mtid; |
1a7cfe5a MD |
189 | int radix; |
190 | int error; | |
91abd410 MD |
191 | unsigned int hindex; |
192 | hammer2_fiterate_t iter; | |
1a7cfe5a | 193 | |
da0cdd33 MD |
194 | /* |
195 | * If allocating or downsizing to zero we just get rid of whatever | |
196 | * data_off we had. | |
197 | */ | |
198 | if (bytes == 0) { | |
199 | chain->bref.data_off = 0; | |
200 | return 0; | |
201 | } | |
202 | ||
65c894ff | 203 | KKASSERT(hmp->spmp); |
e2163f5b MD |
204 | mtid = hammer2_trans_sub(hmp->spmp); |
205 | ||
1a7cfe5a MD |
206 | /* |
207 | * Validate the allocation size. It must be a power of 2. | |
93f3933a MD |
208 | * |
209 | * For now require that the caller be aware of the minimum | |
210 | * allocation (1K). | |
1a7cfe5a MD |
211 | */ |
212 | radix = hammer2_getradix(bytes); | |
213 | KKASSERT((size_t)1 << radix == bytes); | |
214 | ||
1a7cfe5a MD |
215 | if (bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE || |
216 | bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { | |
50456506 MD |
217 | /* |
218 | * Freemap blocks themselves are assigned from the reserve | |
219 | * area, not allocated from the freemap. | |
220 | */ | |
c603b86b | 221 | error = hammer2_freemap_reserve(chain, radix); |
470dad14 | 222 | |
50456506 | 223 | return error; |
1a7cfe5a | 224 | } |
1a7cfe5a | 225 | |
50456506 | 226 | KKASSERT(bytes >= HAMMER2_ALLOC_MIN && bytes <= HAMMER2_ALLOC_MAX); |
91abd410 | 227 | |
91abd410 MD |
228 | /* |
229 | * Heuristic tracking index. We would like one for each distinct | |
230 | * bref type if possible. heur_freemap[] has room for two classes | |
231 | * for each type. At a minimum we have to break-up our heuristic | |
232 | * by device block sizes. | |
233 | */ | |
ee78054e | 234 | hindex = HAMMER2_PBUFRADIX - HAMMER2_LBUFRADIX; |
91abd410 MD |
235 | KKASSERT(hindex < HAMMER2_FREEMAP_HEUR_NRADIX); |
236 | hindex += bref->type * HAMMER2_FREEMAP_HEUR_NRADIX; | |
237 | hindex &= HAMMER2_FREEMAP_HEUR_TYPES * HAMMER2_FREEMAP_HEUR_NRADIX - 1; | |
3f01ebaa | 238 | KKASSERT(hindex < HAMMER2_FREEMAP_HEUR_SIZE); |
91abd410 MD |
239 | |
240 | iter.bpref = hmp->heur_freemap[hindex]; | |
65cacacf | 241 | iter.relaxed = hmp->freemap_relaxed; |
1a7cfe5a MD |
242 | |
243 | /* | |
244 | * Make sure bpref is in-bounds. It's ok if bpref covers a zone's | |
245 | * reserved area, the try code will iterate past it. | |
246 | */ | |
0b738157 TK |
247 | if (iter.bpref > hmp->total_size) |
248 | iter.bpref = hmp->total_size - 1; | |
1a7cfe5a MD |
249 | |
250 | /* | |
251 | * Iterate the freemap looking for free space before and after. | |
252 | */ | |
253 | parent = &hmp->fchain; | |
e513e77e | 254 | hammer2_chain_ref(parent); |
1a7cfe5a | 255 | hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); |
65cacacf | 256 | error = HAMMER2_ERROR_EAGAIN; |
91abd410 MD |
257 | iter.bnext = iter.bpref; |
258 | iter.loops = 0; | |
1a7cfe5a | 259 | |
65cacacf | 260 | while (error == HAMMER2_ERROR_EAGAIN) { |
e2163f5b MD |
261 | error = hammer2_freemap_try_alloc(&parent, bref, radix, |
262 | &iter, mtid); | |
1a7cfe5a | 263 | } |
65cacacf | 264 | hmp->freemap_relaxed |= iter.relaxed; /* heuristical, SMP race ok */ |
91abd410 | 265 | hmp->heur_freemap[hindex] = iter.bnext; |
1a7cfe5a | 266 | hammer2_chain_unlock(parent); |
e513e77e | 267 | hammer2_chain_drop(parent); |
1a7cfe5a MD |
268 | |
269 | return (error); | |
1a7cfe5a MD |
270 | } |
271 | ||
272 | static int | |
c603b86b | 273 | hammer2_freemap_try_alloc(hammer2_chain_t **parentp, |
1a7cfe5a | 274 | hammer2_blockref_t *bref, int radix, |
e2163f5b | 275 | hammer2_fiterate_t *iter, hammer2_tid_t mtid) |
1a7cfe5a | 276 | { |
506bd6d1 | 277 | hammer2_dev_t *hmp = (*parentp)->hmp; |
1a7cfe5a | 278 | hammer2_off_t l0size; |
93f3933a MD |
279 | hammer2_off_t l1size; |
280 | hammer2_off_t l1mask; | |
1897c66e | 281 | hammer2_key_t key_dummy; |
1a7cfe5a MD |
282 | hammer2_chain_t *chain; |
283 | hammer2_off_t key; | |
1a7cfe5a | 284 | size_t bytes; |
91abd410 | 285 | uint16_t class; |
c8c0a18a | 286 | int error; |
1a7cfe5a MD |
287 | |
288 | /* | |
8d920968 | 289 | * Calculate the number of bytes being allocated. |
1a7cfe5a MD |
290 | */ |
291 | bytes = (size_t)1 << radix; | |
ee78054e | 292 | class = (bref->type << 8) | HAMMER2_PBUFRADIX; |
a98aa0b0 | 293 | |
1a7cfe5a | 294 | /* |
93f3933a | 295 | * Lookup the level1 freemap chain, creating and initializing one |
1a7cfe5a MD |
296 | * if necessary. Intermediate levels will be created automatically |
297 | * when necessary by hammer2_chain_create(). | |
298 | */ | |
91abd410 | 299 | key = H2FMBASE(iter->bnext, HAMMER2_FREEMAP_LEVEL1_RADIX); |
4e47c34e TK |
300 | l0size = HAMMER2_FREEMAP_LEVEL0_SIZE; |
301 | l1size = HAMMER2_FREEMAP_LEVEL1_SIZE; | |
93f3933a | 302 | l1mask = l1size - 1; |
1a7cfe5a | 303 | |
1897c66e | 304 | chain = hammer2_chain_lookup(parentp, &key_dummy, key, key + l1mask, |
c8c0a18a | 305 | &error, |
1a7cfe5a | 306 | HAMMER2_LOOKUP_ALWAYS | |
b8ba9690 | 307 | HAMMER2_LOOKUP_MATCHIND); |
623d43d4 | 308 | |
1a7cfe5a MD |
309 | if (chain == NULL) { |
310 | /* | |
311 | * Create the missing leaf, be sure to initialize | |
312 | * the auxillary freemap tracking information in | |
313 | * the bref.check.freemap structure. | |
314 | */ | |
315 | #if 0 | |
93f3933a | 316 | kprintf("freemap create L1 @ %016jx bpref %016jx\n", |
91abd410 | 317 | key, iter->bpref); |
1a7cfe5a | 318 | #endif |
ecfe89b8 MD |
319 | error = hammer2_chain_create(parentp, &chain, NULL, hmp->spmp, |
320 | HAMMER2_METH_DEFAULT, | |
93f3933a | 321 | key, HAMMER2_FREEMAP_LEVEL1_RADIX, |
1a7cfe5a | 322 | HAMMER2_BREF_TYPE_FREEMAP_LEAF, |
b3659de2 | 323 | HAMMER2_FREEMAP_LEVELN_PSIZE, |
3f01ebaa | 324 | mtid, 0, 0); |
0924b3f8 | 325 | KKASSERT(error == 0); |
1a7cfe5a | 326 | if (error == 0) { |
3f01ebaa | 327 | hammer2_chain_modify(chain, mtid, 0, 0); |
93f3933a MD |
328 | bzero(&chain->data->bmdata[0], |
329 | HAMMER2_FREEMAP_LEVELN_PSIZE); | |
330 | chain->bref.check.freemap.bigmask = (uint32_t)-1; | |
331 | chain->bref.check.freemap.avail = l1size; | |
332 | /* bref.methods should already be inherited */ | |
1a7cfe5a | 333 | |
c603b86b | 334 | hammer2_freemap_init(hmp, key, chain); |
1a7cfe5a | 335 | } |
b93cc2e0 MD |
336 | } else if (chain->error) { |
337 | /* | |
338 | * Error during lookup. | |
339 | */ | |
340 | kprintf("hammer2_freemap_try_alloc: %016jx: error %s\n", | |
341 | (intmax_t)bref->data_off, | |
342 | hammer2_error_str(chain->error)); | |
65cacacf | 343 | error = HAMMER2_ERROR_EIO; |
5cebbe36 MD |
344 | } else if ((chain->bref.check.freemap.bigmask & |
345 | ((size_t)1 << radix)) == 0) { | |
1a7cfe5a MD |
346 | /* |
347 | * Already flagged as not having enough space | |
348 | */ | |
65cacacf | 349 | error = HAMMER2_ERROR_ENOSPC; |
1a7cfe5a MD |
350 | } else { |
351 | /* | |
352 | * Modify existing chain to setup for adjustment. | |
353 | */ | |
3f01ebaa | 354 | hammer2_chain_modify(chain, mtid, 0, 0); |
1a7cfe5a | 355 | } |
1a7cfe5a MD |
356 | |
357 | /* | |
30b0abf3 | 358 | * Scan 4MB entries. |
1a7cfe5a | 359 | */ |
93f3933a MD |
360 | if (error == 0) { |
361 | hammer2_bmap_data_t *bmap; | |
362 | hammer2_key_t base_key; | |
363 | int count; | |
364 | int start; | |
365 | int n; | |
366 | ||
10136ab6 | 367 | KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF); |
91abd410 MD |
368 | start = (int)((iter->bnext - key) >> |
369 | HAMMER2_FREEMAP_LEVEL0_RADIX); | |
93f3933a | 370 | KKASSERT(start >= 0 && start < HAMMER2_FREEMAP_COUNT); |
3f01ebaa | 371 | hammer2_chain_modify(chain, mtid, 0, 0); |
1a7cfe5a | 372 | |
65cacacf | 373 | error = HAMMER2_ERROR_ENOSPC; |
93f3933a | 374 | for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) { |
271568b1 MD |
375 | int availchk; |
376 | ||
93f3933a MD |
377 | if (start + count >= HAMMER2_FREEMAP_COUNT && |
378 | start - count < 0) { | |
1a7cfe5a | 379 | break; |
93f3933a | 380 | } |
271568b1 MD |
381 | |
382 | /* | |
65cacacf MD |
383 | * Calculate bmap pointer from thart starting index |
384 | * forwards. | |
271568b1 MD |
385 | * |
386 | * NOTE: bmap pointer is invalid if n >= FREEMAP_COUNT. | |
387 | */ | |
93f3933a MD |
388 | n = start + count; |
389 | bmap = &chain->data->bmdata[n]; | |
271568b1 MD |
390 | |
391 | if (n >= HAMMER2_FREEMAP_COUNT) { | |
392 | availchk = 0; | |
393 | } else if (bmap->avail) { | |
394 | availchk = 1; | |
395 | } else if (radix < HAMMER2_FREEMAP_BLOCK_RADIX && | |
396 | (bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK)) { | |
397 | availchk = 1; | |
398 | } else { | |
399 | availchk = 0; | |
400 | } | |
401 | ||
65cacacf MD |
402 | /* |
403 | * Try to allocate from a matching freemap class | |
404 | * superblock. If we are in relaxed mode we allocate | |
405 | * from any freemap class superblock. | |
406 | */ | |
271568b1 | 407 | if (availchk && |
65cacacf MD |
408 | (bmap->class == 0 || bmap->class == class || |
409 | iter->relaxed)) { | |
93f3933a | 410 | base_key = key + n * l0size; |
c603b86b | 411 | error = hammer2_bmap_alloc(hmp, bmap, |
aff72996 MD |
412 | class, n, |
413 | (int)bref->key, | |
414 | radix, | |
91abd410 | 415 | &base_key); |
65cacacf | 416 | if (error != HAMMER2_ERROR_ENOSPC) { |
93f3933a MD |
417 | key = base_key; |
418 | break; | |
419 | } | |
420 | } | |
271568b1 MD |
421 | |
422 | /* | |
c4fd5af3 | 423 | * Calculate bmap pointer from the starting index |
65cacacf MD |
424 | * backwards (locality). |
425 | * | |
271568b1 MD |
426 | * Must recalculate after potentially having called |
427 | * hammer2_bmap_alloc() above in case chain was | |
428 | * reallocated. | |
429 | * | |
430 | * NOTE: bmap pointer is invalid if n < 0. | |
431 | */ | |
93f3933a MD |
432 | n = start - count; |
433 | bmap = &chain->data->bmdata[n]; | |
271568b1 MD |
434 | if (n < 0) { |
435 | availchk = 0; | |
436 | } else if (bmap->avail) { | |
437 | availchk = 1; | |
438 | } else if (radix < HAMMER2_FREEMAP_BLOCK_RADIX && | |
439 | (bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK)) { | |
440 | availchk = 1; | |
441 | } else { | |
442 | availchk = 0; | |
443 | } | |
444 | ||
65cacacf MD |
445 | /* |
446 | * Try to allocate from a matching freemap class | |
447 | * superblock. If we are in relaxed mode we allocate | |
448 | * from any freemap class superblock. | |
449 | */ | |
271568b1 | 450 | if (availchk && |
65cacacf MD |
451 | (bmap->class == 0 || bmap->class == class || |
452 | iter->relaxed)) { | |
93f3933a | 453 | base_key = key + n * l0size; |
c603b86b | 454 | error = hammer2_bmap_alloc(hmp, bmap, |
aff72996 MD |
455 | class, n, |
456 | (int)bref->key, | |
457 | radix, | |
91abd410 | 458 | &base_key); |
65cacacf | 459 | if (error != HAMMER2_ERROR_ENOSPC) { |
93f3933a MD |
460 | key = base_key; |
461 | break; | |
462 | } | |
463 | } | |
1a7cfe5a | 464 | } |
65cacacf MD |
465 | |
466 | /* | |
467 | * We only know for sure that we can clear the bitmap bit | |
c4fd5af3 MD |
468 | * if we scanned the entire array (start == 0) in relaxed |
469 | * mode. | |
65cacacf | 470 | */ |
c4fd5af3 MD |
471 | if (error == HAMMER2_ERROR_ENOSPC && |
472 | start == 0 && | |
473 | iter->relaxed) | |
474 | { | |
5cebbe36 MD |
475 | chain->bref.check.freemap.bigmask &= |
476 | (uint32_t)~((size_t)1 << radix); | |
477 | } | |
93f3933a | 478 | /* XXX also scan down from original count */ |
1a7cfe5a MD |
479 | } |
480 | ||
1a7cfe5a MD |
481 | if (error == 0) { |
482 | /* | |
483 | * Assert validity. Must be beyond the static allocator used | |
484 | * by newfs_hammer2 (and thus also beyond the aux area), | |
485 | * not go past the volume size, and must not be in the | |
486 | * reserved segment area for a zone. | |
487 | */ | |
488 | KKASSERT(key >= hmp->voldata.allocator_beg && | |
0b738157 | 489 | key + bytes <= hmp->total_size); |
1a7cfe5a | 490 | KKASSERT((key & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG); |
1a7cfe5a | 491 | bref->data_off = key | radix; |
0b8efeb7 MD |
492 | |
493 | /* | |
494 | * Record dedupability. The dedup bits are cleared | |
495 | * when bulkfree transitions the freemap from 11->10, | |
496 | * and asserted to be clear on the 10->00 transition. | |
497 | * | |
498 | * We must record the bitmask with the chain locked | |
499 | * at the time we set the allocation bits to avoid | |
500 | * racing a bulkfree. | |
501 | */ | |
502 | if (bref->type == HAMMER2_BREF_TYPE_DATA) | |
503 | hammer2_io_dedup_set(hmp, bref); | |
1a7cfe5a | 504 | #if 0 |
93f3933a | 505 | kprintf("alloc cp=%p %016jx %016jx using %016jx\n", |
1a7cfe5a | 506 | chain, |
93f3933a | 507 | bref->key, bref->data_off, chain->bref.data_off); |
1a7cfe5a | 508 | #endif |
65cacacf | 509 | } else if (error == HAMMER2_ERROR_ENOSPC) { |
1a7cfe5a | 510 | /* |
91abd410 | 511 | * Return EAGAIN with next iteration in iter->bnext, or |
1a7cfe5a MD |
512 | * return ENOSPC if the allocation map has been exhausted. |
513 | */ | |
c603b86b | 514 | error = hammer2_freemap_iterate(parentp, &chain, iter); |
1a7cfe5a MD |
515 | } |
516 | ||
517 | /* | |
518 | * Cleanup | |
519 | */ | |
e513e77e | 520 | if (chain) { |
1a7cfe5a | 521 | hammer2_chain_unlock(chain); |
e513e77e MD |
522 | hammer2_chain_drop(chain); |
523 | } | |
1a7cfe5a MD |
524 | return (error); |
525 | } | |
526 | ||
93f3933a MD |
527 | /* |
528 | * Allocate (1<<radix) bytes from the bmap whos base data offset is (*basep). | |
529 | * | |
530 | * If the linear iterator is mid-block we use it directly (the bitmap should | |
aff72996 MD |
531 | * already be marked allocated), otherwise we search for a block in the |
532 | * bitmap that fits the allocation request. | |
93f3933a MD |
533 | * |
534 | * A partial bitmap allocation sets the minimum bitmap granularity (16KB) | |
535 | * to fully allocated and adjusts the linear allocator to allow the | |
536 | * remaining space to be allocated. | |
aff72996 MD |
537 | * |
538 | * sub_key is the lower 32 bits of the chain->bref.key for the chain whos | |
539 | * bref is being allocated. If the radix represents an allocation >= 16KB | |
540 | * (aka HAMMER2_FREEMAP_BLOCK_RADIX) we try to use this key to select the | |
541 | * blocks directly out of the bmap. | |
93f3933a MD |
542 | */ |
543 | static | |
544 | int | |
c603b86b | 545 | hammer2_bmap_alloc(hammer2_dev_t *hmp, hammer2_bmap_data_t *bmap, |
aff72996 MD |
546 | uint16_t class, int n, int sub_key, |
547 | int radix, hammer2_key_t *basep) | |
93f3933a | 548 | { |
93f3933a | 549 | size_t size; |
271568b1 | 550 | size_t bgsize; |
93f3933a | 551 | int bmradix; |
5cebbe36 | 552 | hammer2_bitmap_t bmmask; |
93f3933a MD |
553 | int offset; |
554 | int i; | |
555 | int j; | |
556 | ||
557 | /* | |
558 | * Take into account 2-bits per block when calculating bmradix. | |
559 | */ | |
560 | size = (size_t)1 << radix; | |
91abd410 | 561 | |
93f3933a MD |
562 | if (radix <= HAMMER2_FREEMAP_BLOCK_RADIX) { |
563 | bmradix = 2; | |
93f3933a MD |
564 | /* (16K) 2 bits per allocation block */ |
565 | } else { | |
5cebbe36 MD |
566 | bmradix = (hammer2_bitmap_t)2 << |
567 | (radix - HAMMER2_FREEMAP_BLOCK_RADIX); | |
e12df3bc | 568 | /* (32K-64K) 4, 8 bits per allocation block */ |
93f3933a MD |
569 | } |
570 | ||
571 | /* | |
91abd410 MD |
572 | * Use the linear iterator to pack small allocations, otherwise |
573 | * fall-back to finding a free 16KB chunk. The linear iterator | |
574 | * is only valid when *NOT* on a freemap chunking boundary (16KB). | |
575 | * If it is the bitmap must be scanned. It can become invalid | |
576 | * once we pack to the boundary. We adjust it after a bitmap | |
577 | * allocation only for sub-16KB allocations (so the perfectly good | |
578 | * previous value can still be used for fragments when 16KB+ | |
b49f72c3 | 579 | * allocations are made inbetween fragmentary allocations). |
91abd410 | 580 | * |
5cebbe36 | 581 | * Beware of hardware artifacts when bmradix == 64 (intermediate |
91abd410 | 582 | * result can wind up being '1' instead of '0' if hardware masks |
b49f72c3 | 583 | * bit-count & 63). |
93f3933a MD |
584 | * |
585 | * NOTE: j needs to be even in the j= calculation. As an artifact | |
586 | * of the /2 division, our bitmask has to clear bit 0. | |
91abd410 MD |
587 | * |
588 | * NOTE: TODO this can leave little unallocatable fragments lying | |
589 | * around. | |
93f3933a | 590 | */ |
91abd410 MD |
591 | if (((uint32_t)bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK) + size <= |
592 | HAMMER2_FREEMAP_BLOCK_SIZE && | |
2c6f594d | 593 | (bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK) && |
91abd410 | 594 | bmap->linear < HAMMER2_SEGSIZE) { |
aff72996 MD |
595 | /* |
596 | * Use linear iterator if it is not block-aligned to avoid | |
597 | * wasting space. | |
83815ec6 MD |
598 | * |
599 | * Calculate the bitmapq[] index (i) and calculate the | |
600 | * shift count within the 64-bit bitmapq[] entry. | |
601 | * | |
602 | * The freemap block size is 16KB, but each bitmap | |
603 | * entry is two bits so use a little trick to get | |
604 | * a (j) shift of 0, 2, 4, ... 62 in 16KB chunks. | |
aff72996 | 605 | */ |
93f3933a | 606 | KKASSERT(bmap->linear >= 0 && |
91abd410 | 607 | bmap->linear + size <= HAMMER2_SEGSIZE && |
50456506 | 608 | (bmap->linear & (HAMMER2_ALLOC_MIN - 1)) == 0); |
93f3933a | 609 | offset = bmap->linear; |
83815ec6 MD |
610 | i = offset / (HAMMER2_SEGSIZE / HAMMER2_BMAP_ELEMENTS); |
611 | j = (offset / (HAMMER2_FREEMAP_BLOCK_SIZE / 2)) & 62; | |
5cebbe36 MD |
612 | bmmask = (bmradix == HAMMER2_BMAP_BITS_PER_ELEMENT) ? |
613 | HAMMER2_BMAP_ALLONES : | |
614 | ((hammer2_bitmap_t)1 << bmradix) - 1; | |
93f3933a | 615 | bmmask <<= j; |
91abd410 | 616 | bmap->linear = offset + size; |
93f3933a | 617 | } else { |
aff72996 MD |
618 | /* |
619 | * Try to index a starting point based on sub_key. This | |
620 | * attempts to restore sequential block ordering on-disk | |
621 | * whenever possible, even if data is committed out of | |
622 | * order. | |
623 | * | |
624 | * i - Index bitmapq[], full data range represented is | |
625 | * HAMMER2_BMAP_SIZE. | |
626 | * | |
627 | * j - Index within bitmapq[i], full data range represented is | |
628 | * HAMMER2_BMAP_INDEX_SIZE. | |
629 | * | |
630 | * WARNING! | |
631 | */ | |
632 | i = -1; | |
633 | j = -1; | |
634 | ||
635 | switch(class >> 8) { | |
636 | case HAMMER2_BREF_TYPE_DATA: | |
637 | if (radix >= HAMMER2_FREEMAP_BLOCK_RADIX) { | |
638 | i = (sub_key & HAMMER2_BMAP_MASK) / | |
639 | (HAMMER2_BMAP_SIZE / HAMMER2_BMAP_ELEMENTS); | |
640 | j = (sub_key & HAMMER2_BMAP_INDEX_MASK) / | |
641 | (HAMMER2_BMAP_INDEX_SIZE / | |
642 | HAMMER2_BMAP_BLOCKS_PER_ELEMENT); | |
643 | j = j * 2; | |
644 | } | |
645 | break; | |
646 | case HAMMER2_BREF_TYPE_INODE: | |
647 | break; | |
648 | default: | |
649 | break; | |
650 | } | |
651 | if (i >= 0) { | |
652 | KKASSERT(i < HAMMER2_BMAP_ELEMENTS && | |
653 | j < 2 * HAMMER2_BMAP_BLOCKS_PER_ELEMENT); | |
654 | KKASSERT(j + bmradix <= HAMMER2_BMAP_BITS_PER_ELEMENT); | |
655 | bmmask = (bmradix == HAMMER2_BMAP_BITS_PER_ELEMENT) ? | |
656 | HAMMER2_BMAP_ALLONES : | |
657 | ((hammer2_bitmap_t)1 << bmradix) - 1; | |
658 | bmmask <<= j; | |
659 | ||
660 | if ((bmap->bitmapq[i] & bmmask) == 0) | |
661 | goto success; | |
662 | } | |
663 | ||
664 | /* | |
665 | * General element scan. | |
666 | * | |
667 | * WARNING: (j) is iterating a bit index (by 2's) | |
668 | */ | |
5cebbe36 MD |
669 | for (i = 0; i < HAMMER2_BMAP_ELEMENTS; ++i) { |
670 | bmmask = (bmradix == HAMMER2_BMAP_BITS_PER_ELEMENT) ? | |
671 | HAMMER2_BMAP_ALLONES : | |
672 | ((hammer2_bitmap_t)1 << bmradix) - 1; | |
673 | for (j = 0; | |
674 | j < HAMMER2_BMAP_BITS_PER_ELEMENT; | |
675 | j += bmradix) { | |
676 | if ((bmap->bitmapq[i] & bmmask) == 0) | |
93f3933a MD |
677 | goto success; |
678 | bmmask <<= bmradix; | |
679 | } | |
680 | } | |
91abd410 MD |
681 | /*fragments might remain*/ |
682 | /*KKASSERT(bmap->avail == 0);*/ | |
65cacacf | 683 | return (HAMMER2_ERROR_ENOSPC); |
93f3933a | 684 | success: |
5cebbe36 | 685 | offset = i * (HAMMER2_SEGSIZE / HAMMER2_BMAP_ELEMENTS) + |
93f3933a | 686 | (j * (HAMMER2_FREEMAP_BLOCK_SIZE / 2)); |
91abd410 MD |
687 | if (size & HAMMER2_FREEMAP_BLOCK_MASK) |
688 | bmap->linear = offset + size; | |
93f3933a MD |
689 | } |
690 | ||
5cebbe36 MD |
691 | /* 8 x (64/2) -> 256 x 16K -> 4MB */ |
692 | KKASSERT(i >= 0 && i < HAMMER2_BMAP_ELEMENTS); | |
93f3933a MD |
693 | |
694 | /* | |
695 | * Optimize the buffer cache to avoid unnecessary read-before-write | |
696 | * operations. | |
91abd410 MD |
697 | * |
698 | * The device block size could be larger than the allocation size | |
699 | * so the actual bitmap test is somewhat more involved. We have | |
700 | * to use a compatible buffer size for this operation. | |
93f3933a | 701 | */ |
5cebbe36 | 702 | if ((bmap->bitmapq[i] & bmmask) == 0 && |
ee78054e TK |
703 | HAMMER2_PBUFSIZE != size) { |
704 | size_t psize = HAMMER2_PBUFSIZE; | |
91abd410 | 705 | hammer2_off_t pmask = (hammer2_off_t)psize - 1; |
5cebbe36 | 706 | int pbmradix = (hammer2_bitmap_t)2 << |
ee78054e | 707 | (HAMMER2_PBUFRADIX - |
5cebbe36 MD |
708 | HAMMER2_FREEMAP_BLOCK_RADIX); |
709 | hammer2_bitmap_t pbmmask; | |
fdf62707 | 710 | int pradix = hammer2_getradix(psize); |
91abd410 | 711 | |
5cebbe36 MD |
712 | pbmmask = (pbmradix == HAMMER2_BMAP_BITS_PER_ELEMENT) ? |
713 | HAMMER2_BMAP_ALLONES : | |
714 | ((hammer2_bitmap_t)1 << pbmradix) - 1; | |
91abd410 MD |
715 | while ((pbmmask & bmmask) == 0) |
716 | pbmmask <<= pbmradix; | |
717 | ||
2c6f594d | 718 | #if 0 |
5cebbe36 MD |
719 | kprintf("%016jx mask %016jx %016jx %016jx (%zd/%zd)\n", |
720 | *basep + offset, bmap->bitmapq[i], | |
2c6f594d MD |
721 | pbmmask, bmmask, size, psize); |
722 | #endif | |
723 | ||
5cebbe36 | 724 | if ((bmap->bitmapq[i] & pbmmask) == 0) { |
85b1f267 MD |
725 | hammer2_io_t *dio; |
726 | ||
727 | hammer2_io_newnz(hmp, class >> 8, | |
ba38182b | 728 | (*basep + (offset & ~pmask)) | |
85b1f267 MD |
729 | pradix, psize, &dio); |
730 | hammer2_io_putblk(&dio); | |
93f3933a MD |
731 | } |
732 | } | |
733 | ||
734 | #if 0 | |
735 | /* | |
736 | * When initializing a new inode segment also attempt to initialize | |
737 | * an adjacent segment. Be careful not to index beyond the array | |
738 | * bounds. | |
739 | * | |
740 | * We do this to try to localize inode accesses to improve | |
741 | * directory scan rates. XXX doesn't improve scan rates. | |
742 | */ | |
743 | if (size == HAMMER2_INODE_BYTES) { | |
744 | if (n & 1) { | |
745 | if (bmap[-1].radix == 0 && bmap[-1].avail) | |
746 | bmap[-1].radix = radix; | |
747 | } else { | |
748 | if (bmap[1].radix == 0 && bmap[1].avail) | |
749 | bmap[1].radix = radix; | |
750 | } | |
751 | } | |
752 | #endif | |
271568b1 MD |
753 | /* |
754 | * Calculate the bitmap-granular change in bgsize for the volume | |
755 | * header. We cannot use the fine-grained change here because | |
756 | * the bulkfree code can't undo it. If the bitmap element is already | |
757 | * marked allocated it has already been accounted for. | |
758 | */ | |
759 | if (radix < HAMMER2_FREEMAP_BLOCK_RADIX) { | |
5cebbe36 | 760 | if (bmap->bitmapq[i] & bmmask) |
271568b1 MD |
761 | bgsize = 0; |
762 | else | |
763 | bgsize = HAMMER2_FREEMAP_BLOCK_SIZE; | |
764 | } else { | |
765 | bgsize = size; | |
766 | } | |
93f3933a MD |
767 | |
768 | /* | |
271568b1 MD |
769 | * Adjust the bitmap, set the class (it might have been 0), |
770 | * and available bytes, update the allocation offset (*basep) | |
771 | * from the L0 base to the actual offset. | |
772 | * | |
65cacacf MD |
773 | * Do not override the class if doing a relaxed class allocation. |
774 | * | |
271568b1 MD |
775 | * avail must reflect the bitmap-granular availability. The allocator |
776 | * tests will also check the linear iterator. | |
93f3933a | 777 | */ |
5cebbe36 | 778 | bmap->bitmapq[i] |= bmmask; |
65cacacf MD |
779 | if (bmap->class == 0) |
780 | bmap->class = class; | |
271568b1 | 781 | bmap->avail -= bgsize; |
93f3933a MD |
782 | *basep += offset; |
783 | ||
271568b1 MD |
784 | /* |
785 | * Adjust the volume header's allocator_free parameter. This | |
786 | * parameter has to be fixed up by bulkfree which has no way to | |
787 | * figure out sub-16K chunking, so it must be adjusted by the | |
788 | * bitmap-granular size. | |
789 | */ | |
790 | if (bgsize) { | |
791 | hammer2_voldata_lock(hmp); | |
792 | hammer2_voldata_modify(hmp); | |
793 | hmp->voldata.allocator_free -= bgsize; | |
794 | hammer2_voldata_unlock(hmp); | |
795 | } | |
9f604b01 | 796 | |
93f3933a MD |
797 | return(0); |
798 | } | |
799 | ||
65cacacf MD |
800 | /* |
801 | * Initialize a freemap for the storage area (in bytes) that begins at (key). | |
802 | */ | |
93f3933a MD |
803 | static |
804 | void | |
c603b86b MD |
805 | hammer2_freemap_init(hammer2_dev_t *hmp, hammer2_key_t key, |
806 | hammer2_chain_t *chain) | |
93f3933a | 807 | { |
93f3933a MD |
808 | hammer2_off_t lokey; |
809 | hammer2_off_t hikey; | |
810 | hammer2_bmap_data_t *bmap; | |
811 | int count; | |
812 | ||
93f3933a | 813 | /* |
65cacacf | 814 | * Calculate the portion of the 1GB map that should be initialized |
93f3933a MD |
815 | * as free. Portions below or after will be initialized as allocated. |
816 | * SEGMASK-align the areas so we don't have to worry about sub-scans | |
817 | * or endianess when using memset. | |
818 | * | |
93f3933a MD |
819 | * WARNING! It is possible for lokey to be larger than hikey if the |
820 | * entire 2GB segment is within the static allocation. | |
821 | */ | |
65cacacf MD |
822 | /* |
823 | * (1) Ensure that all statically allocated space from newfs_hammer2 | |
824 | * is marked allocated, and take it up to the level1 base for | |
825 | * this key. | |
826 | */ | |
a5913bdf | 827 | lokey = (hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) & |
93f3933a | 828 | ~HAMMER2_SEGMASK64; |
65cacacf MD |
829 | if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX)) |
830 | lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX); | |
93f3933a | 831 | |
65cacacf MD |
832 | /* |
833 | * (2) Ensure that the reserved area is marked allocated (typically | |
834 | * the first 4MB of each 2GB area being represented). Since | |
835 | * each LEAF represents 1GB of storage and the zone is 2GB, we | |
836 | * have to adjust lowkey upward every other LEAF sequentially. | |
837 | */ | |
838 | if (lokey < H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64) | |
839 | lokey = H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64; | |
93f3933a | 840 | |
65cacacf MD |
841 | /* |
842 | * (3) Ensure that any trailing space at the end-of-volume is marked | |
843 | * allocated. | |
844 | */ | |
4e47c34e | 845 | hikey = key + HAMMER2_FREEMAP_LEVEL1_SIZE; |
0b738157 TK |
846 | if (hikey > hmp->total_size) { |
847 | hikey = hmp->total_size & ~HAMMER2_SEGMASK64; | |
93f3933a MD |
848 | } |
849 | ||
65cacacf MD |
850 | /* |
851 | * Heuristic highest possible value | |
852 | */ | |
4e47c34e | 853 | chain->bref.check.freemap.avail = HAMMER2_FREEMAP_LEVEL1_SIZE; |
93f3933a MD |
854 | bmap = &chain->data->bmdata[0]; |
855 | ||
65cacacf MD |
856 | /* |
857 | * Initialize bitmap (bzero'd by caller) | |
858 | */ | |
93f3933a MD |
859 | for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) { |
860 | if (key < lokey || key >= hikey) { | |
5cebbe36 MD |
861 | memset(bmap->bitmapq, -1, |
862 | sizeof(bmap->bitmapq)); | |
93f3933a | 863 | bmap->avail = 0; |
2c6f594d | 864 | bmap->linear = HAMMER2_SEGSIZE; |
93f3933a | 865 | chain->bref.check.freemap.avail -= |
4e47c34e | 866 | HAMMER2_FREEMAP_LEVEL0_SIZE; |
93f3933a | 867 | } else { |
4e47c34e | 868 | bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE; |
93f3933a | 869 | } |
4e47c34e | 870 | key += HAMMER2_FREEMAP_LEVEL0_SIZE; |
93f3933a MD |
871 | ++bmap; |
872 | } | |
873 | } | |
874 | ||
875 | /* | |
876 | * The current Level 1 freemap has been exhausted, iterate to the next | |
877 | * one, return ENOSPC if no freemaps remain. | |
878 | * | |
65cacacf MD |
879 | * At least two loops are required. If we are not in relaxed mode and |
880 | * we run out of storage we enter relaxed mode and do a third loop. | |
881 | * The relaxed mode is recorded back in the hmp so once we enter the mode | |
882 | * we remain relaxed until stuff begins to get freed and only do 2 loops. | |
883 | * | |
93f3933a MD |
884 | * XXX this should rotate back to the beginning to handle freed-up space |
885 | * XXX or use intermediate entries to locate free space. TODO | |
886 | */ | |
1a7cfe5a | 887 | static int |
c603b86b MD |
888 | hammer2_freemap_iterate(hammer2_chain_t **parentp, hammer2_chain_t **chainp, |
889 | hammer2_fiterate_t *iter) | |
1a7cfe5a | 890 | { |
506bd6d1 | 891 | hammer2_dev_t *hmp = (*parentp)->hmp; |
a5913bdf | 892 | |
20a886d5 | 893 | iter->bnext &= ~HAMMER2_FREEMAP_LEVEL1_MASK; |
4e47c34e | 894 | iter->bnext += HAMMER2_FREEMAP_LEVEL1_SIZE; |
0b738157 | 895 | if (iter->bnext >= hmp->total_size) { |
91abd410 | 896 | iter->bnext = 0; |
65cacacf MD |
897 | if (++iter->loops >= 2) { |
898 | if (iter->relaxed == 0) | |
899 | iter->relaxed = 1; | |
900 | else | |
901 | return (HAMMER2_ERROR_ENOSPC); | |
902 | } | |
91abd410 | 903 | } |
65cacacf | 904 | return(HAMMER2_ERROR_EAGAIN); |
5c23d7f1 MD |
905 | } |
906 | ||
91abd410 | 907 | /* |
877eacb6 MD |
908 | * Adjust the bit-pattern for data in the freemap bitmap according to |
909 | * (how). This code is called from on-mount recovery to fixup (mark | |
910 | * as allocated) blocks whos freemap upates might not have been committed | |
911 | * in the last crash and is used by the bulk freemap scan to stage frees. | |
91abd410 | 912 | * |
da0cdd33 MD |
913 | * WARNING! Cannot be called with a empty-data bref (radix == 0). |
914 | * | |
91abd410 MD |
915 | * XXX currently disabled when how == 0 (the normal real-time case). At |
916 | * the moment we depend on the bulk freescan to actually free blocks. It | |
917 | * will still call this routine with a non-zero how to stage possible frees | |
918 | * and to do the actual free. | |
919 | */ | |
004f88b4 | 920 | void |
e2163f5b MD |
921 | hammer2_freemap_adjust(hammer2_dev_t *hmp, hammer2_blockref_t *bref, |
922 | int how) | |
004f88b4 | 923 | { |
91abd410 MD |
924 | hammer2_off_t data_off = bref->data_off; |
925 | hammer2_chain_t *chain; | |
926 | hammer2_chain_t *parent; | |
927 | hammer2_bmap_data_t *bmap; | |
928 | hammer2_key_t key; | |
1897c66e | 929 | hammer2_key_t key_dummy; |
91abd410 MD |
930 | hammer2_off_t l1size; |
931 | hammer2_off_t l1mask; | |
e2163f5b | 932 | hammer2_tid_t mtid; |
5cebbe36 MD |
933 | hammer2_bitmap_t *bitmap; |
934 | const hammer2_bitmap_t bmmask00 = 0; | |
0a7ab006 TK |
935 | //hammer2_bitmap_t bmmask01; |
936 | //hammer2_bitmap_t bmmask10; | |
5cebbe36 | 937 | hammer2_bitmap_t bmmask11; |
91abd410 MD |
938 | size_t bytes; |
939 | uint16_t class; | |
004f88b4 | 940 | int radix; |
91abd410 MD |
941 | int start; |
942 | int count; | |
943 | int modified = 0; | |
10136ab6 | 944 | int error; |
470dad14 | 945 | size_t bgsize = 0; |
004f88b4 | 946 | |
271568b1 MD |
947 | KKASSERT(how == HAMMER2_FREEMAP_DORECOVER); |
948 | ||
65c894ff | 949 | KKASSERT(hmp->spmp); |
e2163f5b MD |
950 | mtid = hammer2_trans_sub(hmp->spmp); |
951 | ||
004f88b4 | 952 | radix = (int)data_off & HAMMER2_OFF_MASK_RADIX; |
da0cdd33 | 953 | KKASSERT(radix != 0); |
004f88b4 | 954 | data_off &= ~HAMMER2_OFF_MASK_RADIX; |
50456506 | 955 | KKASSERT(radix <= HAMMER2_RADIX_MAX); |
91abd410 | 956 | |
da0cdd33 MD |
957 | if (radix) |
958 | bytes = (size_t)1 << radix; | |
959 | else | |
960 | bytes = 0; | |
ee78054e | 961 | class = (bref->type << 8) | HAMMER2_PBUFRADIX; |
91abd410 | 962 | |
355d67fc | 963 | /* |
da0cdd33 | 964 | * We can't adjust the freemap for data allocations made by |
10136ab6 | 965 | * newfs_hammer2. |
355d67fc MD |
966 | */ |
967 | if (data_off < hmp->voldata.allocator_beg) | |
968 | return; | |
355d67fc | 969 | |
10136ab6 | 970 | KKASSERT((data_off & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG); |
a7720be7 | 971 | |
91abd410 MD |
972 | /* |
973 | * Lookup the level1 freemap chain. The chain must exist. | |
974 | */ | |
975 | key = H2FMBASE(data_off, HAMMER2_FREEMAP_LEVEL1_RADIX); | |
4e47c34e | 976 | l1size = HAMMER2_FREEMAP_LEVEL1_SIZE; |
91abd410 MD |
977 | l1mask = l1size - 1; |
978 | ||
979 | parent = &hmp->fchain; | |
e513e77e | 980 | hammer2_chain_ref(parent); |
91abd410 MD |
981 | hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); |
982 | ||
1897c66e | 983 | chain = hammer2_chain_lookup(&parent, &key_dummy, key, key + l1mask, |
c8c0a18a | 984 | &error, |
91abd410 | 985 | HAMMER2_LOOKUP_ALWAYS | |
b8ba9690 | 986 | HAMMER2_LOOKUP_MATCHIND); |
10136ab6 MD |
987 | |
988 | /* | |
989 | * Stop early if we are trying to free something but no leaf exists. | |
990 | */ | |
991 | if (chain == NULL && how != HAMMER2_FREEMAP_DORECOVER) { | |
992 | kprintf("hammer2_freemap_adjust: %016jx: no chain\n", | |
91abd410 | 993 | (intmax_t)bref->data_off); |
623d43d4 | 994 | goto done; |
91abd410 | 995 | } |
b93cc2e0 MD |
996 | if (chain->error) { |
997 | kprintf("hammer2_freemap_adjust: %016jx: error %s\n", | |
998 | (intmax_t)bref->data_off, | |
999 | hammer2_error_str(chain->error)); | |
1000 | hammer2_chain_unlock(chain); | |
e513e77e | 1001 | hammer2_chain_drop(chain); |
b93cc2e0 MD |
1002 | chain = NULL; |
1003 | goto done; | |
1004 | } | |
91abd410 MD |
1005 | |
1006 | /* | |
10136ab6 MD |
1007 | * Create any missing leaf(s) if we are doing a recovery (marking |
1008 | * the block(s) as being allocated instead of being freed). Be sure | |
1009 | * to initialize the auxillary freemap tracking info in the | |
1010 | * bref.check.freemap structure. | |
91abd410 | 1011 | */ |
10136ab6 | 1012 | if (chain == NULL && how == HAMMER2_FREEMAP_DORECOVER) { |
ecfe89b8 MD |
1013 | error = hammer2_chain_create(&parent, &chain, NULL, hmp->spmp, |
1014 | HAMMER2_METH_DEFAULT, | |
10136ab6 MD |
1015 | key, HAMMER2_FREEMAP_LEVEL1_RADIX, |
1016 | HAMMER2_BREF_TYPE_FREEMAP_LEAF, | |
b3659de2 | 1017 | HAMMER2_FREEMAP_LEVELN_PSIZE, |
3f01ebaa | 1018 | mtid, 0, 0); |
1fca819a MD |
1019 | |
1020 | if (hammer2_debug & 0x0040) { | |
1021 | kprintf("fixup create chain %p %016jx:%d\n", | |
1022 | chain, chain->bref.key, chain->bref.keybits); | |
1023 | } | |
10136ab6 MD |
1024 | |
1025 | if (error == 0) { | |
65cacacf MD |
1026 | error = hammer2_chain_modify(chain, mtid, 0, 0); |
1027 | KKASSERT(error == 0); | |
10136ab6 MD |
1028 | bzero(&chain->data->bmdata[0], |
1029 | HAMMER2_FREEMAP_LEVELN_PSIZE); | |
1030 | chain->bref.check.freemap.bigmask = (uint32_t)-1; | |
1031 | chain->bref.check.freemap.avail = l1size; | |
1032 | /* bref.methods should already be inherited */ | |
91abd410 | 1033 | |
c603b86b | 1034 | hammer2_freemap_init(hmp, key, chain); |
10136ab6 MD |
1035 | } |
1036 | /* XXX handle error */ | |
1037 | } | |
1038 | ||
8138a154 MD |
1039 | #if FREEMAP_DEBUG |
1040 | kprintf("FREEMAP ADJUST TYPE %d %016jx/%d DATA_OFF=%016jx\n", | |
1041 | chain->bref.type, chain->bref.key, | |
1042 | chain->bref.keybits, chain->bref.data_off); | |
1043 | #endif | |
1044 | ||
10136ab6 MD |
1045 | /* |
1046 | * Calculate the bitmask (runs in 2-bit pairs). | |
1047 | */ | |
91abd410 | 1048 | start = ((int)(data_off >> HAMMER2_FREEMAP_BLOCK_RADIX) & 15) * 2; |
0a7ab006 TK |
1049 | //bmmask01 = (hammer2_bitmap_t)1 << start; |
1050 | //bmmask10 = (hammer2_bitmap_t)2 << start; | |
5cebbe36 | 1051 | bmmask11 = (hammer2_bitmap_t)3 << start; |
91abd410 MD |
1052 | |
1053 | /* | |
10136ab6 MD |
1054 | * Fixup the bitmap. Partial blocks cannot be fully freed unless |
1055 | * a bulk scan is able to roll them up. | |
91abd410 MD |
1056 | */ |
1057 | if (radix < HAMMER2_FREEMAP_BLOCK_RADIX) { | |
1058 | count = 1; | |
cfde0144 | 1059 | #if 0 |
10136ab6 MD |
1060 | if (how == HAMMER2_FREEMAP_DOREALFREE) |
1061 | how = HAMMER2_FREEMAP_DOMAYFREE; | |
cfde0144 | 1062 | #endif |
91abd410 MD |
1063 | } else { |
1064 | count = 1 << (radix - HAMMER2_FREEMAP_BLOCK_RADIX); | |
1065 | } | |
004f88b4 | 1066 | |
10136ab6 MD |
1067 | /* |
1068 | * [re]load the bmap and bitmap pointers. Each bmap entry covers | |
30b0abf3 | 1069 | * a 4MB swath. The bmap itself (LEVEL1) covers 2GB. |
8138a154 MD |
1070 | * |
1071 | * Be sure to reset the linear iterator to ensure that the adjustment | |
1072 | * is not ignored. | |
10136ab6 MD |
1073 | */ |
1074 | again: | |
1075 | bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) & | |
1076 | (HAMMER2_FREEMAP_COUNT - 1)]; | |
5cebbe36 | 1077 | bitmap = &bmap->bitmapq[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7]; |
a71db85d MD |
1078 | |
1079 | if (modified) | |
1080 | bmap->linear = 0; | |
10136ab6 | 1081 | |
91abd410 MD |
1082 | while (count) { |
1083 | KKASSERT(bmmask11); | |
10136ab6 MD |
1084 | if (how == HAMMER2_FREEMAP_DORECOVER) { |
1085 | /* | |
1086 | * Recovery request, mark as allocated. | |
1087 | */ | |
1088 | if ((*bitmap & bmmask11) != bmmask11) { | |
1089 | if (modified == 0) { | |
3f01ebaa | 1090 | hammer2_chain_modify(chain, mtid, 0, 0); |
10136ab6 MD |
1091 | modified = 1; |
1092 | goto again; | |
1093 | } | |
271568b1 MD |
1094 | if ((*bitmap & bmmask11) == bmmask00) { |
1095 | bmap->avail -= | |
1096 | HAMMER2_FREEMAP_BLOCK_SIZE; | |
470dad14 | 1097 | bgsize += HAMMER2_FREEMAP_BLOCK_SIZE; |
271568b1 | 1098 | } |
10136ab6 MD |
1099 | if (bmap->class == 0) |
1100 | bmap->class = class; | |
1101 | *bitmap |= bmmask11; | |
1fca819a | 1102 | if (hammer2_debug & 0x0040) { |
9670fc90 | 1103 | kprintf("hammer2_freemap_adjust: " |
1fca819a MD |
1104 | "fixup type=%02x " |
1105 | "block=%016jx/%zd\n", | |
1106 | bref->type, data_off, bytes); | |
1107 | } | |
10136ab6 MD |
1108 | } else { |
1109 | /* | |
9670fc90 | 1110 | kprintf("hammer2_freemap_adjust: good " |
10136ab6 MD |
1111 | "type=%02x block=%016jx/%zd\n", |
1112 | bref->type, data_off, bytes); | |
1113 | */ | |
1114 | } | |
271568b1 MD |
1115 | } |
1116 | #if 0 | |
1117 | /* | |
1118 | * XXX this stuff doesn't work, avail is miscalculated and | |
1119 | * code 10 means something else now. | |
1120 | */ | |
1121 | else if ((*bitmap & bmmask11) == bmmask11) { | |
10136ab6 MD |
1122 | /* |
1123 | * Mayfree/Realfree request and bitmap is currently | |
1124 | * marked as being fully allocated. | |
1125 | */ | |
91abd410 | 1126 | if (!modified) { |
c603b86b | 1127 | hammer2_chain_modify(chain, 0); |
91abd410 | 1128 | modified = 1; |
10136ab6 | 1129 | goto again; |
91abd410 | 1130 | } |
10136ab6 | 1131 | if (how == HAMMER2_FREEMAP_DOREALFREE) |
91abd410 MD |
1132 | *bitmap &= ~bmmask11; |
1133 | else | |
1134 | *bitmap = (*bitmap & ~bmmask11) | bmmask10; | |
1135 | } else if ((*bitmap & bmmask11) == bmmask10) { | |
10136ab6 MD |
1136 | /* |
1137 | * Mayfree/Realfree request and bitmap is currently | |
1138 | * marked as being possibly freeable. | |
1139 | */ | |
1140 | if (how == HAMMER2_FREEMAP_DOREALFREE) { | |
91abd410 | 1141 | if (!modified) { |
c603b86b | 1142 | hammer2_chain_modify(chain, 0); |
91abd410 | 1143 | modified = 1; |
10136ab6 | 1144 | goto again; |
91abd410 MD |
1145 | } |
1146 | *bitmap &= ~bmmask11; | |
1147 | } | |
10136ab6 MD |
1148 | } else { |
1149 | /* | |
1150 | * 01 - Not implemented, currently illegal state | |
1151 | * 00 - Not allocated at all, illegal free. | |
1152 | */ | |
1153 | panic("hammer2_freemap_adjust: " | |
1154 | "Illegal state %08x(%08x)", | |
1155 | *bitmap, *bitmap & bmmask11); | |
91abd410 | 1156 | } |
271568b1 | 1157 | #endif |
91abd410 | 1158 | --count; |
0a7ab006 TK |
1159 | //bmmask01 <<= 2; |
1160 | //bmmask10 <<= 2; | |
91abd410 MD |
1161 | bmmask11 <<= 2; |
1162 | } | |
cfde0144 | 1163 | #if 0 |
5cebbe36 MD |
1164 | #if HAMMER2_BMAP_ELEMENTS != 8 |
1165 | #error "hammer2_freemap.c: HAMMER2_BMAP_ELEMENTS expected to be 8" | |
1166 | #endif | |
10136ab6 | 1167 | if (how == HAMMER2_FREEMAP_DOREALFREE && modified) { |
91abd410 MD |
1168 | bmap->avail += 1 << radix; |
1169 | KKASSERT(bmap->avail <= HAMMER2_SEGSIZE); | |
1170 | if (bmap->avail == HAMMER2_SEGSIZE && | |
5cebbe36 MD |
1171 | bmap->bitmapq[0] == 0 && |
1172 | bmap->bitmapq[1] == 0 && | |
1173 | bmap->bitmapq[2] == 0 && | |
1174 | bmap->bitmapq[3] == 0 && | |
1175 | bmap->bitmapq[4] == 0 && | |
1176 | bmap->bitmapq[5] == 0 && | |
1177 | bmap->bitmapq[6] == 0 && | |
1178 | bmap->bitmapq[7] == 0) { | |
91abd410 MD |
1179 | key = H2FMBASE(data_off, HAMMER2_FREEMAP_LEVEL0_RADIX); |
1180 | kprintf("Freeseg %016jx\n", (intmax_t)key); | |
1181 | bmap->class = 0; | |
1182 | } | |
004f88b4 | 1183 | } |
cfde0144 | 1184 | #endif |
004f88b4 | 1185 | |
91abd410 MD |
1186 | /* |
1187 | * chain->bref.check.freemap.bigmask (XXX) | |
10136ab6 MD |
1188 | * |
1189 | * Setting bigmask is a hint to the allocation code that there might | |
1190 | * be something allocatable. We also set this in recovery... it | |
1191 | * doesn't hurt and we might want to use the hint for other validation | |
1192 | * operations later on. | |
65cacacf MD |
1193 | * |
1194 | * We could calculate the largest possible allocation and set the | |
a4cea70e | 1195 | * radixes that could fit, but its easier just to set bigmask to -1. |
91abd410 | 1196 | */ |
65cacacf MD |
1197 | if (modified) { |
1198 | chain->bref.check.freemap.bigmask = -1; | |
1199 | hmp->freemap_relaxed = 0; /* reset heuristic */ | |
1200 | } | |
1a7cfe5a | 1201 | |
91abd410 | 1202 | hammer2_chain_unlock(chain); |
e513e77e | 1203 | hammer2_chain_drop(chain); |
623d43d4 | 1204 | done: |
91abd410 | 1205 | hammer2_chain_unlock(parent); |
e513e77e | 1206 | hammer2_chain_drop(parent); |
470dad14 MD |
1207 | |
1208 | if (bgsize) { | |
1209 | hammer2_voldata_lock(hmp); | |
1210 | hammer2_voldata_modify(hmp); | |
1211 | hmp->voldata.allocator_free -= bgsize; | |
1212 | hammer2_voldata_unlock(hmp); | |
1213 | } | |
5c23d7f1 | 1214 | } |
bca9f8e6 MD |
1215 | |
1216 | /* | |
1217 | * Validate the freemap, in three stages. | |
1218 | * | |
1219 | * stage-1 ALLOCATED -> POSSIBLY FREE | |
1220 | * POSSIBLY FREE -> POSSIBLY FREE (type corrected) | |
1221 | * | |
1222 | * This transitions bitmap entries from ALLOCATED to POSSIBLY FREE. | |
1223 | * The POSSIBLY FREE state does not mean that a block is actually free | |
1224 | * and may be transitioned back to ALLOCATED in stage-2. | |
1225 | * | |
1226 | * This is typically done during normal filesystem operations when | |
1227 | * something is deleted or a block is replaced. | |
1228 | * | |
1229 | * This is done by bulkfree in-bulk after a memory-bounded meta-data | |
1230 | * scan to try to determine what might be freeable. | |
1231 | * | |
1232 | * This can be done unconditionally through a freemap scan when the | |
1233 | * intention is to brute-force recover the proper state of the freemap. | |
1234 | * | |
1235 | * stage-2 POSSIBLY FREE -> ALLOCATED (scan metadata topology) | |
1236 | * | |
1237 | * This is done by bulkfree during a meta-data scan to ensure that | |
1238 | * all blocks still actually allocated by the filesystem are marked | |
1239 | * as such. | |
1240 | * | |
1241 | * NOTE! Live filesystem transitions to POSSIBLY FREE can occur while | |
1242 | * the bulkfree stage-2 and stage-3 is running. The live filesystem | |
1243 | * will use the alternative POSSIBLY FREE type (2) to prevent | |
1244 | * stage-3 from improperly transitioning unvetted possibly-free | |
1245 | * blocks to FREE. | |
1246 | * | |
1247 | * stage-3 POSSIBLY FREE (type 1) -> FREE (scan freemap) | |
1248 | * | |
1249 | * This is done by bulkfree to finalize POSSIBLY FREE states. | |
1250 | * | |
1251 | */ |