hammer2 - Correct allocator race and related corruption
[dragonfly.git] / sys / vfs / hammer2 / hammer2_bulkfree.c
CommitLineData
3148f677 1/*
68b321c1 2 * Copyright (c) 2013-2018 The DragonFly Project. All rights reserved.
3148f677
MD
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/fcntl.h>
38#include <sys/buf.h>
39#include <sys/proc.h>
40#include <sys/namei.h>
41#include <sys/mount.h>
42#include <sys/vnode.h>
43#include <sys/mountctl.h>
44#include <vm/vm_kern.h>
45#include <vm/vm_extern.h>
46
47#include "hammer2.h"
48
65cacacf
MD
49/*
50 * XXX I made a mistake and made the reserved area begin at each LEVEL1 zone,
51 * which is on a 1GB demark. This will eat a little more space but for
52 * now we retain compatibility and make FMZONEBASE every 1GB
53 */
746bb04f
AL
54#define H2FMZONEBASE(key) ((key) & ~HAMMER2_FREEMAP_LEVEL1_MASK)
55#define H2FMBASE(key, radix) ((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
56#define H2FMSHIFT(radix) ((hammer2_off_t)1 << (radix))
470dad14 57
3148f677
MD
58/*
59 * breadth-first search
60 */
61typedef struct hammer2_chain_save {
62 TAILQ_ENTRY(hammer2_chain_save) entry;
125966e8
MD
63 hammer2_chain_t *chain;
64 int pri;
3148f677
MD
65} hammer2_chain_save_t;
66
67TAILQ_HEAD(hammer2_chain_save_list, hammer2_chain_save);
68typedef struct hammer2_chain_save_list hammer2_chain_save_list_t;
69
125966e8
MD
70typedef struct hammer2_bulkfree_info {
71 hammer2_dev_t *hmp;
72 kmem_anon_desc_t kp;
73 hammer2_off_t sbase; /* sub-loop iteration */
74 hammer2_off_t sstop;
75 hammer2_bmap_data_t *bmap;
76 int depth;
b5b45574
MD
77 long count_10_00; /* staged->free */
78 long count_11_10; /* allocated->staged */
79 long count_00_11; /* (should not happen) */
80 long count_01_11; /* (should not happen) */
81 long count_10_11; /* staged->allocated */
125966e8
MD
82 long count_l0cleans;
83 long count_linadjusts;
84 long count_inodes_scanned;
c152fb8d 85 long count_dirents_scanned;
125966e8 86 long count_dedup_factor;
c152fb8d
MD
87 long count_bytes_scanned;
88 long count_chains_scanned;
89 long count_chains_reported;
4ff0b408
MD
90 long bulkfree_calls;
91 int bulkfree_ticks;
125966e8
MD
92 hammer2_off_t adj_free;
93 hammer2_tid_t mtid;
94 hammer2_tid_t saved_mirror_tid;
95 time_t save_time;
96 hammer2_chain_save_list_t list;
97 hammer2_dedup_t *dedup;
98 int pri;
99} hammer2_bulkfree_info_t;
100
101static int h2_bulkfree_test(hammer2_bulkfree_info_t *info,
102 hammer2_blockref_t *bref, int pri);
103
3148f677
MD
104/*
105 * General bulk scan function with callback. Called with a referenced
125966e8 106 * but UNLOCKED parent. The parent is returned in the same state.
3148f677 107 */
125966e8 108static
3148f677 109int
c603b86b 110hammer2_bulk_scan(hammer2_chain_t *parent,
125966e8
MD
111 int (*func)(hammer2_bulkfree_info_t *info,
112 hammer2_blockref_t *bref),
113 hammer2_bulkfree_info_t *info)
3148f677 114{
125966e8
MD
115 hammer2_blockref_t bref;
116 hammer2_chain_t *chain;
125966e8 117 int first = 1;
1e2c8208
MD
118 int rup_error;
119 int error;
125966e8
MD
120
121 ++info->pri;
3148f677 122
125966e8
MD
123 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
124 HAMMER2_RESOLVE_SHARED);
125 chain = NULL;
1e2c8208
MD
126 rup_error = 0;
127 error = 0;
3148f677 128
125966e8
MD
129 /*
130 * Generally loop on the contents if we have not been flagged
131 * for abort.
132 *
133 * Remember that these chains are completely isolated from
134 * the frontend, so we can release locks temporarily without
135 * imploding.
136 */
1e2c8208
MD
137 for (;;) {
138 error |= hammer2_chain_scan(parent, &chain, &bref, &first,
1e2c8208
MD
139 HAMMER2_LOOKUP_NODATA |
140 HAMMER2_LOOKUP_SHARED);
141
142 /*
143 * Handle EOF or other error at current level. This stops
144 * the bulkfree scan.
145 */
146 if (error)
147 break;
148
4ff0b408
MD
149 /*
150 * Account for dirents before thre data_off test, since most
151 * dirents do not need a data reference.
152 */
c152fb8d
MD
153 if (bref.type == HAMMER2_BREF_TYPE_DIRENT)
154 ++info->count_dirents_scanned;
155
156 /*
157 * Ignore brefs without data (typically dirents)
158 */
159 if ((bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
160 continue;
161
125966e8
MD
162 /*
163 * Process bref, chain is only non-NULL if the bref
164 * might be recursable (its possible that we sometimes get
165 * a non-NULL chain where the bref cannot be recursed).
166 */
125966e8
MD
167 ++info->pri;
168 if (h2_bulkfree_test(info, &bref, 1))
169 continue;
3148f677 170
c152fb8d
MD
171 if (bref.type == HAMMER2_BREF_TYPE_INODE)
172 ++info->count_inodes_scanned;
173
1e2c8208
MD
174 error |= func(info, &bref);
175 if (error)
125966e8 176 break;
3148f677
MD
177
178 /*
125966e8
MD
179 * A non-null chain is always returned if it is
180 * recursive, otherwise a non-null chain might be
181 * returned but usually is not when not recursive.
3148f677 182 */
125966e8
MD
183 if (chain == NULL)
184 continue;
3148f677 185
c152fb8d
MD
186 if (chain) {
187 info->count_bytes_scanned += chain->bytes;
188 ++info->count_chains_scanned;
189
190 if (info->count_chains_scanned >=
944ddad0
MD
191 info->count_chains_reported + 1000000 ||
192 (info->count_chains_scanned < 1000000 &&
193 info->count_chains_scanned >=
194 info->count_chains_reported + 100000)) {
c152fb8d
MD
195 kprintf(" chains %-7ld inodes %-7ld "
196 "dirents %-7ld bytes %5ldMB\n",
197 info->count_chains_scanned,
198 info->count_inodes_scanned,
199 info->count_dirents_scanned,
200 info->count_bytes_scanned / 1000000);
944ddad0
MD
201 info->count_chains_reported =
202 info->count_chains_scanned;
c152fb8d
MD
203 }
204 }
205
206
3148f677 207 /*
125966e8
MD
208 * Else check type and setup depth-first scan.
209 *
210 * Account for bytes actually read.
3148f677 211 */
125966e8
MD
212 switch(chain->bref.type) {
213 case HAMMER2_BREF_TYPE_INODE:
214 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
215 case HAMMER2_BREF_TYPE_INDIRECT:
216 case HAMMER2_BREF_TYPE_VOLUME:
217 case HAMMER2_BREF_TYPE_FREEMAP:
218 ++info->depth;
219 if (info->depth > 16) {
220 hammer2_chain_save_t *save;
221 save = kmalloc(sizeof(*save), M_HAMMER2,
222 M_WAITOK | M_ZERO);
223 save->chain = chain;
3148f677 224 hammer2_chain_ref(chain);
125966e8
MD
225 TAILQ_INSERT_TAIL(&info->list, save, entry);
226
227 /* guess */
228 info->pri += 10;
229 } else {
230 int savepri = info->pri;
231
232 hammer2_chain_unlock(chain);
65cacacf 233 hammer2_chain_unlock(parent);
125966e8 234 info->pri = 0;
1e2c8208
MD
235 rup_error |=
236 hammer2_bulk_scan(chain, func, info);
125966e8 237 info->pri += savepri;
65cacacf
MD
238 hammer2_chain_lock(parent,
239 HAMMER2_RESOLVE_ALWAYS |
240 HAMMER2_RESOLVE_SHARED);
125966e8
MD
241 hammer2_chain_lock(chain,
242 HAMMER2_RESOLVE_ALWAYS |
243 HAMMER2_RESOLVE_SHARED);
3148f677 244 }
125966e8
MD
245 --info->depth;
246 break;
c152fb8d 247 case HAMMER2_BREF_TYPE_DATA:
c152fb8d 248 break;
125966e8
MD
249 default:
250 /* does not recurse */
251 break;
3148f677 252 }
1e2c8208
MD
253 if (rup_error & HAMMER2_ERROR_ABORTED)
254 break;
125966e8
MD
255 }
256 if (chain) {
257 hammer2_chain_unlock(chain);
258 hammer2_chain_drop(chain);
3148f677 259 }
d662271e
MD
260
261 /*
125966e8 262 * Save with higher pri now that we know what it is.
d662271e 263 */
125966e8
MD
264 h2_bulkfree_test(info, &parent->bref, info->pri + 1);
265
266 hammer2_chain_unlock(parent);
d662271e 267
1e2c8208 268 return ((error | rup_error) & ~HAMMER2_ERROR_EOF);
3148f677
MD
269}
270
271/*
21a90458 272 * Bulkfree algorithm
3148f677 273 *
21a90458 274 * Repeat {
53f84d31 275 * Chain flush (partial synchronization) XXX removed
21a90458
MD
276 * Scan the whole topology - build in-memory freemap (mark 11)
277 * Reconcile the in-memory freemap against the on-disk freemap.
278 * ondisk xx -> ondisk 11 (if allocated)
279 * ondisk 11 -> ondisk 10 (if free in-memory)
280 * ondisk 10 -> ondisk 00 (if free in-memory) - on next pass
3148f677
MD
281 * }
282 *
21a90458
MD
283 * The topology scan may have to be performed multiple times to window
284 * freemaps which are too large to fit in kernel memory.
3148f677 285 *
21a90458
MD
286 * Races are handled using a double-transition (11->10, 10->00). The bulkfree
287 * scan snapshots the volume root's blockset and thus can run concurrent with
288 * normal operations, as long as a full flush is made between each pass to
289 * synchronize any modified chains (otherwise their blocks might be improperly
290 * freed).
291 *
292 * Temporary memory in multiples of 64KB is required to reconstruct the leaf
3148f677
MD
293 * hammer2_bmap_data blocks so they can later be compared against the live
294 * freemap. Each 64KB block represents 128 x 16KB x 1024 = ~2 GB of storage.
295 * A 32MB save area thus represents around ~1 TB. The temporary memory
296 * allocated can be specified. If it is not sufficient multiple topology
297 * passes will be made.
298 */
299
300/*
301 * Bulkfree callback info
302 */
9dca9515 303static void hammer2_bulkfree_thread(void *arg __unused);
470dad14 304static void cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size);
125966e8
MD
305static int h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo,
306 hammer2_blockref_t *bref);
c8c0a18a 307static int h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo);
d0fcd3ec 308static void h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
7767d389 309 hammer2_off_t data_off, hammer2_bmap_data_t *live,
85b1f267 310 hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base);
3148f677 311
9dca9515
MD
312void
313hammer2_bulkfree_init(hammer2_dev_t *hmp)
314{
315 hammer2_thr_create(&hmp->bfthr, NULL, hmp,
316 hmp->devrepname, -1, -1,
317 hammer2_bulkfree_thread);
318}
319
320void
321hammer2_bulkfree_uninit(hammer2_dev_t *hmp)
322{
323 hammer2_thr_delete(&hmp->bfthr);
324}
325
326static void
327hammer2_bulkfree_thread(void *arg)
328{
329 hammer2_thread_t *thr = arg;
330 hammer2_ioc_bulkfree_t bfi;
331 uint32_t flags;
332
333 for (;;) {
334 hammer2_thr_wait_any(thr,
335 HAMMER2_THREAD_STOP |
336 HAMMER2_THREAD_FREEZE |
337 HAMMER2_THREAD_UNFREEZE |
338 HAMMER2_THREAD_REMASTER,
339 hz * 60);
340
341 flags = thr->flags;
342 cpu_ccfence();
343 if (flags & HAMMER2_THREAD_STOP)
344 break;
345 if (flags & HAMMER2_THREAD_FREEZE) {
346 hammer2_thr_signal2(thr, HAMMER2_THREAD_FROZEN,
347 HAMMER2_THREAD_FREEZE);
348 continue;
349 }
350 if (flags & HAMMER2_THREAD_UNFREEZE) {
351 hammer2_thr_signal2(thr, 0,
352 HAMMER2_THREAD_FROZEN |
353 HAMMER2_THREAD_UNFREEZE);
354 continue;
355 }
356 if (flags & HAMMER2_THREAD_FROZEN)
357 continue;
358 if (flags & HAMMER2_THREAD_REMASTER) {
359 hammer2_thr_signal2(thr, 0, HAMMER2_THREAD_REMASTER);
360 bzero(&bfi, sizeof(bfi));
361 bfi.size = 8192 * 1024;
f079a0a8 362 /* hammer2_bulkfree_pass(thr->hmp, &bfi); */
9dca9515
MD
363 }
364 }
365 thr->td = NULL;
366 hammer2_thr_signal(thr, HAMMER2_THREAD_STOPPED);
367 /* structure can go invalid at this point */
368}
369
3148f677 370int
f079a0a8
MD
371hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_chain_t *vchain,
372 hammer2_ioc_bulkfree_t *bfi)
3148f677 373{
3148f677 374 hammer2_bulkfree_info_t cbinfo;
125966e8 375 hammer2_chain_save_t *save;
d662271e 376 hammer2_off_t incr;
3148f677 377 size_t size;
1e2c8208 378 int error;
3148f677 379
125966e8
MD
380 /*
381 * We have to clear the live dedup cache as it might have entries
382 * that are freeable as of now. Any new entries in the dedup cache
383 * made after this point, even if they become freeable, will have
384 * previously been fully allocated and will be protected by the
385 * 2-stage bulkfree.
386 */
387 hammer2_dedup_clear(hmp);
388
21a90458 389 /*
944ddad0
MD
390 * Setup for free pass. Maximum buffer memory is 1/4 physical
391 * memory.
21a90458 392 */
3148f677
MD
393 bzero(&cbinfo, sizeof(cbinfo));
394 size = (bfi->size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
395 ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);
c152fb8d
MD
396 if (size < 1024 * 1024)
397 size = 1024 * 1024;
944ddad0
MD
398 if (size > kmem_lim_size() * 1024 * 1024 / 4) {
399 size = kmem_lim_size() * 1024 * 1024 / 4;
400 kprintf("hammer2: Warning: capping bulkfree buffer at %jdM\n",
401 (intmax_t)size / (1024 * 1024));
402 }
c152fb8d 403
d0fcd3ec 404 cbinfo.hmp = hmp;
3091de50 405 cbinfo.bmap = kmem_alloc_swapbacked(&cbinfo.kp, size, VM_SUBSYS_HAMMER);
53f84d31 406 cbinfo.saved_mirror_tid = hmp->voldata.mirror_tid;
3148f677 407
125966e8
MD
408 cbinfo.dedup = kmalloc(sizeof(*cbinfo.dedup) * HAMMER2_DEDUP_HEUR_SIZE,
409 M_HAMMER2, M_WAITOK | M_ZERO);
410
271568b1 411 /*
d662271e
MD
412 * Normalize start point to a 2GB boundary. We operate on a
413 * 64KB leaf bitmap boundary which represents 2GB of storage.
271568b1 414 */
d662271e
MD
415 cbinfo.sbase = bfi->sbase;
416 if (cbinfo.sbase > hmp->voldata.volu_size)
417 cbinfo.sbase = hmp->voldata.volu_size;
418 cbinfo.sbase &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
125966e8 419 TAILQ_INIT(&cbinfo.list);
21a90458 420
4ff0b408
MD
421 cbinfo.bulkfree_ticks = ticks;
422
271568b1 423 /*
d662271e
MD
424 * Loop on a full meta-data scan as many times as required to
425 * get through all available storage.
271568b1 426 */
1e2c8208 427 error = 0;
d662271e
MD
428 while (cbinfo.sbase < hmp->voldata.volu_size) {
429 /*
430 * We have enough ram to represent (incr) bytes of storage.
431 * Each 64KB of ram represents 2GB of storage.
85b1f267
MD
432 *
433 * We must also clean out our de-duplication heuristic for
434 * each (incr) bytes of storage, otherwise we wind up not
435 * scanning meta-data for later areas of storage because
436 * they had already been scanned in earlier areas of storage.
437 * Since the ranging is different, we have to restart
438 * the dedup heuristic too.
d662271e 439 */
470dad14 440 cbinfo_bmap_init(&cbinfo, size);
be36dcd4
MD
441 bzero(cbinfo.dedup, sizeof(*cbinfo.dedup) *
442 HAMMER2_DEDUP_HEUR_SIZE);
c152fb8d
MD
443 cbinfo.count_inodes_scanned = 0;
444 cbinfo.count_dirents_scanned = 0;
445 cbinfo.count_bytes_scanned = 0;
446 cbinfo.count_chains_scanned = 0;
447 cbinfo.count_chains_reported = 0;
448
d662271e
MD
449 incr = size / HAMMER2_FREEMAP_LEVELN_PSIZE *
450 HAMMER2_FREEMAP_LEVEL1_SIZE;
451 if (hmp->voldata.volu_size - cbinfo.sbase < incr)
452 cbinfo.sstop = hmp->voldata.volu_size;
453 else
454 cbinfo.sstop = cbinfo.sbase + incr;
944ddad0
MD
455 kprintf("hammer2: bulkfree buf=%5jdM "
456 "pass %016jx-%016jx (%jdGB of media)\n",
457 (intmax_t)size / (1024 * 1024),
458 (intmax_t)cbinfo.sbase,
459 (intmax_t)cbinfo.sstop,
460 (intmax_t)incr / (1024L*1024*1024));
d662271e 461
470dad14
MD
462 /*
463 * Scan topology for stuff inside this range.
c88e038b
MD
464 *
465 * NOTE - By not using a transaction the operation can
466 * run concurrent with the frontend as well as
467 * with flushes.
468 *
469 * We cannot safely set a mtid without a transaction,
470 * and in fact we don't want to set one anyway. We
471 * want the bulkfree to be passive and no interfere
472 * with crash recovery.
470dad14 473 */
c88e038b
MD
474#undef HAMMER2_BULKFREE_TRANS /* undef - don't use transaction */
475#ifdef HAMMER2_BULKFREE_TRANS
c603b86b 476 hammer2_trans_init(hmp->spmp, 0);
e2163f5b 477 cbinfo.mtid = hammer2_trans_sub(hmp->spmp);
c88e038b
MD
478#else
479 cbinfo.mtid = 0;
480#endif
125966e8 481 cbinfo.pri = 0;
1e2c8208
MD
482 error |= hammer2_bulk_scan(vchain, h2_bulkfree_callback,
483 &cbinfo);
125966e8
MD
484
485 while ((save = TAILQ_FIRST(&cbinfo.list)) != NULL &&
1e2c8208 486 error == 0) {
125966e8
MD
487 TAILQ_REMOVE(&cbinfo.list, save, entry);
488 cbinfo.pri = 0;
1e2c8208 489 error |= hammer2_bulk_scan(save->chain,
746bb04f
AL
490 h2_bulkfree_callback,
491 &cbinfo);
125966e8
MD
492 hammer2_chain_drop(save->chain);
493 kfree(save, M_HAMMER2);
494 }
495 while (save) {
496 TAILQ_REMOVE(&cbinfo.list, save, entry);
497 hammer2_chain_drop(save->chain);
498 kfree(save, M_HAMMER2);
499 save = TAILQ_FIRST(&cbinfo.list);
500 }
501
d662271e 502 /*
65c894ff 503 * If the complete scan succeeded we can synchronize our
d662271e 504 * in-memory freemap against live storage. If an abort
65c894ff 505 * occured we cannot safely synchronize our partially
d662271e
MD
506 * filled-out in-memory freemap.
507 */
746bb04f
AL
508 if (error) {
509 kprintf("bulkfree lastdrop %d %d error=0x%04x\n",
510 vchain->refs, vchain->core.chain_count, error);
511 } else {
512 kprintf("bulkfree lastdrop %d %d\n",
513 vchain->refs, vchain->core.chain_count);
514
c8c0a18a 515 error = h2_bulkfree_sync(&cbinfo);
d662271e
MD
516
517 hammer2_voldata_lock(hmp);
518 hammer2_voldata_modify(hmp);
519 hmp->voldata.allocator_free += cbinfo.adj_free;
520 hammer2_voldata_unlock(hmp);
521 }
522
523 /*
524 * Cleanup for next loop.
525 */
c88e038b 526#ifdef HAMMER2_BULKFREE_TRANS
257c2728 527 hammer2_trans_done(hmp->spmp, 0);
c88e038b 528#endif
1e2c8208 529 if (error)
d662271e
MD
530 break;
531 cbinfo.sbase = cbinfo.sstop;
470dad14 532 cbinfo.adj_free = 0;
d662271e 533 }
3148f677 534 kmem_free_swapbacked(&cbinfo.kp);
125966e8
MD
535 kfree(cbinfo.dedup, M_HAMMER2);
536 cbinfo.dedup = NULL;
3148f677 537
d662271e
MD
538 bfi->sstop = cbinfo.sbase;
539
540 incr = bfi->sstop / (hmp->voldata.volu_size / 10000);
541 if (incr > 10000)
542 incr = 10000;
543
544 kprintf("bulkfree pass statistics (%d.%02d%% storage processed):\n",
545 (int)incr / 100,
546 (int)incr % 100);
547
1e2c8208
MD
548 if (error) {
549 kprintf(" bulkfree was aborted\n");
550 } else {
551 kprintf(" transition->free %ld\n", cbinfo.count_10_00);
552 kprintf(" transition->staged %ld\n", cbinfo.count_11_10);
553 kprintf(" ERR(00)->allocated %ld\n", cbinfo.count_00_11);
554 kprintf(" ERR(01)->allocated %ld\n", cbinfo.count_01_11);
555 kprintf(" staged->allocated %ld\n", cbinfo.count_10_11);
556 kprintf(" ~2MB segs cleaned %ld\n", cbinfo.count_l0cleans);
557 kprintf(" linear adjusts %ld\n",
558 cbinfo.count_linadjusts);
559 kprintf(" dedup factor %ld\n",
560 cbinfo.count_dedup_factor);
561 }
d0fcd3ec 562
1e2c8208 563 return error;
3148f677
MD
564}
565
470dad14
MD
566static void
567cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size)
568{
569 hammer2_bmap_data_t *bmap = cbinfo->bmap;
570 hammer2_key_t key = cbinfo->sbase;
571 hammer2_key_t lokey;
572 hammer2_key_t hikey;
573
574 lokey = (cbinfo->hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) &
575 ~HAMMER2_SEGMASK64;
576 hikey = cbinfo->hmp->voldata.volu_size & ~HAMMER2_SEGMASK64;
577
578 bzero(bmap, size);
579 while (size) {
65cacacf
MD
580 bzero(bmap, sizeof(*bmap));
581 if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX))
582 lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX);
583 if (lokey < H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64)
584 lokey = H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64;
470dad14 585 if (key < lokey || key >= hikey) {
746bb04f
AL
586 memset(bmap->bitmapq, -1,
587 sizeof(bmap->bitmapq));
588 bmap->avail = 0;
589 bmap->linear = HAMMER2_SEGSIZE;
470dad14
MD
590 } else {
591 bmap->avail = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
592 }
593 size -= sizeof(*bmap);
594 key += HAMMER2_FREEMAP_LEVEL0_SIZE;
595 ++bmap;
596 }
597}
598
3148f677 599static int
125966e8 600h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
3148f677 601{
3148f677
MD
602 hammer2_bmap_data_t *bmap;
603 hammer2_off_t data_off;
604 uint16_t class;
605 size_t bytes;
606 int radix;
3148f677 607
d662271e 608 /*
4ff0b408 609 * Check for signal and allow yield to userland during scan.
d662271e
MD
610 */
611 if (hammer2_signal_check(&cbinfo->save_time))
1e2c8208
MD
612 return HAMMER2_ERROR_ABORTED;
613
4ff0b408
MD
614 /*
615 * Deal with kernel thread cpu or I/O hogging by limiting the
616 * number of chains scanned per second to hammer2_bulkfree_tps.
617 * Ignore leaf records (DIRENT and DATA), no per-record I/O is
618 * involved for those since we don't load their data.
619 */
620 if (bref->type != HAMMER2_BREF_TYPE_DATA &&
621 bref->type != HAMMER2_BREF_TYPE_DIRENT) {
622 ++cbinfo->bulkfree_calls;
623 if (cbinfo->bulkfree_calls > hammer2_bulkfree_tps) {
624 int dticks = ticks - cbinfo->bulkfree_ticks;
625 if (dticks < 0)
626 dticks = 0;
627 if (dticks < hz) {
628 tsleep(&cbinfo->bulkfree_ticks, 0,
629 "h2bw", hz - dticks);
630 }
631 cbinfo->bulkfree_calls = 0;
632 cbinfo->bulkfree_ticks = ticks;
633 }
634 }
635
3148f677
MD
636 /*
637 * Calculate the data offset and determine if it is within
638 * the current freemap range being gathered.
639 */
125966e8 640 data_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
470dad14 641 if (data_off < cbinfo->sbase || data_off >= cbinfo->sstop)
3148f677 642 return 0;
125966e8 643 if (data_off < cbinfo->hmp->voldata.allocator_beg)
3148f677 644 return 0;
470dad14 645 if (data_off >= cbinfo->hmp->voldata.volu_size)
3148f677
MD
646 return 0;
647
648 /*
649 * Calculate the information needed to generate the in-memory
650 * freemap record.
651 *
652 * Hammer2 does not allow allocations to cross the L1 (2GB) boundary,
653 * it's a problem if it does. (Or L0 (2MB) for that matter).
654 */
125966e8 655 radix = (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
da0cdd33 656 KKASSERT(radix != 0);
3148f677 657 bytes = (size_t)1 << radix;
125966e8 658 class = (bref->type << 8) | hammer2_devblkradix(radix);
3148f677 659
65cacacf 660 if (data_off + bytes > cbinfo->sstop) {
3148f677
MD
661 kprintf("hammer2_bulkfree_scan: illegal 2GB boundary "
662 "%016jx %016jx/%d\n",
125966e8
MD
663 (intmax_t)bref->data_off,
664 (intmax_t)bref->key,
665 bref->keybits);
3148f677
MD
666 bytes = cbinfo->sstop - data_off; /* XXX */
667 }
668
669 /*
670 * Convert to a storage offset relative to the beginning of the
671 * storage range we are collecting. Then lookup the level0 bmap entry.
672 */
673 data_off -= cbinfo->sbase;
674 bmap = cbinfo->bmap + (data_off >> HAMMER2_FREEMAP_LEVEL0_RADIX);
675
676 /*
ef5c6e68 677 * Convert data_off to a bmap-relative value (~4MB storage range).
3148f677
MD
678 * Adjust linear, class, and avail.
679 *
ef5c6e68 680 * Hammer2 does not allow allocations to cross the L0 (4MB) boundary,
3148f677
MD
681 */
682 data_off &= HAMMER2_FREEMAP_LEVEL0_MASK;
683 if (data_off + bytes > HAMMER2_FREEMAP_LEVEL0_SIZE) {
ef5c6e68 684 kprintf("hammer2_bulkfree_scan: illegal 4MB boundary "
3148f677 685 "%016jx %016jx/%d\n",
125966e8
MD
686 (intmax_t)bref->data_off,
687 (intmax_t)bref->key,
688 bref->keybits);
3148f677
MD
689 bytes = HAMMER2_FREEMAP_LEVEL0_SIZE - data_off;
690 }
691
692 if (bmap->class == 0) {
693 bmap->class = class;
694 bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
695 }
65cacacf
MD
696
697 /*
698 * NOTE: bmap->class does not have to match class. Classification
699 * is relaxed when free space is low, so some mixing can occur.
700 */
701#if 0
702 /*
703 * XXX removed
704 */
3148f677
MD
705 if (bmap->class != class) {
706 kprintf("hammer2_bulkfree_scan: illegal mixed class "
707 "%016jx %016jx/%d (%04x vs %04x)\n",
125966e8
MD
708 (intmax_t)bref->data_off,
709 (intmax_t)bref->key,
710 bref->keybits,
3148f677
MD
711 class, bmap->class);
712 }
65cacacf 713#endif
b49f72c3
MD
714
715 /*
716 * Just record the highest byte-granular offset for now. Do not
717 * match against allocations which are in multiples of whole blocks.
718 *
719 * Make sure that any in-block linear offset at least covers the
720 * data range. This can cause bmap->linear to become block-aligned.
721 */
722 if (bytes & HAMMER2_FREEMAP_BLOCK_MASK) {
723 if (bmap->linear < (int32_t)data_off + (int32_t)bytes)
724 bmap->linear = (int32_t)data_off + (int32_t)bytes;
725 } else if (bmap->linear >= (int32_t)data_off &&
726 bmap->linear < (int32_t)data_off + (int32_t)bytes) {
3148f677 727 bmap->linear = (int32_t)data_off + (int32_t)bytes;
b49f72c3 728 }
3148f677
MD
729
730 /*
5cebbe36
MD
731 * Adjust the hammer2_bitmap_t bitmap[HAMMER2_BMAP_ELEMENTS].
732 * 64-bit entries, 2 bits per entry, to code 11.
3148f677 733 *
ef5c6e68
MD
734 * NOTE: data_off mask to 524288, shift right by 14 (radix for 16384),
735 * and multiply shift amount by 2 for sets of 2 bits.
736 *
3148f677 737 * NOTE: The allocation can be smaller than HAMMER2_FREEMAP_BLOCK_SIZE.
ef5c6e68 738 * also, data_off may not be FREEMAP_BLOCK_SIZE aligned.
3148f677
MD
739 */
740 while (bytes > 0) {
5cebbe36 741 hammer2_bitmap_t bmask;
ef5c6e68 742 int bindex;
3148f677
MD
743
744 bindex = (int)data_off >> (HAMMER2_FREEMAP_BLOCK_RADIX +
745 HAMMER2_BMAP_INDEX_RADIX);
5cebbe36
MD
746 bmask = (hammer2_bitmap_t)3 <<
747 ((((int)data_off & HAMMER2_BMAP_INDEX_MASK) >>
748 HAMMER2_FREEMAP_BLOCK_RADIX) << 1);
d0fcd3ec
MD
749
750 /*
271568b1
MD
751 * NOTE! The (avail) calculation is bitmap-granular. Multiple
752 * sub-granular records can wind up at the same bitmap
753 * position.
d0fcd3ec 754 */
5cebbe36 755 if ((bmap->bitmapq[bindex] & bmask) == 0) {
271568b1 756 if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE) {
3148f677 757 bmap->avail -= HAMMER2_FREEMAP_BLOCK_SIZE;
271568b1 758 } else {
3148f677 759 bmap->avail -= bytes;
271568b1 760 }
5cebbe36 761 bmap->bitmapq[bindex] |= bmask;
3148f677
MD
762 }
763 data_off += HAMMER2_FREEMAP_BLOCK_SIZE;
764 if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE)
765 bytes = 0;
766 else
767 bytes -= HAMMER2_FREEMAP_BLOCK_SIZE;
768 }
1e2c8208 769 return 0;
3148f677
MD
770}
771
d0fcd3ec
MD
772/*
773 * Synchronize the in-memory bitmap with the live freemap. This is not a
774 * direct copy. Instead the bitmaps must be compared:
775 *
776 * In-memory Live-freemap
53f84d31
MD
777 * 00 11 -> 10 (do nothing if live modified)
778 * 10 -> 00 (do nothing if live modified)
d0fcd3ec
MD
779 * 11 10 -> 11 handles race against live
780 * ** -> 11 nominally warn of corruption
781 *
65cacacf 782 * We must also fixup the hints in HAMMER2_BREF_TYPE_FREEMAP_LEAF.
d0fcd3ec 783 */
c8c0a18a 784static int
3148f677
MD
785h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo)
786{
787 hammer2_off_t data_off;
d0fcd3ec
MD
788 hammer2_key_t key;
789 hammer2_key_t key_dummy;
3148f677 790 hammer2_bmap_data_t *bmap;
d0fcd3ec
MD
791 hammer2_bmap_data_t *live;
792 hammer2_chain_t *live_parent;
793 hammer2_chain_t *live_chain;
d0fcd3ec 794 int bmapindex;
c8c0a18a 795 int error;
3148f677 796
470dad14
MD
797 kprintf("hammer2_bulkfree - range ");
798
799 if (cbinfo->sbase < cbinfo->hmp->voldata.allocator_beg)
800 kprintf("%016jx-",
801 (intmax_t)cbinfo->hmp->voldata.allocator_beg);
802 else
803 kprintf("%016jx-",
804 (intmax_t)cbinfo->sbase);
805
806 if (cbinfo->sstop > cbinfo->hmp->voldata.volu_size)
807 kprintf("%016jx\n",
808 (intmax_t)cbinfo->hmp->voldata.volu_size);
809 else
810 kprintf("%016jx\n",
811 (intmax_t)cbinfo->sstop);
3148f677
MD
812
813 data_off = cbinfo->sbase;
814 bmap = cbinfo->bmap;
d0fcd3ec
MD
815
816 live_parent = &cbinfo->hmp->fchain;
e513e77e 817 hammer2_chain_ref(live_parent);
d0fcd3ec
MD
818 hammer2_chain_lock(live_parent, HAMMER2_RESOLVE_ALWAYS);
819 live_chain = NULL;
c8c0a18a 820 error = 0;
3148f677 821
b5b45574
MD
822 /*
823 * Iterate each hammer2_bmap_data_t line (128 bytes) managing
824 * 4MB of storage.
825 */
3148f677 826 while (data_off < cbinfo->sstop) {
d0fcd3ec
MD
827 /*
828 * The freemap is not used below allocator_beg or beyond
829 * volu_size.
830 */
470dad14 831
d0fcd3ec
MD
832 if (data_off < cbinfo->hmp->voldata.allocator_beg)
833 goto next;
470dad14 834 if (data_off >= cbinfo->hmp->voldata.volu_size)
d0fcd3ec
MD
835 goto next;
836
837 /*
838 * Locate the freemap leaf on the live filesystem
839 */
840 key = (data_off & ~HAMMER2_FREEMAP_LEVEL1_MASK);
470dad14 841
d0fcd3ec 842 if (live_chain == NULL || live_chain->bref.key != key) {
e513e77e 843 if (live_chain) {
d0fcd3ec 844 hammer2_chain_unlock(live_chain);
e513e77e
MD
845 hammer2_chain_drop(live_chain);
846 }
d0fcd3ec
MD
847 live_chain = hammer2_chain_lookup(
848 &live_parent,
849 &key_dummy,
850 key,
851 key + HAMMER2_FREEMAP_LEVEL1_MASK,
c8c0a18a 852 &error,
b8ba9690 853 HAMMER2_LOOKUP_ALWAYS);
c8c0a18a
MD
854 if (error) {
855 kprintf("hammer2_bulkfree: freemap lookup "
856 "error near %016jx, error %s\n",
857 (intmax_t)data_off,
858 hammer2_error_str(live_chain->error));
859 break;
53f84d31 860 }
d0fcd3ec
MD
861 }
862 if (live_chain == NULL) {
b5b45574
MD
863 /*
864 * XXX if we implement a full recovery mode we need
865 * to create/recreate missing freemap chains if our
866 * bmap has any allocated blocks.
867 */
d0fcd3ec
MD
868 if (bmap->class &&
869 bmap->avail != HAMMER2_FREEMAP_LEVEL0_SIZE) {
870 kprintf("hammer2_bulkfree: cannot locate "
871 "live leaf for allocated data "
872 "near %016jx\n",
873 (intmax_t)data_off);
874 }
875 goto next;
876 }
b93cc2e0 877 if (live_chain->error) {
c8c0a18a
MD
878 kprintf("hammer2_bulkfree: unable to access freemap "
879 "near %016jx, error %s\n",
880 (intmax_t)data_off,
881 hammer2_error_str(live_chain->error));
b93cc2e0 882 hammer2_chain_unlock(live_chain);
e513e77e 883 hammer2_chain_drop(live_chain);
b93cc2e0
MD
884 live_chain = NULL;
885 goto next;
886 }
d0fcd3ec
MD
887
888 bmapindex = (data_off & HAMMER2_FREEMAP_LEVEL1_MASK) >>
889 HAMMER2_FREEMAP_LEVEL0_RADIX;
890 live = &live_chain->data->bmdata[bmapindex];
891
892 /*
b49f72c3
MD
893 * Shortcut if the bitmaps match and the live linear
894 * indicator is sane. We can't do a perfect check of
895 * live->linear because the only real requirement is that
896 * if it is not block-aligned, that it not cover the space
897 * within its current block which overlaps one of the data
898 * ranges we scan. We don't retain enough fine-grained
899 * data in our scan to be able to set it exactly.
900 *
b5b45574
MD
901 * TODO - we could shortcut this by testing that both
902 * live->class and bmap->class are 0, and both avails are
903 * set to HAMMER2_FREEMAP_LEVEL0_SIZE (4MB).
d0fcd3ec 904 */
5cebbe36 905 if (bcmp(live->bitmapq, bmap->bitmapq,
b49f72c3
MD
906 sizeof(bmap->bitmapq)) == 0 &&
907 live->linear >= bmap->linear) {
d0fcd3ec 908 goto next;
5cebbe36 909 }
470dad14
MD
910 if (hammer2_debug & 1) {
911 kprintf("live %016jx %04d.%04x (avail=%d)\n",
912 data_off, bmapindex, live->class, live->avail);
913 }
d0fcd3ec 914
3f01ebaa 915 hammer2_chain_modify(live_chain, cbinfo->mtid, 0, 0);
65cacacf
MD
916 live_chain->bref.check.freemap.bigmask = -1;
917 cbinfo->hmp->freemap_relaxed = 0; /* reset heuristic */
470dad14 918 live = &live_chain->data->bmdata[bmapindex];
b5b45574 919
85b1f267
MD
920 h2_bulkfree_sync_adjust(cbinfo, data_off, live, bmap,
921 live_chain->bref.key +
922 bmapindex *
923 HAMMER2_FREEMAP_LEVEL0_SIZE);
d0fcd3ec 924next:
3148f677
MD
925 data_off += HAMMER2_FREEMAP_LEVEL0_SIZE;
926 ++bmap;
d0fcd3ec 927 }
e513e77e 928 if (live_chain) {
d0fcd3ec 929 hammer2_chain_unlock(live_chain);
e513e77e
MD
930 hammer2_chain_drop(live_chain);
931 }
932 if (live_parent) {
d0fcd3ec 933 hammer2_chain_unlock(live_parent);
e513e77e
MD
934 hammer2_chain_drop(live_parent);
935 }
c8c0a18a 936 return error;
d0fcd3ec
MD
937}
938
53f84d31
MD
939/*
940 * Merge the bulkfree bitmap against the existing bitmap.
53f84d31 941 */
d0fcd3ec
MD
942static
943void
944h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
7767d389 945 hammer2_off_t data_off, hammer2_bmap_data_t *live,
85b1f267 946 hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base)
d0fcd3ec
MD
947{
948 int bindex;
949 int scount;
3d4f397a 950 hammer2_off_t tmp_off;
5cebbe36
MD
951 hammer2_bitmap_t lmask;
952 hammer2_bitmap_t mmask;
d0fcd3ec 953
3d4f397a
MD
954 tmp_off = data_off;
955
5cebbe36 956 for (bindex = 0; bindex < HAMMER2_BMAP_ELEMENTS; ++bindex) {
6b1f1bef
MD
957 lmask = live->bitmapq[bindex]; /* live */
958 mmask = bmap->bitmapq[bindex]; /* snapshotted bulkfree */
3d4f397a
MD
959 if (lmask == mmask) {
960 tmp_off += HAMMER2_BMAP_INDEX_SIZE;
d0fcd3ec 961 continue;
3d4f397a 962 }
d0fcd3ec 963
5cebbe36
MD
964 for (scount = 0;
965 scount < HAMMER2_BMAP_BITS_PER_ELEMENT;
966 scount += 2) {
d0fcd3ec
MD
967 if ((mmask & 3) == 0) {
968 /*
969 * in-memory 00 live 11 -> 10
970 * live 10 -> 00
b5b45574
MD
971 *
972 * Storage might be marked allocated or
973 * staged and must be remarked staged or
974 * free.
d0fcd3ec
MD
975 */
976 switch (lmask & 3) {
977 case 0: /* 00 */
978 break;
979 case 1: /* 01 */
980 kprintf("hammer2_bulkfree: cannot "
981 "transition m=00/l=01\n");
982 break;
983 case 2: /* 10 -> 00 */
5cebbe36
MD
984 live->bitmapq[bindex] &=
985 ~((hammer2_bitmap_t)2 << scount);
271568b1
MD
986 live->avail +=
987 HAMMER2_FREEMAP_BLOCK_SIZE;
b5b45574
MD
988 if (live->avail >
989 HAMMER2_FREEMAP_LEVEL0_SIZE) {
990 live->avail =
991 HAMMER2_FREEMAP_LEVEL0_SIZE;
992 }
271568b1
MD
993 cbinfo->adj_free +=
994 HAMMER2_FREEMAP_BLOCK_SIZE;
d0fcd3ec 995 ++cbinfo->count_10_00;
0b8efeb7 996 hammer2_io_dedup_assert(
3d4f397a
MD
997 cbinfo->hmp,
998 tmp_off |
999 HAMMER2_FREEMAP_BLOCK_RADIX,
1000 HAMMER2_FREEMAP_BLOCK_SIZE);
d0fcd3ec
MD
1001 break;
1002 case 3: /* 11 -> 10 */
5cebbe36
MD
1003 live->bitmapq[bindex] &=
1004 ~((hammer2_bitmap_t)1 << scount);
d0fcd3ec 1005 ++cbinfo->count_11_10;
0b8efeb7 1006 hammer2_io_dedup_delete(
3d4f397a 1007 cbinfo->hmp,
0b8efeb7 1008 HAMMER2_BREF_TYPE_DATA,
3d4f397a
MD
1009 tmp_off |
1010 HAMMER2_FREEMAP_BLOCK_RADIX,
1011 HAMMER2_FREEMAP_BLOCK_SIZE);
d0fcd3ec
MD
1012 break;
1013 }
b5b45574 1014 } else if ((mmask & 3) == 3) {
d0fcd3ec
MD
1015 /*
1016 * in-memory 11 live 10 -> 11
1017 * live ** -> 11
b5b45574
MD
1018 *
1019 * Storage might be incorrectly marked free
1020 * or staged and must be remarked fully
1021 * allocated.
d0fcd3ec
MD
1022 */
1023 switch (lmask & 3) {
1024 case 0: /* 00 */
b5b45574
MD
1025 ++cbinfo->count_00_11;
1026 cbinfo->adj_free -=
1027 HAMMER2_FREEMAP_BLOCK_SIZE;
1028 live->avail -=
1029 HAMMER2_FREEMAP_BLOCK_SIZE;
1030 if ((int32_t)live->avail < 0)
1031 live->avail = 0;
d0fcd3ec
MD
1032 break;
1033 case 1: /* 01 */
b5b45574 1034 ++cbinfo->count_01_11;
d0fcd3ec
MD
1035 break;
1036 case 2: /* 10 -> 11 */
d0fcd3ec
MD
1037 ++cbinfo->count_10_11;
1038 break;
1039 case 3: /* 11 */
1040 break;
1041 }
b5b45574
MD
1042 live->bitmapq[bindex] |=
1043 ((hammer2_bitmap_t)3 << scount);
d0fcd3ec
MD
1044 }
1045 mmask >>= 2;
1046 lmask >>= 2;
3d4f397a 1047 tmp_off += HAMMER2_FREEMAP_BLOCK_SIZE;
3148f677
MD
1048 }
1049 }
d0fcd3ec
MD
1050
1051 /*
1052 * Determine if the live bitmap is completely free and reset its
1053 * fields if so. Otherwise check to see if we can reduce the linear
1054 * offset.
1055 */
5cebbe36
MD
1056 for (bindex = HAMMER2_BMAP_ELEMENTS - 1; bindex >= 0; --bindex) {
1057 if (live->bitmapq[bindex] != 0)
d0fcd3ec
MD
1058 break;
1059 }
b49f72c3
MD
1060 if (bindex < 0) {
1061 /*
1062 * Completely empty, reset entire segment
1063 */
85b1f267
MD
1064#if 0
1065 kprintf("hammer2: cleanseg %016jx.%04x (%d)\n",
1066 alloc_base, live->class, live->avail);
1067#endif
d0fcd3ec
MD
1068 live->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
1069 live->class = 0;
1070 live->linear = 0;
1071 ++cbinfo->count_l0cleans;
1072 } else if (bindex < 7) {
b5b45574 1073 /*
83815ec6
MD
1074 * Partially full, bitmapq[bindex] != 0. Our bulkfree pass
1075 * does not record enough information to set live->linear
1076 * exactly.
b49f72c3 1077 *
83815ec6
MD
1078 * NOTE: Setting live->linear to a sub-block (16K) boundary
1079 * forces the live code to iterate to the next fully
1080 * free block. It does NOT mean that all blocks above
1081 * live->linear are available.
b49f72c3 1082 *
83815ec6
MD
1083 * Setting live->linear to a fragmentary (less than
1084 * 16K) boundary allows allocations to iterate within
1085 * that sub-block.
b5b45574 1086 */
b49f72c3
MD
1087 if (live->linear < bmap->linear &&
1088 ((live->linear ^ bmap->linear) &
1089 ~HAMMER2_FREEMAP_BLOCK_MASK) == 0) {
83815ec6
MD
1090 /*
1091 * If greater than but still within the same
1092 * sub-block as live we can adjust linear upward.
1093 */
b49f72c3
MD
1094 live->linear = bmap->linear;
1095 ++cbinfo->count_linadjusts;
1096 } else {
83815ec6
MD
1097 /*
1098 * Otherwise adjust to the nearest higher or same
1099 * sub-block boundary. The live system may have
1100 * bounced live->linear around so we cannot make any
1101 * assumptions with regards to available fragmentary
1102 * allocations.
1103 */
b49f72c3
MD
1104 live->linear =
1105 (bmap->linear + HAMMER2_FREEMAP_BLOCK_MASK) &
1106 ~HAMMER2_FREEMAP_BLOCK_MASK;
b5b45574
MD
1107 ++cbinfo->count_linadjusts;
1108 }
1109 } else {
b49f72c3
MD
1110 /*
1111 * Completely full, effectively disable the linear iterator
1112 */
b5b45574 1113 live->linear = HAMMER2_SEGSIZE;
d0fcd3ec
MD
1114 }
1115
1116#if 0
1117 if (bmap->class) {
1118 kprintf("%016jx %04d.%04x (avail=%7d) "
1119 "%08x %08x %08x %08x %08x %08x %08x %08x\n",
1120 (intmax_t)data_off,
1121 (int)((data_off &
1122 HAMMER2_FREEMAP_LEVEL1_MASK) >>
1123 HAMMER2_FREEMAP_LEVEL0_RADIX),
1124 bmap->class,
1125 bmap->avail,
1126 bmap->bitmap[0], bmap->bitmap[1],
1127 bmap->bitmap[2], bmap->bitmap[3],
1128 bmap->bitmap[4], bmap->bitmap[5],
1129 bmap->bitmap[6], bmap->bitmap[7]);
1130 }
1131#endif
3148f677 1132}
125966e8
MD
1133
1134/*
1135 * BULKFREE DEDUP HEURISTIC
1136 *
1137 * WARNING! This code is SMP safe but the heuristic allows SMP collisions.
1138 * All fields must be loaded into locals and validated.
1139 */
1140static
1141int
1142h2_bulkfree_test(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref,
1143 int pri)
1144{
1145 hammer2_dedup_t *dedup;
1146 int best;
1147 int n;
1148 int i;
1149
1150 n = hammer2_icrc32(&bref->data_off, sizeof(bref->data_off));
1151 dedup = cbinfo->dedup + (n & (HAMMER2_DEDUP_HEUR_MASK & ~7));
1152
1153 for (i = best = 0; i < 8; ++i) {
1154 if (dedup[i].data_off == bref->data_off) {
1155 if (dedup[i].ticks < pri)
1156 dedup[i].ticks = pri;
1157 if (pri == 1)
1158 cbinfo->count_dedup_factor += dedup[i].ticks;
1159 return 1;
1160 }
1161 if (dedup[i].ticks < dedup[best].ticks)
1162 best = i;
1163 }
1164 dedup[best].data_off = bref->data_off;
1165 dedup[best].ticks = pri;
1166
1167 return 0;
1168}