2 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Ilya Dryomov <idryomov@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 #include <crypto/sha2/sha2.h>
/* NOTE(review): this listing is truncated — intermediate source lines are elided throughout. */
/* Shared I/O buffer size (64 KiB) used for mirror-read and get-data ioctls. */
39 #define DEDUP_BUF (64 * 1024)
41 /* Sorted list of block CRCs - light version for dedup-simulate */
42 struct sim_dedup_entry_rb_tree;
43 RB_HEAD(sim_dedup_entry_rb_tree, sim_dedup_entry) sim_dedup_tree =
44 RB_INITIALIZER(&sim_dedup_tree);
45 RB_PROTOTYPE2(sim_dedup_entry_rb_tree, sim_dedup_entry, rb_entry,
46 rb_sim_dedup_entry_compare, hammer_crc_t);
/*
 * Per-CRC accounting record for dedup-simulate.  Keyed by a hammer_crc_t
 * `crc` field (declaration elided in this listing; see the RB_GENERATE2
 * below which names `crc` as the key).
 */
48 struct sim_dedup_entry {
50 u_int64_t ref_blks; /* number of blocks referenced */
51 u_int64_t ref_size; /* size of data referenced */
52 RB_ENTRY(sim_dedup_entry) rb_entry;
55 /* Sorted list of HAMMER B-Tree keys */
56 struct dedup_entry_rb_tree;
57 struct sha_dedup_entry_rb_tree;
59 RB_HEAD(dedup_entry_rb_tree, dedup_entry) dedup_tree =
60 RB_INITIALIZER(&dedup_tree);
61 RB_PROTOTYPE2(dedup_entry_rb_tree, dedup_entry, rb_entry,
62 rb_dedup_entry_compare, hammer_crc_t);
64 RB_PROTOTYPE(sha_dedup_entry_rb_tree, sha_dedup_entry, fict_entry,
65 rb_sha_dedup_entry_compare);
/*
 * CRC-keyed dedup entry (keyed on leaf.data_crc, per RB_GENERATE2 below).
 * On a CRC collision the entry is flagged FICTITIOUS and candidates move
 * into the SHA-256 sub-tree rooted at fict_root.  The struct's braces and
 * the u.de ref_blks/ref_size union arm are elided in this listing —
 * usage in process_btree_elm() shows a union `u` of {de stats, fict_root}.
 */
68 struct hammer_btree_leaf_elm leaf;
74 RB_HEAD(sha_dedup_entry_rb_tree, sha_dedup_entry) fict_root;
77 RB_ENTRY(dedup_entry) rb_entry;
/* Set once this CRC has collided; all further lookups go via fict_root. */
80 #define HAMMER_DEDUP_ENTRY_FICTITIOUS 0x0001
/*
 * SHA-256-keyed entry living under a fictitious CRC entry's fict_root.
 * Carries its own ref_blks/ref_size counters (field declarations elided
 * in this listing; both are referenced by process_btree_elm()).
 */
82 struct sha_dedup_entry {
83 struct hammer_btree_leaf_elm leaf;
86 u_int8_t sha_hash[SHA256_DIGEST_LENGTH];
87 RB_ENTRY(sha_dedup_entry) fict_entry;
91 * Pass2 list - contains entries that were not dedup'ed because ioctl failed
93 STAILQ_HEAD(, pass2_dedup_entry) pass2_dedup_queue =
94 STAILQ_HEAD_INITIALIZER(pass2_dedup_queue);
/* Deferred candidate: retried by process_btree_elm() in DEDUP_PASS2 mode. */
96 struct pass2_dedup_entry {
97 struct hammer_btree_leaf_elm leaf;
98 STAILQ_ENTRY(pass2_dedup_entry) sq_entry;
/* process_btree_elm() flag: second pass, do not re-queue failures. */
101 #define DEDUP_PASS2 0x0001 /* process_btree_elm() mode */
/* Set by SIGINFO handler; scan_pfs() prints progress when it is set. */
103 static int SigInfoFlag;
/* Bytes of file data actually read via HAMMERIOC_GET_DATA (SHA upgrades). */
104 static int64_t DedupDataReads;
105 static int64_t DedupCurrentRecords;
106 static int64_t DedupTotalRecords;
/* Current CRC window [DedupCrcStart, DedupCrcEnd); halved when MemoryLimit hit. */
107 static u_int32_t DedupCrcStart;
108 static u_int32_t DedupCrcEnd;
/* Approximate bytes held in the RB trees; compared against MemoryLimit. */
109 static u_int64_t MemoryUse;
111 /* PFS global ids - we deal with just one PFS at a run */
113 struct hammer_ioc_pseudofs_rw glob_pfs;
116 * Global accounting variables
118 * Last three don't have to be 64-bit, just to be safe..
120 u_int64_t dedup_alloc_size = 0;
121 u_int64_t dedup_ref_size = 0;
122 u_int64_t dedup_skipped_size = 0;
123 u_int64_t dedup_crc_failures = 0;
124 u_int64_t dedup_sha_failures = 0;
125 u_int64_t dedup_underflows = 0;
126 u_int64_t dedup_successes_count = 0;
127 u_int64_t dedup_successes_bytes = 0;
/* Forward declarations for the RB-tree comparators and scan callbacks. */
129 static int rb_sim_dedup_entry_compare(struct sim_dedup_entry *sim_de1,
130 struct sim_dedup_entry *sim_de2);
131 static int rb_dedup_entry_compare(struct dedup_entry *de1,
132 struct dedup_entry *de2);
133 static int rb_sha_dedup_entry_compare(struct sha_dedup_entry *sha_de1,
134 struct sha_dedup_entry *sha_de2);
/* Callback signature invoked by scan_pfs() for each DATA record leaf. */
135 typedef int (*scan_pfs_cb_t)(hammer_btree_leaf_elm_t scan_leaf, int flags);
136 static void scan_pfs(char *filesystem, scan_pfs_cb_t func, const char *id);
137 static int collect_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags);
138 static int count_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags);
139 static int process_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags);
140 static int upgrade_chksum(hammer_btree_leaf_elm_t leaf, u_int8_t *sha_hash);
141 static void dump_simulated_dedup(void);
142 static void dump_real_dedup(void);
143 static void dedup_usage(int code);
/* RB-tree generators: sim tree keyed by crc, real tree by leaf.data_crc. */
145 RB_GENERATE2(sim_dedup_entry_rb_tree, sim_dedup_entry, rb_entry,
146 rb_sim_dedup_entry_compare, hammer_crc_t, crc);
147 RB_GENERATE2(dedup_entry_rb_tree, dedup_entry, rb_entry,
148 rb_dedup_entry_compare, hammer_crc_t, leaf.data_crc);
149 RB_GENERATE(sha_dedup_entry_rb_tree, sha_dedup_entry, fict_entry,
150 rb_sha_dedup_entry_compare);
/*
 * RB comparator: order sim_dedup_entry nodes by their crc key.
 * NOTE(review): the return statements and closing brace are elided in
 * this listing; presumably standard -1/1/0 ordering — confirm upstream.
 */
153 rb_sim_dedup_entry_compare(struct sim_dedup_entry *sim_de1,
154 struct sim_dedup_entry *sim_de2)
156 if (sim_de1->crc < sim_de2->crc)
158 if (sim_de1->crc > sim_de2->crc)
/*
 * RB comparator: order dedup_entry nodes by leaf.data_crc.
 * NOTE(review): return statements elided in this listing; presumably
 * -1/1/0 — confirm upstream.
 */
165 rb_dedup_entry_compare(struct dedup_entry *de1, struct dedup_entry *de2)
167 if (de1->leaf.data_crc < de2->leaf.data_crc)
169 if (de1->leaf.data_crc > de2->leaf.data_crc)
/*
 * RB comparator: order sha_dedup_entry nodes by their SHA-256 digest,
 * compared unsigned-long-wise for speed.  Loop body and returns are
 * elided in this listing.
 * NOTE(review): casting the u_int8_t digest to unsigned long* is a
 * strict-aliasing/alignment gray area — works here because the array is
 * suitably aligned within the struct, but worth confirming.
 */
176 rb_sha_dedup_entry_compare(struct sha_dedup_entry *sha_de1,
177 struct sha_dedup_entry *sha_de2)
179 unsigned long *h1 = (unsigned long *)&sha_de1->sha_hash;
180 unsigned long *h2 = (unsigned long *)&sha_de2->sha_hash;
183 for (i = 0; i < SHA256_DIGEST_LENGTH / (int)sizeof(unsigned long); ++i) {
194 * dedup-simulate <filesystem>
/*
 * Entry point for `hammer dedup-simulate`: scan the PFS collecting CRC
 * statistics only (no data reads, no ioctl dedup), then report the
 * simulated dedup ratio.  Outer do/while re-runs the b-tree pass per
 * CRC window when the memory limit forced the window to shrink.
 */
197 hammer_cmd_dedup_simulate(char **av, int ac)
199 struct sim_dedup_entry *sim_de;
204 glob_fd = getpfs(&glob_pfs, av[0]);
207 * Collection passes (memory limited)
209 printf("Dedup-simulate running\n");
/* Resume the next pass where the previous (shrunk) window ended. */
211 DedupCrcStart = DedupCrcEnd;
216 printf("b-tree pass crc-range %08x-max\n",
220 scan_pfs(av[0], collect_btree_elm, "simu-pass");
223 dump_simulated_dedup();
226 * Calculate simulated dedup ratio and get rid of the tree
228 while ((sim_de = RB_ROOT(&sim_dedup_tree)) != NULL) {
229 assert(sim_de->ref_blks != 0);
230 dedup_ref_size += sim_de->ref_size;
/* One copy of each CRC's data survives: ref_size / ref_blks. */
231 dedup_alloc_size += sim_de->ref_size / sim_de->ref_blks;
233 RB_REMOVE(sim_dedup_entry_rb_tree, &sim_dedup_tree, sim_de);
236 if (DedupCrcEnd && VerboseOpt == 0)
238 } while (DedupCrcEnd);
240 printf("Dedup-simulate %s succeeded\n", av[0]);
241 relpfs(glob_fd, &glob_pfs);
243 printf("Simulated dedup ratio = %.2f\n",
244 (dedup_alloc_size != 0) ?
245 (double)dedup_ref_size / dedup_alloc_size : 0);
/*
 * Entry point for `hammer dedup`: pre-pass to count/cache the b-tree,
 * then memory-limited collection passes that actually issue the dedup
 * ioctl, a pass2 retry of deferred candidates, and a final stats report.
 */
252 hammer_cmd_dedup(char **av, int ac)
254 struct dedup_entry *de;
255 struct sha_dedup_entry *sha_de;
256 struct pass2_dedup_entry *pass2_de;
265 STAILQ_INIT(&pass2_dedup_queue);
267 glob_fd = getpfs(&glob_pfs, av[0]);
270 * Pre-pass to cache the btree
272 scan_pfs(av[0], count_btree_elm, "pre-pass ");
/* Total from pre-pass drives percentage progress in scan_pfs(). */
273 DedupTotalRecords = DedupCurrentRecords;
276 * Collection passes (memory limited)
278 printf("Dedup running\n");
280 DedupCrcStart = DedupCrcEnd;
285 printf("b-tree pass crc-range %08x-max\n",
289 scan_pfs(av[0], process_btree_elm, "main-pass");
/* Retry candidates whose ioctl failed transiently during the main pass. */
291 while ((pass2_de = STAILQ_FIRST(&pass2_dedup_queue)) != NULL) {
292 if (process_btree_elm(&pass2_de->leaf, DEDUP_PASS2))
293 dedup_skipped_size -= pass2_de->leaf.data_len;
295 STAILQ_REMOVE_HEAD(&pass2_dedup_queue, sq_entry);
298 assert(STAILQ_EMPTY(&pass2_dedup_queue));
304 * Calculate dedup ratio and get rid of the trees
306 while ((de = RB_ROOT(&dedup_tree)) != NULL) {
307 if (de->flags & HAMMER_DEDUP_ENTRY_FICTITIOUS) {
/* Collided CRC: stats live in the SHA sub-tree entries. */
308 while ((sha_de = RB_ROOT(&de->u.fict_root)) != NULL) {
309 assert(sha_de->ref_blks != 0);
310 dedup_ref_size += sha_de->ref_size;
311 dedup_alloc_size += sha_de->ref_size / sha_de->ref_blks;
313 RB_REMOVE(sha_dedup_entry_rb_tree,
314 &de->u.fict_root, sha_de);
317 assert(RB_EMPTY(&de->u.fict_root));
319 assert(de->u.de.ref_blks != 0);
320 dedup_ref_size += de->u.de.ref_size;
321 dedup_alloc_size += de->u.de.ref_size / de->u.de.ref_blks;
324 RB_REMOVE(dedup_entry_rb_tree, &dedup_tree, de);
327 assert(RB_EMPTY(&dedup_tree));
328 if (DedupCrcEnd && VerboseOpt == 0)
330 } while (DedupCrcEnd);
332 printf("Dedup %s succeeded\n", av[0]);
333 relpfs(glob_fd, &glob_pfs);
335 humanize_unsigned(buf, sizeof(buf), dedup_ref_size, "B", 1024);
336 printf("Dedup ratio = %.2f\n"
338 ((dedup_alloc_size != 0) ?
339 (double)dedup_ref_size / dedup_alloc_size : 0),
342 humanize_unsigned(buf, sizeof(buf), dedup_alloc_size, "B", 1024);
343 printf(" %8s allocated\n", buf);
344 humanize_unsigned(buf, sizeof(buf), dedup_skipped_size, "B", 1024);
345 printf(" %8s skipped\n", buf);
346 printf(" %8jd CRC collisions\n"
347 " %8jd SHA collisions\n"
348 " %8jd bigblock underflows\n"
349 " %8jd new dedup records\n"
350 " %8jd new dedup bytes\n",
351 (intmax_t)dedup_crc_failures,
352 (intmax_t)dedup_sha_failures,
353 (intmax_t)dedup_underflows,
354 (intmax_t)dedup_successes_count,
355 (intmax_t)dedup_successes_bytes
/* Pre-pass callback: does nothing per-element; scan_pfs() itself counts
 * records into DedupCurrentRecords (body/return elided in listing). */
360 count_btree_elm(hammer_btree_leaf_elm_t scan_leaf __unused, int flags __unused)
/*
 * dedup-simulate callback: accumulate per-CRC block/byte counts in the
 * sim tree.  No data blocks are read and no SHA is computed.  When the
 * memory limit is exceeded the CRC window is halved and entries whose
 * crc now falls outside the window are pruned from the tree.
 */
366 collect_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags __unused)
368 struct sim_dedup_entry *sim_de;
371 * If we are using too much memory we have to clean some out, which
372 * will cause the run to use multiple passes. Be careful of integer
375 if (MemoryUse > MemoryLimit) {
/* Halve the remaining CRC window; the -1 guards unsigned wrap. */
376 DedupCrcEnd = DedupCrcStart +
377 (u_int32_t)(DedupCrcEnd - DedupCrcStart - 1) / 2;
379 printf("memory limit crc-range %08x-%08x\n",
380 DedupCrcStart, DedupCrcEnd);
/* Prune from the top of the tree down to the new DedupCrcEnd. */
384 sim_de = RB_MAX(sim_dedup_entry_rb_tree,
386 if (sim_de == NULL || sim_de->crc < DedupCrcEnd)
388 RB_REMOVE(sim_dedup_entry_rb_tree,
389 &sim_dedup_tree, sim_de);
390 MemoryUse -= sizeof(*sim_de);
396 * Collect statistics based on the CRC only, do not try to read
397 * any data blocks or run SHA hashes.
399 sim_de = RB_LOOKUP(sim_dedup_entry_rb_tree, &sim_dedup_tree,
400 scan_leaf->data_crc);
402 if (sim_de == NULL) {
/* NOTE(review): calloc result is not NULL-checked here (as elsewhere
 * in this file) — a failed allocation would fault on the next line. */
403 sim_de = calloc(sizeof(*sim_de), 1);
404 sim_de->crc = scan_leaf->data_crc;
405 RB_INSERT(sim_dedup_entry_rb_tree, &sim_dedup_tree, sim_de);
406 MemoryUse += sizeof(*sim_de);
409 sim_de->ref_blks += 1;
410 sim_de->ref_size += scan_leaf->data_len;
/*
 * Sanity-check a candidate dedup pair: both leaves must live in the same
 * blockmap zone and have identical data_len.  Failure return values are
 * elided in this listing; nonzero presumably means "invalid" given the
 * callers' `if (validate_dedup_pair(...)) goto ...failure` usage.
 */
415 validate_dedup_pair(hammer_btree_leaf_elm_t p, hammer_btree_leaf_elm_t q)
417 if ((p->data_offset & HAMMER_OFF_ZONE_MASK) !=
418 (q->data_offset & HAMMER_OFF_ZONE_MASK)) {
421 if (p->data_len != q->data_len) {
/* Outcome codes returned by deduplicate() below. */
428 #define DEDUP_TECH_FAILURE 1
429 #define DEDUP_CMP_FAILURE 2
430 #define DEDUP_INVALID_ZONE 3
431 #define DEDUP_UNDERFLOW 4
432 #define DEDUP_VERS_FAILURE 5
/*
 * Issue the HAMMERIOC_DEDUP ioctl for the pair (p, q).  Returns 0 (the
 * success return itself is elided in this listing) or one of the
 * DEDUP_* codes above.  Identical data_offsets mean the pair is already
 * deduplicated, so the ioctl is skipped.
 */
435 deduplicate(hammer_btree_leaf_elm_t p, hammer_btree_leaf_elm_t q)
437 struct hammer_ioc_dedup dedup;
439 bzero(&dedup, sizeof(dedup));
442 * If data_offset fields are the same there is no need to run ioctl,
443 * candidate is already dedup'ed.
445 if (p->data_offset == q->data_offset) {
449 dedup.elm1 = p->base;
450 dedup.elm2 = q->base;
452 if (ioctl(glob_fd, HAMMERIOC_DEDUP, &dedup) < 0) {
453 if (errno == EOPNOTSUPP) {
454 /* must be at least version 5 */
455 return (DEDUP_VERS_FAILURE);
457 /* Technical failure - locking or w/e */
458 return (DEDUP_TECH_FAILURE);
/* Kernel-side verify found the block contents differ (CRC collision). */
460 if (dedup.head.flags & HAMMER_IOC_DEDUP_CMP_FAILURE) {
461 return (DEDUP_CMP_FAILURE);
463 if (dedup.head.flags & HAMMER_IOC_DEDUP_INVALID_ZONE) {
464 return (DEDUP_INVALID_ZONE);
466 if (dedup.head.flags & HAMMER_IOC_DEDUP_UNDERFLOW) {
467 return (DEDUP_UNDERFLOW);
470 ++dedup_successes_count;
471 dedup_successes_bytes += p->data_len;
/*
 * Main dedup callback, invoked per DATA record leaf (and again from the
 * pass2 retry loop with DEDUP_PASS2 set).  Flow, as visible here:
 *   - prune trees / shrink CRC window when over MemoryLimit;
 *   - lookup by CRC; new CRC -> insert and upgrade CRC stats;
 *   - fictitious CRC entry -> SHA path (compute/lookup SHA, dedup);
 *   - plain CRC hit -> validate + dedup; CMP failure promotes the entry
 *     to fictitious and moves both blocks into the SHA sub-tree;
 *   - transient ioctl failures queue the leaf for pass2;
 *   - terminate_early fixes up alloc/ref stats for skipped leaves.
 * Many labels, returns and closing braces are elided in this listing, so
 * the exact label targets of several gotos cannot be confirmed here.
 */
476 process_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags)
478 struct dedup_entry *de;
479 struct sha_dedup_entry *sha_de, temp;
480 struct pass2_dedup_entry *pass2_de;
484 * If we are using too much memory we have to clean some out, which
485 * will cause the run to use multiple passes. Be careful of integer
488 while (MemoryUse > MemoryLimit) {
/* Halve the remaining CRC window; the -1 guards unsigned wrap. */
489 DedupCrcEnd = DedupCrcStart +
490 (u_int32_t)(DedupCrcEnd - DedupCrcStart - 1) / 2;
492 printf("memory limit crc-range %08x-%08x\n",
493 DedupCrcStart, DedupCrcEnd);
/* Prune entries from the top of the CRC tree past the new window end. */
498 de = RB_MAX(dedup_entry_rb_tree, &dedup_tree);
499 if (de == NULL || de->leaf.data_crc < DedupCrcEnd)
501 if (de->flags & HAMMER_DEDUP_ENTRY_FICTITIOUS) {
502 while ((sha_de = RB_ROOT(&de->u.fict_root)) !=
504 RB_REMOVE(sha_dedup_entry_rb_tree,
505 &de->u.fict_root, sha_de);
506 MemoryUse -= sizeof(*sha_de);
510 RB_REMOVE(dedup_entry_rb_tree, &dedup_tree, de);
511 MemoryUse -= sizeof(*de);
517 * Collect statistics based on the CRC. Colliding CRCs usually
518 * cause a SHA sub-tree to be created under the de.
520 * Trivial case if de not found.
522 de = RB_LOOKUP(dedup_entry_rb_tree, &dedup_tree, scan_leaf->data_crc);
524 de = calloc(sizeof(*de), 1);
525 de->leaf = *scan_leaf;
526 RB_INSERT(dedup_entry_rb_tree, &dedup_tree, de);
527 MemoryUse += sizeof(*de);
532 * Found entry in CRC tree
534 if (de->flags & HAMMER_DEDUP_ENTRY_FICTITIOUS) {
536 * Optimize the case where a CRC failure results in multiple
537 * SHA entries. If we unconditionally issue a data-read a
538 * degenerate situation where a colliding CRC's second SHA
539 * entry contains the lion's share of the deduplication
540 * candidates will result in excessive data block reads.
542 * Deal with the degenerate case by looking for a matching
543 * data_offset/data_len in the SHA elements we already have
544 * before reading the data block and generating a new SHA.
546 RB_FOREACH(sha_de, sha_dedup_entry_rb_tree, &de->u.fict_root) {
547 if (sha_de->leaf.data_offset ==
548 scan_leaf->data_offset &&
549 sha_de->leaf.data_len == scan_leaf->data_len) {
/* Reuse the stored hash; avoids a data-block read. */
550 memcpy(temp.sha_hash, sha_de->sha_hash,
551 SHA256_DIGEST_LENGTH);
557 * Entry in CRC tree is fictitious, so we already had problems
558 * with this CRC. Upgrade (compute SHA) the candidate and
559 * dive into SHA subtree. If upgrade fails insert the candidate
560 * into Pass2 list (it will be processed later).
562 if (sha_de == NULL) {
563 if (upgrade_chksum(scan_leaf, temp.sha_hash))
566 sha_de = RB_FIND(sha_dedup_entry_rb_tree,
567 &de->u.fict_root, &temp);
571 * Nothing in SHA subtree so far, so this is a new
572 * 'dataset'. Insert new entry into SHA subtree.
574 if (sha_de == NULL) {
575 sha_de = calloc(sizeof(*sha_de), 1);
576 sha_de->leaf = *scan_leaf;
577 memcpy(sha_de->sha_hash, temp.sha_hash,
578 SHA256_DIGEST_LENGTH);
579 RB_INSERT(sha_dedup_entry_rb_tree, &de->u.fict_root,
581 MemoryUse += sizeof(*sha_de);
582 goto upgrade_stats_sha;
586 * Found entry in SHA subtree, it means we have a potential
587 * dedup pair. Validate it (zones have to match and data_len
588 * field have to be the same too. If validation fails, treat
589 * it as a SHA collision (jump to sha256_failure).
591 if (validate_dedup_pair(&sha_de->leaf, scan_leaf))
595 * We have a valid dedup pair (SHA match, validated).
597 * In case of technical failure (dedup pair was good, but
598 * ioctl failed anyways) insert the candidate into Pass2 list
599 * (we will try to dedup it after we are done with the rest of
602 * If ioctl fails because either of blocks is in the non-dedup
603 * zone (we can dedup only in LARGE_DATA and SMALL_DATA) don't
604 * bother with the candidate and terminate early.
606 * If ioctl fails because of bigblock underflow replace the
607 * leaf node that found dedup entry represents with scan_leaf.
609 error = deduplicate(&sha_de->leaf, scan_leaf);
612 goto upgrade_stats_sha;
613 case DEDUP_TECH_FAILURE:
615 case DEDUP_CMP_FAILURE:
617 case DEDUP_INVALID_ZONE:
618 goto terminate_early;
619 case DEDUP_UNDERFLOW:
/* Replace the reference leaf so future pairs use scan_leaf. */
621 sha_de->leaf = *scan_leaf;
622 memcpy(sha_de->sha_hash, temp.sha_hash,
623 SHA256_DIGEST_LENGTH);
624 goto upgrade_stats_sha;
625 case DEDUP_VERS_FAILURE:
627 "HAMMER filesystem must be at least "
628 "version 5 to dedup\n");
631 fprintf(stderr, "Unknown error\n");
632 goto terminate_early;
636 * Ooh la la.. SHA-256 collision. Terminate early, there's
637 * nothing we can do here.
640 ++dedup_sha_failures;
641 goto terminate_early;
644 * Candidate CRC is good for now (we found an entry in CRC
645 * tree and it's not fictitious). This means we have a
646 * potential dedup pair.
648 if (validate_dedup_pair(&de->leaf, scan_leaf))
652 * We have a valid dedup pair (CRC match, validated)
654 error = deduplicate(&de->leaf, scan_leaf);
658 case DEDUP_TECH_FAILURE:
660 case DEDUP_CMP_FAILURE:
662 case DEDUP_INVALID_ZONE:
663 goto terminate_early;
664 case DEDUP_UNDERFLOW:
666 de->leaf = *scan_leaf;
668 case DEDUP_VERS_FAILURE:
670 "HAMMER filesystem must be at least "
671 "version 5 to dedup\n");
674 fprintf(stderr, "Unknown error\n");
675 goto terminate_early;
680 * We got a CRC collision - either ioctl failed because of
681 * the comparison failure or validation of the potential
682 * dedup pair went bad. In all cases insert both blocks
683 * into SHA subtree (this requires checksum upgrade) and mark
684 * entry that corresponds to this CRC in the CRC tree
685 * fictitious, so that all futher operations with this CRC go
686 * through SHA subtree.
688 ++dedup_crc_failures;
691 * Insert block that was represented by now fictitious dedup
692 * entry (create a new SHA entry and preserve stats of the
693 * old CRC one). If checksum upgrade fails insert the
694 * candidate into Pass2 list and return - keep both trees
697 sha_de = calloc(sizeof(*sha_de), 1);
698 sha_de->leaf = de->leaf;
699 sha_de->ref_blks = de->u.de.ref_blks;
700 sha_de->ref_size = de->u.de.ref_size;
701 if (upgrade_chksum(&sha_de->leaf, sha_de->sha_hash)) {
705 MemoryUse += sizeof(*sha_de);
/* u.de stats were copied above; union arm now becomes fict_root. */
707 RB_INIT(&de->u.fict_root);
709 * Here we can insert without prior checking because the tree
710 * is empty at this point
712 RB_INSERT(sha_dedup_entry_rb_tree, &de->u.fict_root, sha_de);
715 * Mark entry in CRC tree fictitious
717 de->flags |= HAMMER_DEDUP_ENTRY_FICTITIOUS;
720 * Upgrade checksum of the candidate and insert it into
721 * SHA subtree. If upgrade fails insert the candidate into
724 if (upgrade_chksum(scan_leaf, temp.sha_hash)) {
727 sha_de = RB_FIND(sha_dedup_entry_rb_tree, &de->u.fict_root,
730 /* There is an entry with this SHA already, but the only
731 * RB-tree element at this point is that entry we just
732 * added. We know for sure these blocks are different
733 * (this is crc_failure branch) so treat it as SHA
738 sha_de = calloc(sizeof(*sha_de), 1);
739 sha_de->leaf = *scan_leaf;
740 memcpy(sha_de->sha_hash, temp.sha_hash, SHA256_DIGEST_LENGTH);
741 RB_INSERT(sha_dedup_entry_rb_tree, &de->u.fict_root, sha_de);
742 MemoryUse += sizeof(*sha_de);
743 goto upgrade_stats_sha;
/* upgrade_stats: account the candidate against the CRC entry. */
747 de->u.de.ref_blks += 1;
748 de->u.de.ref_size += scan_leaf->data_len;
/* upgrade_stats_sha: account the candidate against the SHA entry. */
752 sha_de->ref_blks += 1;
753 sha_de->ref_size += scan_leaf->data_len;
758 * If in pass2 mode don't insert anything, fall through to
761 if ((flags & DEDUP_PASS2) == 0) {
762 pass2_de = calloc(sizeof(*pass2_de), 1);
763 pass2_de->leaf = *scan_leaf;
764 STAILQ_INSERT_TAIL(&pass2_dedup_queue, pass2_de, sq_entry);
765 dedup_skipped_size += scan_leaf->data_len;
771 * Early termination path. Fixup stats.
773 dedup_alloc_size += scan_leaf->data_len;
774 dedup_ref_size += scan_leaf->data_len;
/*
 * Read the leaf's data block via HAMMERIOC_GET_DATA and compute its
 * SHA-256 digest into sha_hash.  Nonzero return indicates failure
 * (return statements and the error path after the data_len mismatch
 * check are elided in this listing).
 */
779 upgrade_chksum(hammer_btree_leaf_elm_t leaf, u_int8_t *sha_hash)
781 struct hammer_ioc_data data;
782 char *buf = malloc(DEDUP_BUF);
786 bzero(&data, sizeof(data));
787 data.elm = leaf->base;
789 data.size = DEDUP_BUF;
792 if (ioctl(glob_fd, HAMMERIOC_GET_DATA, &data) < 0) {
793 fprintf(stderr, "Get-data failed: %s\n", strerror(errno));
/* Account actual data-block bytes read (reported via SIGINFO). */
797 DedupDataReads += leaf->data_len;
/* Record changed underneath us between scan and read. */
799 if (data.leaf.data_len != leaf->data_len) {
804 if (data.leaf.base.btype == HAMMER_BTREE_TYPE_RECORD &&
805 data.leaf.base.rec_type == HAMMER_RECTYPE_DATA) {
807 SHA256_Update(&ctx, (void *)buf, data.leaf.data_len);
808 SHA256_Final(sha_hash, &ctx);
/* SIGINFO handler: presumably just sets SigInfoFlag (body elided). */
817 sigInfo(int signo __unused)
/*
 * Walk the PFS b-tree with HAMMERIOC_MIRROR_READ (NODATA: keys only) and
 * invoke `func` for every DATA record leaf whose data_crc falls inside
 * the current CRC window.  `id` labels progress output.  Progress is
 * printed on SIGINFO; loop control lines/labels are elided in this
 * listing.
 */
823 scan_pfs(char *filesystem, scan_pfs_cb_t func, const char *id)
825 struct hammer_ioc_mirror_rw mirror;
826 hammer_ioc_mrecord_any_t mrec;
827 struct hammer_btree_leaf_elm elm;
828 char *buf = malloc(DEDUP_BUF);
835 DedupCurrentRecords = 0;
836 signal(SIGINFO, sigInfo);
838 bzero(&mirror, sizeof(mirror));
839 hammer_key_beg_init(&mirror.key_beg);
840 hammer_key_end_init(&mirror.key_end);
842 mirror.tid_beg = glob_pfs.ondisk->sync_beg_tid;
843 mirror.tid_end = glob_pfs.ondisk->sync_end_tid;
844 mirror.head.flags |= HAMMER_IOC_MIRROR_NODATA; /* we want only keys */
846 mirror.size = DEDUP_BUF;
847 mirror.pfs_id = glob_pfs.pfs_id;
848 mirror.shared_uuid = glob_pfs.ondisk->shared_uuid;
850 if (VerboseOpt && DedupCrcStart == 0) {
851 printf("%s %s: objspace %016jx:%04x %016jx:%04x\n",
853 (uintmax_t)mirror.key_beg.obj_id,
854 mirror.key_beg.localization,
855 (uintmax_t)mirror.key_end.obj_id,
856 mirror.key_end.localization);
857 printf("%s %s: pfs_id %d\n",
858 id, filesystem, glob_pfs.pfs_id);
/* Main read loop (do { ... } while (mirror.count != 0) — opener elided). */
865 mirror.pfs_id = glob_pfs.pfs_id;
866 mirror.shared_uuid = glob_pfs.ondisk->shared_uuid;
867 if (ioctl(glob_fd, HAMMERIOC_MIRROR_READ, &mirror) < 0) {
868 fprintf(stderr, "Mirror-read %s failed: %s\n",
869 filesystem, strerror(errno));
872 if (mirror.head.flags & HAMMER_IOC_HEAD_ERROR) {
873 fprintf(stderr, "Mirror-read %s fatal error %d\n",
874 filesystem, mirror.head.error);
/* Iterate the packed mrecords returned in buf. */
879 while (offset < mirror.count) {
880 mrec = (void *)((char *)buf + offset);
881 bytes = HAMMER_HEAD_DOALIGN(mrec->head.rec_size);
882 if (offset + bytes > mirror.count) {
883 fprintf(stderr, "Misaligned record\n");
886 assert((mrec->head.type &
887 HAMMER_MRECF_TYPE_MASK) ==
888 HAMMER_MREC_TYPE_REC);
890 elm = mrec->rec.leaf;
891 if (elm.base.btype != HAMMER_BTREE_TYPE_RECORD)
893 if (elm.base.rec_type != HAMMER_RECTYPE_DATA)
895 ++DedupCurrentRecords;
/* Skip leaves outside the active CRC window (multi-pass mode). */
896 if (DedupCrcStart != DedupCrcEnd) {
897 if (elm.data_crc < DedupCrcStart)
900 elm.data_crc >= DedupCrcEnd) {
/* Continue the scan from where the last ioctl stopped. */
907 mirror.key_beg = mirror.key_cur;
909 fprintf(stderr, "Interrupted\n");
/* SIGINFO progress report. */
913 if (DedupTotalRecords) {
914 humanize_unsigned(buf1, sizeof(buf1),
917 humanize_unsigned(buf2, sizeof(buf2),
918 dedup_successes_bytes,
920 fprintf(stderr, "%s count %7jd/%jd "
922 "ioread %s newddup %s\n",
924 (intmax_t)DedupCurrentRecords,
925 (intmax_t)DedupTotalRecords,
926 (int)(DedupCurrentRecords * 100 /
928 (int)(DedupCurrentRecords * 10000 /
929 DedupTotalRecords % 100),
932 fprintf(stderr, "%s count %-7jd\n",
934 (intmax_t)DedupCurrentRecords);
938 } while (mirror.count != 0);
940 signal(SIGINFO, SIG_IGN);
/* Debug dump of the dedup-simulate CRC tree (crc, ref count, ref size). */
946 dump_simulated_dedup(void)
948 struct sim_dedup_entry *sim_de;
950 printf("=== Dumping simulated dedup entries:\n");
951 RB_FOREACH(sim_de, sim_dedup_entry_rb_tree, &sim_dedup_tree) {
952 printf("\tcrc=%08x cnt=%ju size=%ju\n",
954 (intmax_t)sim_de->ref_blks,
955 (intmax_t)sim_de->ref_size);
957 printf("end of dump ===\n");
/*
 * Debug dump of the real dedup CRC tree; fictitious entries expand to
 * their SHA sub-tree, printing each SHA-256 digest in hex.
 */
961 dump_real_dedup(void)
963 struct dedup_entry *de;
964 struct sha_dedup_entry *sha_de;
967 printf("=== Dumping dedup entries:\n");
968 RB_FOREACH(de, dedup_entry_rb_tree, &dedup_tree) {
969 if (de->flags & HAMMER_DEDUP_ENTRY_FICTITIOUS) {
970 printf("\tcrc=%08x fictitious\n", de->leaf.data_crc);
972 RB_FOREACH(sha_de, sha_dedup_entry_rb_tree, &de->u.fict_root) {
973 printf("\t\tcrc=%08x cnt=%ju size=%ju\n\t"
975 sha_de->leaf.data_crc,
976 (intmax_t)sha_de->ref_blks,
977 (intmax_t)sha_de->ref_size);
978 for (i = 0; i < SHA256_DIGEST_LENGTH; ++i)
979 printf("%02x", sha_de->sha_hash[i]);
983 printf("\tcrc=%08x cnt=%ju size=%ju\n",
985 (intmax_t)de->u.de.ref_blks,
986 (intmax_t)de->u.de.ref_size);
989 printf("end of dump ===\n");
/* Print dedup usage text and (presumably) exit(code) — tail elided. */
993 dedup_usage(int code)
996 "hammer dedup-simulate <filesystem>\n"
997 "hammer dedup <filesystem>\n"