HAMMER Utility - Enhance the mirroring and show code
author Matthew Dillon <dillon@apollo.backplane.com>
Tue, 2 Mar 2010 22:34:40 +0000 (14:34 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Tue, 2 Mar 2010 22:34:40 +0000 (14:34 -0800)
* mirror-read will now run the histogram just like mirror-stream does.

* Improve verbose output when doing a mirror-read or mirror-stream.

* hammer show now validates the mirror_tid in the B-Tree and produces
  a 'B' indication along with the 'M' flag if it finds bad mirror_tid
  chaining.

sbin/hammer/cmd_mirror.c
sbin/hammer/cmd_show.c
sbin/hammer/hammer.8

index 792d1b7..0a22254 100644 (file)
@@ -47,7 +47,8 @@ static int read_mrecords(int fd, char *buf, u_int size,
                         hammer_ioc_mrecord_head_t pickup);
 static int generate_histogram(int fd, const char *filesystem,
                         histogram_t *histogram_ary,
-                        struct hammer_ioc_mirror_rw *mirror_base);
+                        struct hammer_ioc_mirror_rw *mirror_base,
+                        int *repeatp);
 static hammer_ioc_mrecord_any_t read_mrecord(int fdin, int *errorp,
                         hammer_ioc_mrecord_head_t pickup);
 static void write_mrecord(int fdout, u_int32_t type,
@@ -70,6 +71,9 @@ static void mirror_usage(int code);
  * The HAMMER VFS does most of the work, we add a few new mrecord
  * types to negotiate the TID ranges and verify that the entire
  * stream made it to the destination.
+ *
+ * streaming will be 0 for mirror-read, 1 for mirror-stream.  The code will
+ * set up a fake value of -1 when running the histogram for mirror-read.
  */
 void
 hammer_cmd_mirror_read(char **av, int ac, int streaming)
@@ -91,6 +95,8 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming)
        int histogram;
        int histindex;
        int histmax;
+       int repeat = 0;
+       int sameline;
        int64_t total_bytes;
        time_t base_t = time(NULL);
        struct timeval bwtv;
@@ -107,6 +113,7 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming)
        histindex = 0;
        histmax = 0;
        histogram_ary = NULL;
+       sameline = 0;
 
 again:
        bzero(&mirror, sizeof(mirror));
@@ -115,10 +122,12 @@ again:
 
        fd = getpfs(&pfs, filesystem);
 
-       if (streaming && VerboseOpt && VerboseOpt < 2) {
-               fprintf(stderr, "\nRunning");
+       if (streaming >= 0 && VerboseOpt && VerboseOpt < 2) {
+               fprintf(stderr, "%cRunning  \b\b", (sameline ? '\r' : '\n'));
                fflush(stderr);
+               sameline = 1;
        }
+       sameline = 1;
        total_bytes = 0;
        gettimeofday(&bwtv, NULL);
        bwcount = 0;
@@ -136,8 +145,8 @@ again:
         * first.  Use the target's current snapshot TID as our default
         * begin TID.
         */
-       mirror.tid_beg = 0;
        if (TwoWayPipeOpt) {
+               mirror.tid_beg = 0;
                n = validate_mrec_header(fd, 0, 0, pfs.pfs_id, &pickup,
                                         NULL, &mirror.tid_beg);
                if (n < 0) {    /* got TERM record */
@@ -145,6 +154,10 @@ again:
                        return;
                }
                ++mirror.tid_beg;
+       } else if (streaming && histogram) {
+               mirror.tid_beg = histogram_ary[histindex].tid + 1;
+       } else {
+               mirror.tid_beg = 0;
        }
 
        /*
@@ -167,7 +180,7 @@ again:
         *     now picking it up again.  Do another histogram.
         */
 #if 0
-       if (TwoWayPipeOpt && streaming && histogram && histindex == histend) {
+       if (streaming && histogram && histindex == histend) {
                if (mirror.tid_end - mirror.tid_beg > BULK_MINIMUM)
                        histogram = 0;
        }
@@ -179,15 +192,24 @@ again:
         * restarted without having to start over.
         */
        if (histogram == 0 && BulkOpt == 0) {
-               if (VerboseOpt)
+               if (VerboseOpt && repeat == 0) {
                        fprintf(stderr, "\n");
+                       sameline = 0;
+               }
                histmax = generate_histogram(fd, filesystem,
-                                            &histogram_ary, &mirror);
+                                            &histogram_ary, &mirror,
+                                            &repeat);
                histindex = 0;
                histogram = 1;
+
+               /*
+                * Just stream the histogram, then stop
+                */
+               if (streaming == 0)
+                       streaming = -1;
        }
 
-       if (TwoWayPipeOpt && streaming && histogram) {
+       if (streaming && histogram) {
                ++histindex;
                mirror.tid_end = histogram_ary[histindex].tid;
                estbytes = histogram_ary[histindex-1].bytes;
@@ -201,9 +223,9 @@ again:
 
        /*
         * A cycle file overrides the beginning TID only if we are
-        * not operating in two-way mode.
+        * not operating in two-way or histogram mode.
         */
-       if (TwoWayPipeOpt == 0) {
+       if (TwoWayPipeOpt == 0 && histogram == 0) {
                hammer_get_cycle(&mirror.key_beg, &mirror.tid_beg);
        }
 
@@ -235,7 +257,9 @@ again:
        if (mirror.tid_beg >= mirror.tid_end) {
                if (streaming == 0 || VerboseOpt >= 2)
                        fprintf(stderr, "Mirror-read: No work to do\n");
+               sleep(DelayOpt);
                didwork = 0;
+               histogram = 0;
                goto done;
        }
        didwork = 1;
@@ -278,12 +302,13 @@ again:
                total_bytes += mirror.count;
                if (streaming && VerboseOpt) {
                        fprintf(stderr,
-                               "\robj=%016jx tids=%016jx:%016jx %11jd",
+                               "\rscan obj=%016jx tids=%016jx:%016jx %11jd",
                                (uintmax_t)mirror.key_cur.obj_id,
                                (uintmax_t)mirror.tid_beg,
                                (uintmax_t)mirror.tid_end,
                                (intmax_t)total_bytes);
                        fflush(stderr);
+                       sameline = 0;
                }
                mirror.key_beg = mirror.key_cur;
 
@@ -303,9 +328,10 @@ again:
        } while (mirror.count != 0);
 
 done:
-       if (streaming && VerboseOpt) {
+       if (streaming && VerboseOpt && sameline == 0) {
                fprintf(stderr, "\n");
                fflush(stderr);
+               sameline = 1;
        }
 
        /*
@@ -363,26 +389,34 @@ done:
                time_t t2;
 
                /*
-                * Two way streaming tries to break down large bulk
-                * transfers into smaller ones so it can sync the
-                * transaction id on the slave.  This way if we get
-                * interrupted a restart doesn't have to start from
-                * scratch.
+                * Try to break down large bulk transfers into smaller ones
+                * so it can sync the transaction id on the slave.  This
+                * way if we get interrupted a restart doesn't have to
+                * start from scratch.
                 */
-               if (TwoWayPipeOpt && streaming && histogram) {
+               if (streaming && histogram) {
                        if (histindex != histmax) {
-                               if (VerboseOpt && VerboseOpt < 2)
+                               if (VerboseOpt && VerboseOpt < 2 &&
+                                   streaming >= 0) {
                                        fprintf(stderr, " (bulk incremental)");
+                               }
+                               relpfs(fd, &pfs);
                                goto again;
                        }
                }
 
-               if (VerboseOpt) {
+               if (VerboseOpt && streaming >= 0) {
                        fprintf(stderr, " W");
                        fflush(stderr);
                }
                pfs.ondisk->sync_end_tid = mirror.tid_end;
-               if (ioctl(fd, HAMMERIOC_WAI_PSEUDOFS, &pfs) < 0) {
+               if (streaming < 0) {
+                       /*
+                        * Fake streaming mode when using a histogram to
+                        * break up a mirror-read, do not wait on source.
+                        */
+                       streaming = 0;
+               } else if (ioctl(fd, HAMMERIOC_WAI_PSEUDOFS, &pfs) < 0) {
                        fprintf(stderr, "Mirror-read %s: cannot stream: %s\n",
                                filesystem, strerror(errno));
                } else {
@@ -428,7 +462,8 @@ done:
 static int
 generate_histogram(int fd, const char *filesystem,
                   histogram_t *histogram_ary,
-                  struct hammer_ioc_mirror_rw *mirror_base)
+                  struct hammer_ioc_mirror_rw *mirror_base,
+                  int *repeatp)
 {
        struct hammer_ioc_mirror_rw mirror;
        union hammer_ioc_mrecord_any *mrec;
@@ -460,11 +495,13 @@ generate_histogram(int fd, const char *filesystem,
        tid_bytes = malloc(sizeof(*tid_bytes) * (HIST_COUNT + 2));
        bzero(tid_bytes, sizeof(tid_bytes));
 
-       fprintf(stderr, "Prescan to break up bulk transfer");
-       if (VerboseOpt > 1)
-               fprintf(stderr, " (%juMB chunks)",
-                       (uintmax_t)(SplitupOpt / (1024 * 1024)));
-       fprintf(stderr, "\n");
+       if (*repeatp == 0) {
+               fprintf(stderr, "Prescan to break up bulk transfer");
+               if (VerboseOpt > 1)
+                       fprintf(stderr, " (%juMB chunks)",
+                               (uintmax_t)(SplitupOpt / (1024 * 1024)));
+               fprintf(stderr, "\n");
+       }
 
        /*
         * Note: (tid_beg,tid_end), range is inclusive of both beg & end.
@@ -556,7 +593,7 @@ generate_histogram(int fd, const char *filesystem,
                        }
                }
                if (VerboseOpt > 1) {
-                       if (accum > SplitupOpt) {
+                       if (*repeatp == 0 && accum > SplitupOpt) {
                                fprintf(stderr, ".");
                                fflush(stderr);
                                accum = 0;
@@ -593,19 +630,23 @@ generate_histogram(int fd, const char *filesystem,
        (*histogram_ary)[res].tid = tid_end;
        (*histogram_ary)[res].bytes = -1;
 
-       if (VerboseOpt > 1)
-               fprintf(stderr, "\n");  /* newline after ... */
-       assert(res <= HIST_COUNT);
-       fprintf(stderr, "Prescan %d chunks, total %ju MBytes (",
-               res, (uintmax_t)total / (1024 * 1024));
-       for (i = 0; i < res && i < 3; ++i) {
-               if (i)
-                       fprintf(stderr, ", ");
-               fprintf(stderr, "%ju", (uintmax_t)(*histogram_ary)[i].bytes);
+       if (*repeatp == 0) {
+               if (VerboseOpt > 1)
+                       fprintf(stderr, "\n");  /* newline after ... */
+               fprintf(stderr, "Prescan %d chunks, total %ju MBytes (",
+                       res, (uintmax_t)total / (1024 * 1024));
+               for (i = 0; i < res && i < 3; ++i) {
+                       if (i)
+                               fprintf(stderr, ", ");
+                       fprintf(stderr, "%ju",
+                               (uintmax_t)(*histogram_ary)[i].bytes);
+               }
+               if (i < res)
+                       fprintf(stderr, ", ...");
+               fprintf(stderr, ")\n");
        }
-       if (i < res)
-               fprintf(stderr, ", ...");
-       fprintf(stderr, ")\n");
+       assert(res <= HIST_COUNT);
+       *repeatp = 1;
 
        free(tid_bytes);
        return(res);
@@ -879,6 +920,7 @@ hammer_cmd_mirror_dump(void)
 
        mrec = read_mrecord(0, &error, &pickup);
 
+again:
        /*
         * Read and process bulk records
         */
@@ -953,6 +995,13 @@ hammer_cmd_mirror_dump(void)
                fprintf(stderr, "Mirror-dump: Did not get termination "
                                "sync record\n");
        }
+
+       /*
+        * Continue with more batches until EOF.
+        */
+       mrec = read_mrecord(0, &error, &pickup);
+       if (mrec)
+               goto again;
 }
 
 void
index 577aaea..7019a2b 100644 (file)
@@ -40,6 +40,7 @@
 #define FLAG_TOOFARRIGHT       0x0002
 #define FLAG_BADTYPE           0x0004
 #define FLAG_BADCHILDPARENT    0x0008
+#define FLAG_BADMIRRORTID      0x0010
 
 typedef struct btree_search {
        u_int32_t       lo;
@@ -47,7 +48,7 @@ typedef struct btree_search {
 } *btree_search_t;
 
 static void print_btree_node(hammer_off_t node_offset, btree_search_t search,
-                       int depth, int spike,
+                       int depth, int spike, hammer_tid_t mirror_tid,
                        hammer_base_elm_t left_bound,
                        hammer_base_elm_t right_bound);
 static const char *check_data_crc(hammer_btree_elm_t elm);
@@ -99,15 +100,15 @@ hammer_cmd_show(hammer_off_t node_offset, u_int32_t lo, int64_t obj_id,
                printf("show %016jx lo %08x obj_id %016jx depth %d\n",
                        (uintmax_t)node_offset, lo, (uintmax_t)obj_id, depth);
        }
-       print_btree_node(node_offset, searchp, depth,
-                       0, left_bound, right_bound);
-       print_btree_node(node_offset, searchp, depth,
-                       1, left_bound, right_bound);
+       print_btree_node(node_offset, searchp, depth, 0, HAMMER_MAX_TID,
+                        left_bound, right_bound);
+       print_btree_node(node_offset, searchp, depth, 1, HAMMER_MAX_TID,
+                        left_bound, right_bound);
 }
 
 static void
 print_btree_node(hammer_off_t node_offset, btree_search_t search,
-               int depth, int spike,
+               int depth, int spike, hammer_tid_t mirror_tid,
                hammer_base_elm_t left_bound, hammer_base_elm_t right_bound)
 {
        struct buffer_info *buffer = NULL;
@@ -117,6 +118,7 @@ print_btree_node(hammer_off_t node_offset, btree_search_t search,
        int flags;
        int maxcount;
        char badc;
+       char badm;
        const char *ext;
 
        node = get_node(node_offset, &buffer);
@@ -126,10 +128,18 @@ print_btree_node(hammer_off_t node_offset, btree_search_t search,
        else
                badc = 'B';
 
+       if (node->mirror_tid <= mirror_tid) {
+               badm = ' ';
+       } else {
+               badm = 'M';
+               badc = 'B';
+       }
+
        if (spike == 0) {
-               printf("%c   NODE %016jx cnt=%02d p=%016jx "
+               printf("%c%c   NODE %016jx cnt=%02d p=%016jx "
                       "type=%c depth=%d",
                       badc,
+                      badm,
                       (uintmax_t)node_offset, node->count,
                       (uintmax_t)node->parent,
                       (node->type ? node->type : '?'), depth);
@@ -205,6 +215,7 @@ print_btree_node(hammer_off_t node_offset, btree_search_t search,
                        if (elm->internal.subtree_offset) {
                                print_btree_node(elm->internal.subtree_offset,
                                                 search, depth + 1, spike,
+                                                elm->internal.mirror_tid,
                                                 &elm[0].base, &elm[1].base);
                                /*
                                 * Cause show to iterate after seeking to
@@ -235,7 +246,9 @@ print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type,
        if (flags & FLAG_BADTYPE)
                flagstr[4] = 'T';
        if (flags & FLAG_BADCHILDPARENT)
-               flagstr[4] = 'C';
+               flagstr[5] = 'C';
+       if (flags & FLAG_BADMIRRORTID)
+               flagstr[6] = 'M';
 
        printf("%s\t%s %2d %c ",
               flagstr, label, i,
@@ -310,6 +323,8 @@ print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset,
                                flags |= FLAG_BADCHILDPARENT;
                        rel_buffer(buffer);
                }
+               if (elm->internal.mirror_tid > node->mirror_tid)
+                       flags |= FLAG_BADMIRRORTID;
 
                switch(btype) {
                case HAMMER_BTREE_TYPE_INTERNAL:
@@ -334,6 +349,14 @@ print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset,
                }
                break;
        case HAMMER_BTREE_TYPE_LEAF:
+               if (elm->base.create_tid &&
+                   elm->base.create_tid > node->mirror_tid) {
+                       flags |= FLAG_BADMIRRORTID;
+               }
+               if (elm->base.delete_tid &&
+                   elm->base.delete_tid > node->mirror_tid) {
+                       flags |= FLAG_BADMIRRORTID;
+               }
                switch(btype) {
                case HAMMER_BTREE_TYPE_RECORD:
                        if (left_bound == NULL || right_bound == NULL)
index 1e8a544..1b8d108 100644 (file)
@@ -289,11 +289,13 @@ Any errors will show up with a
 in column 1 along with various
 other error flags.
 .Pp
-If you specify a localization and object id field,
+If you specify a localization field or a localization:obj_id field,
 .Ar lo Ns Cm \&: Ns Ar objid ,
 the dump will
 search for the key printing nodes as it recurses down, and then
-will iterate forwards.
+will iterate forwards.  These fields are specified in HEX.
+Note that the pfsid is the top 16 bits of the 32 bit localization
+field so pfs #1 would be 00010000.
 .Pp
 If you use
 .Fl q