From 39e88285cb81d7847bb54a08e0f50a61be890e08 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 2 Mar 2010 14:34:40 -0800 Subject: [PATCH] HAMMER Utility - Enhance the mirroring and show code * mirror-read will now run the histogram just like mirror-stream does. * improve verbose output when doing a mirror-read or mirror-stream. * hammer show now validates the mirror_tid in the B-Tree and produces a 'B' indication with the 'M' flag if it finds a bad mirror_tid chaining. --- sbin/hammer/cmd_mirror.c | 129 +++++++++++++++++++++++++++------------ sbin/hammer/cmd_show.c | 39 +++++++++--- sbin/hammer/hammer.8 | 6 +- 3 files changed, 124 insertions(+), 50 deletions(-) diff --git a/sbin/hammer/cmd_mirror.c b/sbin/hammer/cmd_mirror.c index 792d1b7962..0a22254f9c 100644 --- a/sbin/hammer/cmd_mirror.c +++ b/sbin/hammer/cmd_mirror.c @@ -47,7 +47,8 @@ static int read_mrecords(int fd, char *buf, u_int size, hammer_ioc_mrecord_head_t pickup); static int generate_histogram(int fd, const char *filesystem, histogram_t *histogram_ary, - struct hammer_ioc_mirror_rw *mirror_base); + struct hammer_ioc_mirror_rw *mirror_base, + int *repeatp); static hammer_ioc_mrecord_any_t read_mrecord(int fdin, int *errorp, hammer_ioc_mrecord_head_t pickup); static void write_mrecord(int fdout, u_int32_t type, @@ -70,6 +71,9 @@ static void mirror_usage(int code); * The HAMMER VFS does most of the work, we add a few new mrecord * types to negotiate the TID ranges and verify that the entire * stream made it to the destination. + * + * streaming will be 0 for mirror-read, 1 for mirror-stream. The code will + * set up a fake value of -1 when running the histogram for mirror-read. 
*/ void hammer_cmd_mirror_read(char **av, int ac, int streaming) @@ -91,6 +95,8 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming) int histogram; int histindex; int histmax; + int repeat = 0; + int sameline; int64_t total_bytes; time_t base_t = time(NULL); struct timeval bwtv; @@ -107,6 +113,7 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming) histindex = 0; histmax = 0; histogram_ary = NULL; + sameline = 0; again: bzero(&mirror, sizeof(mirror)); @@ -115,10 +122,12 @@ again: fd = getpfs(&pfs, filesystem); - if (streaming && VerboseOpt && VerboseOpt < 2) { - fprintf(stderr, "\nRunning"); + if (streaming >= 0 && VerboseOpt && VerboseOpt < 2) { + fprintf(stderr, "%cRunning \b\b", (sameline ? '\r' : '\n')); fflush(stderr); + sameline = 1; } + sameline = 1; total_bytes = 0; gettimeofday(&bwtv, NULL); bwcount = 0; @@ -136,8 +145,8 @@ again: * first. Use the target's current snapshot TID as our default * begin TID. */ - mirror.tid_beg = 0; if (TwoWayPipeOpt) { + mirror.tid_beg = 0; n = validate_mrec_header(fd, 0, 0, pfs.pfs_id, &pickup, NULL, &mirror.tid_beg); if (n < 0) { /* got TERM record */ @@ -145,6 +154,10 @@ again: return; } ++mirror.tid_beg; + } else if (streaming && histogram) { + mirror.tid_beg = histogram_ary[histindex].tid + 1; + } else { + mirror.tid_beg = 0; } /* @@ -167,7 +180,7 @@ again: * now picking it up again. Do another histogram. */ #if 0 - if (TwoWayPipeOpt && streaming && histogram && histindex == histend) { + if (streaming && histogram && histindex == histend) { if (mirror.tid_end - mirror.tid_beg > BULK_MINIMUM) histogram = 0; } @@ -179,15 +192,24 @@ again: * restarted without having to start over. 
*/ if (histogram == 0 && BulkOpt == 0) { - if (VerboseOpt) + if (VerboseOpt && repeat == 0) { fprintf(stderr, "\n"); + sameline = 0; + } histmax = generate_histogram(fd, filesystem, - &histogram_ary, &mirror); + &histogram_ary, &mirror, + &repeat); histindex = 0; histogram = 1; + + /* + * Just stream the histogram, then stop + */ + if (streaming == 0) + streaming = -1; } - if (TwoWayPipeOpt && streaming && histogram) { + if (streaming && histogram) { ++histindex; mirror.tid_end = histogram_ary[histindex].tid; estbytes = histogram_ary[histindex-1].bytes; @@ -201,9 +223,9 @@ again: /* * A cycle file overrides the beginning TID only if we are - * not operating in two-way mode. + * not operating in two-way or histogram mode. */ - if (TwoWayPipeOpt == 0) { + if (TwoWayPipeOpt == 0 && histogram == 0) { hammer_get_cycle(&mirror.key_beg, &mirror.tid_beg); } @@ -235,7 +257,9 @@ again: if (mirror.tid_beg >= mirror.tid_end) { if (streaming == 0 || VerboseOpt >= 2) fprintf(stderr, "Mirror-read: No work to do\n"); + sleep(DelayOpt); didwork = 0; + histogram = 0; goto done; } didwork = 1; @@ -278,12 +302,13 @@ again: total_bytes += mirror.count; if (streaming && VerboseOpt) { fprintf(stderr, - "\robj=%016jx tids=%016jx:%016jx %11jd", + "\rscan obj=%016jx tids=%016jx:%016jx %11jd", (uintmax_t)mirror.key_cur.obj_id, (uintmax_t)mirror.tid_beg, (uintmax_t)mirror.tid_end, (intmax_t)total_bytes); fflush(stderr); + sameline = 0; } mirror.key_beg = mirror.key_cur; @@ -303,9 +328,10 @@ again: } while (mirror.count != 0); done: - if (streaming && VerboseOpt) { + if (streaming && VerboseOpt && sameline == 0) { fprintf(stderr, "\n"); fflush(stderr); + sameline = 1; } /* @@ -363,26 +389,34 @@ done: time_t t2; /* - * Two way streaming tries to break down large bulk - * transfers into smaller ones so it can sync the - * transaction id on the slave. This way if we get - * interrupted a restart doesn't have to start from - * scratch. 
+ * Try to break down large bulk transfers into smaller ones + * so it can sync the transaction id on the slave. This + * way if we get interrupted a restart doesn't have to + * start from scratch. */ - if (TwoWayPipeOpt && streaming && histogram) { + if (streaming && histogram) { if (histindex != histmax) { - if (VerboseOpt && VerboseOpt < 2) + if (VerboseOpt && VerboseOpt < 2 && + streaming >= 0) { fprintf(stderr, " (bulk incremental)"); + } + relpfs(fd, &pfs); goto again; } } - if (VerboseOpt) { + if (VerboseOpt && streaming >= 0) { fprintf(stderr, " W"); fflush(stderr); } pfs.ondisk->sync_end_tid = mirror.tid_end; - if (ioctl(fd, HAMMERIOC_WAI_PSEUDOFS, &pfs) < 0) { + if (streaming < 0) { + /* + * Fake streaming mode when using a histogram to + * break up a mirror-read, do not wait on source. + */ + streaming = 0; + } else if (ioctl(fd, HAMMERIOC_WAI_PSEUDOFS, &pfs) < 0) { fprintf(stderr, "Mirror-read %s: cannot stream: %s\n", filesystem, strerror(errno)); } else { @@ -428,7 +462,8 @@ done: static int generate_histogram(int fd, const char *filesystem, histogram_t *histogram_ary, - struct hammer_ioc_mirror_rw *mirror_base) + struct hammer_ioc_mirror_rw *mirror_base, + int *repeatp) { struct hammer_ioc_mirror_rw mirror; union hammer_ioc_mrecord_any *mrec; @@ -460,11 +495,13 @@ generate_histogram(int fd, const char *filesystem, tid_bytes = malloc(sizeof(*tid_bytes) * (HIST_COUNT + 2)); bzero(tid_bytes, sizeof(tid_bytes)); - fprintf(stderr, "Prescan to break up bulk transfer"); - if (VerboseOpt > 1) - fprintf(stderr, " (%juMB chunks)", - (uintmax_t)(SplitupOpt / (1024 * 1024))); - fprintf(stderr, "\n"); + if (*repeatp == 0) { + fprintf(stderr, "Prescan to break up bulk transfer"); + if (VerboseOpt > 1) + fprintf(stderr, " (%juMB chunks)", + (uintmax_t)(SplitupOpt / (1024 * 1024))); + fprintf(stderr, "\n"); + } /* * Note: (tid_beg,tid_end), range is inclusive of both beg & end. 
@@ -556,7 +593,7 @@ generate_histogram(int fd, const char *filesystem, } } if (VerboseOpt > 1) { - if (accum > SplitupOpt) { + if (*repeatp == 0 && accum > SplitupOpt) { fprintf(stderr, "."); fflush(stderr); accum = 0; @@ -593,19 +630,23 @@ generate_histogram(int fd, const char *filesystem, (*histogram_ary)[res].tid = tid_end; (*histogram_ary)[res].bytes = -1; - if (VerboseOpt > 1) - fprintf(stderr, "\n"); /* newline after ... */ - assert(res <= HIST_COUNT); - fprintf(stderr, "Prescan %d chunks, total %ju MBytes (", - res, (uintmax_t)total / (1024 * 1024)); - for (i = 0; i < res && i < 3; ++i) { - if (i) - fprintf(stderr, ", "); - fprintf(stderr, "%ju", (uintmax_t)(*histogram_ary)[i].bytes); + if (*repeatp == 0) { + if (VerboseOpt > 1) + fprintf(stderr, "\n"); /* newline after ... */ + fprintf(stderr, "Prescan %d chunks, total %ju MBytes (", + res, (uintmax_t)total / (1024 * 1024)); + for (i = 0; i < res && i < 3; ++i) { + if (i) + fprintf(stderr, ", "); + fprintf(stderr, "%ju", + (uintmax_t)(*histogram_ary)[i].bytes); + } + if (i < res) + fprintf(stderr, ", ..."); + fprintf(stderr, ")\n"); } - if (i < res) - fprintf(stderr, ", ..."); - fprintf(stderr, ")\n"); + assert(res <= HIST_COUNT); + *repeatp = 1; free(tid_bytes); return(res); @@ -879,6 +920,7 @@ hammer_cmd_mirror_dump(void) mrec = read_mrecord(0, &error, &pickup); +again: /* * Read and process bulk records */ @@ -953,6 +995,13 @@ hammer_cmd_mirror_dump(void) fprintf(stderr, "Mirror-dump: Did not get termination " "sync record\n"); } + + /* + * Continue with more batches until EOF. 
+ */ + mrec = read_mrecord(0, &error, &pickup); + if (mrec) + goto again; } void diff --git a/sbin/hammer/cmd_show.c b/sbin/hammer/cmd_show.c index 577aaea8bc..7019a2b41e 100644 --- a/sbin/hammer/cmd_show.c +++ b/sbin/hammer/cmd_show.c @@ -40,6 +40,7 @@ #define FLAG_TOOFARRIGHT 0x0002 #define FLAG_BADTYPE 0x0004 #define FLAG_BADCHILDPARENT 0x0008 +#define FLAG_BADMIRRORTID 0x0010 typedef struct btree_search { u_int32_t lo; @@ -47,7 +48,7 @@ typedef struct btree_search { } *btree_search_t; static void print_btree_node(hammer_off_t node_offset, btree_search_t search, - int depth, int spike, + int depth, int spike, hammer_tid_t mirror_tid, hammer_base_elm_t left_bound, hammer_base_elm_t right_bound); static const char *check_data_crc(hammer_btree_elm_t elm); @@ -99,15 +100,15 @@ hammer_cmd_show(hammer_off_t node_offset, u_int32_t lo, int64_t obj_id, printf("show %016jx lo %08x obj_id %016jx depth %d\n", (uintmax_t)node_offset, lo, (uintmax_t)obj_id, depth); } - print_btree_node(node_offset, searchp, depth, - 0, left_bound, right_bound); - print_btree_node(node_offset, searchp, depth, - 1, left_bound, right_bound); + print_btree_node(node_offset, searchp, depth, 0, HAMMER_MAX_TID, + left_bound, right_bound); + print_btree_node(node_offset, searchp, depth, 1, HAMMER_MAX_TID, + left_bound, right_bound); } static void print_btree_node(hammer_off_t node_offset, btree_search_t search, - int depth, int spike, + int depth, int spike, hammer_tid_t mirror_tid, hammer_base_elm_t left_bound, hammer_base_elm_t right_bound) { struct buffer_info *buffer = NULL; @@ -117,6 +118,7 @@ print_btree_node(hammer_off_t node_offset, btree_search_t search, int flags; int maxcount; char badc; + char badm; const char *ext; node = get_node(node_offset, &buffer); @@ -126,10 +128,18 @@ print_btree_node(hammer_off_t node_offset, btree_search_t search, else badc = 'B'; + if (node->mirror_tid <= mirror_tid) { + badm = ' '; + } else { + badm = 'M'; + badc = 'B'; + } + if (spike == 0) { - printf("%c 
NODE %016jx cnt=%02d p=%016jx " + printf("%c%c NODE %016jx cnt=%02d p=%016jx " "type=%c depth=%d", badc, + badm, (uintmax_t)node_offset, node->count, (uintmax_t)node->parent, (node->type ? node->type : '?'), depth); @@ -205,6 +215,7 @@ print_btree_node(hammer_off_t node_offset, btree_search_t search, if (elm->internal.subtree_offset) { print_btree_node(elm->internal.subtree_offset, search, depth + 1, spike, + elm->internal.mirror_tid, &elm[0].base, &elm[1].base); /* * Cause show to iterate after seeking to @@ -235,7 +246,9 @@ print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type, if (flags & FLAG_BADTYPE) flagstr[4] = 'T'; if (flags & FLAG_BADCHILDPARENT) - flagstr[4] = 'C'; + flagstr[5] = 'C'; + if (flags & FLAG_BADMIRRORTID) + flagstr[6] = 'M'; printf("%s\t%s %2d %c ", flagstr, label, i, @@ -310,6 +323,8 @@ print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset, flags |= FLAG_BADCHILDPARENT; rel_buffer(buffer); } + if (elm->internal.mirror_tid > node->mirror_tid) + flags |= FLAG_BADMIRRORTID; switch(btype) { case HAMMER_BTREE_TYPE_INTERNAL: @@ -334,6 +349,14 @@ print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset, } break; case HAMMER_BTREE_TYPE_LEAF: + if (elm->base.create_tid && + elm->base.create_tid > node->mirror_tid) { + flags |= FLAG_BADMIRRORTID; + } + if (elm->base.delete_tid && + elm->base.delete_tid > node->mirror_tid) { + flags |= FLAG_BADMIRRORTID; + } switch(btype) { case HAMMER_BTREE_TYPE_RECORD: if (left_bound == NULL || right_bound == NULL) diff --git a/sbin/hammer/hammer.8 b/sbin/hammer/hammer.8 index 1e8a54439b..1b8d10811a 100644 --- a/sbin/hammer/hammer.8 +++ b/sbin/hammer/hammer.8 @@ -289,11 +289,13 @@ Any errors will show up with a in column 1 along with various other error flags. 
.Pp -If you specify a localization and object id field, +If you specify a localization field or a localization:obj_id field, .Ar lo Ns Cm \&: Ns Ar objid , the dump will search for the key printing nodes as it recurses down, and then -will iterate forwards. +will iterate forwards. These fields are specified in HEX. +Note that the pfsid is the top 16 bits of the 32 bit localization +field so pfs #1 would be 00010000. .Pp If you use .Fl q -- 2.41.0