From e7f926a5040427edaba8910c6992d8ee7af836f4 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 17 Aug 2009 11:05:09 -0700 Subject: [PATCH] HAMMER Util - Add new features, fix history retention bug in prune * The prune code was not retaining the fine-grained history between the last snapshot and the current time. * Add a new PFS config variable called 'prune-min' which may be used to set the minimum fine-grained history retention, in seconds. The hammer prune code will not prune in between any snapshots that are within the fine-grained retention period. * The mirror-stream directive now automatically loops and reconnects if the network connection fails. * The mirror-stream directive now attempts to break up a large initial bulk transfer into smaller transfers, inserting synchronization points so a failure in the middle of the large bulk transfer does not require starting from scratch again. * The show directive (show btree) now takes an optional localization[:objid] argument (specified in hex). If specified, the directive will search the B-Tree for the key, printing nodes as it goes, and then continue with a normal iteration. 
--- sbin/hammer/cmd_mirror.c | 236 ++++++++++++++++++++++++++++++------ sbin/hammer/cmd_pseudofs.c | 76 ++++++++++++ sbin/hammer/cmd_show.c | 93 ++++++++++++-- sbin/hammer/cmd_softprune.c | 137 +++++++++++++++++---- sbin/hammer/hammer.8 | 28 ++++- sbin/hammer/hammer.c | 13 +- sbin/hammer/hammer.h | 4 +- 7 files changed, 511 insertions(+), 76 deletions(-) diff --git a/sbin/hammer/cmd_mirror.c b/sbin/hammer/cmd_mirror.c index 36192b7a2c..9c6b7129da 100644 --- a/sbin/hammer/cmd_mirror.c +++ b/sbin/hammer/cmd_mirror.c @@ -40,12 +40,15 @@ static int read_mrecords(int fd, char *buf, u_int size, hammer_ioc_mrecord_head_t pickup); +static int generate_histogram(int fd, const char *filesystem, + hammer_tid_t **histogram_ary, + struct hammer_ioc_mirror_rw *mirror_base); static hammer_ioc_mrecord_any_t read_mrecord(int fdin, int *errorp, hammer_ioc_mrecord_head_t pickup); static void write_mrecord(int fdout, u_int32_t type, hammer_ioc_mrecord_any_t mrec, int bytes); -static void generate_mrec_header(int fd, int fdout, int pfs_id, - hammer_tid_t *tid_begp, hammer_tid_t *tid_endp); +static void generate_mrec_header(int fd, int pfs_id, + union hammer_ioc_mrecord_any *mrec_tmp); static int validate_mrec_header(int fd, int fdin, int is_target, int pfs_id, struct hammer_ioc_mrecord_head *pickup, hammer_tid_t *tid_begp, hammer_tid_t *tid_endp); @@ -55,6 +58,8 @@ static ssize_t writebw(int fd, const void *buf, size_t nbytes, static int getyn(void); static void mirror_usage(int code); +#define BULK_MINIMUM 20000 + /* * Generate a mirroring data stream from the specific source over the * entire key range, but restricted to the specified transaction range. 
@@ -72,6 +77,7 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming) struct hammer_ioc_mrecord_head pickup; hammer_ioc_mrecord_any_t mrec; hammer_tid_t sync_tid; + hammer_tid_t *histogram_ary; const char *filesystem; char *buf = malloc(SERIALBUF_SIZE); int interrupted = 0; @@ -79,6 +85,7 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming) int fd; int n; int didwork; + int histogram; int64_t total_bytes; time_t base_t = time(NULL); struct timeval bwtv; @@ -90,6 +97,8 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming) pickup.signature = 0; pickup.type = 0; + histogram = -1; + histogram_ary = NULL; again: bzero(&mirror, sizeof(mirror)); @@ -107,9 +116,12 @@ again: bwcount = 0; /* - * Send initial header for the purpose of determining shared-uuid. + * Send initial header for the purpose of determining the + * shared-uuid. */ - generate_mrec_header(fd, 1, pfs.pfs_id, NULL, NULL); + generate_mrec_header(fd, pfs.pfs_id, &mrec_tmp); + write_mrecord(1, HAMMER_MREC_TYPE_PFSD, + &mrec_tmp, sizeof(mrec_tmp.pfs)); /* * In 2-way mode the target will send us a PFS info packet @@ -132,15 +144,60 @@ again: * has a larger begin sync. tid_end is set to the latest source * TID whos flush cycle has completed. */ - generate_mrec_header(fd, 1, pfs.pfs_id, - &mirror.tid_beg, &mirror.tid_end); + generate_mrec_header(fd, pfs.pfs_id, &mrec_tmp); + if (mirror.tid_beg < mrec_tmp.pfs.pfsd.sync_beg_tid) + mirror.tid_beg = mrec_tmp.pfs.pfsd.sync_beg_tid; + mirror.tid_end = mrec_tmp.pfs.pfsd.sync_end_tid; + mirror.ubuf = buf; + mirror.size = SERIALBUF_SIZE; + mirror.pfs_id = pfs.pfs_id; + mirror.shared_uuid = pfs.ondisk->shared_uuid; - /* XXX streaming mode support w/ cycle or command line arg */ /* - * A cycle file overrides the beginning TID + * XXX If the histogram is exhausted and the TID delta is large + * the stream might have been offline for a while and is + * now picking it up again. Do another histogram. 
*/ - hammer_get_cycle(&mirror.key_beg, &mirror.tid_beg); +#if 0 + if (TwoWayPipeOpt && streaming && histogram == 0) { + if (mirror.tid_end - mirror.tid_beg > BULK_MINIMUM) + histogram = -1; + } +#endif + /* + * Initial bulk startup control, try to do some incremental + * mirroring in order to allow the stream to be killed and + * restarted without having to start over. + */ + if (histogram < 0) { + if (VerboseOpt) + fprintf(stderr, "\n"); + histogram = generate_histogram(fd, filesystem, + &histogram_ary, &mirror); + } + + if (TwoWayPipeOpt && streaming && histogram > 0) { + mirror.tid_end = histogram_ary[--histogram]; + mrec_tmp.pfs.pfsd.sync_end_tid = mirror.tid_end; + } + + write_mrecord(1, HAMMER_MREC_TYPE_PFSD, + &mrec_tmp, sizeof(mrec_tmp.pfs)); + + /* + * A cycle file overrides the beginning TID only if we are + * not operating in two-way mode. + */ + if (TwoWayPipeOpt == 0) { + hammer_get_cycle(&mirror.key_beg, &mirror.tid_beg); + } + + /* + * An additional argument overrides the beginning TID regardless + * of what mode we are in. This is not recommending if operating + * in two-way mode. + */ if (ac == 2) mirror.tid_beg = strtoull(av[1], NULL, 0); @@ -202,12 +259,19 @@ again: } total_bytes += mirror.count; if (streaming && VerboseOpt) { - fprintf(stderr, "\r%016llx %11lld", - mirror.key_cur.obj_id, + fprintf(stderr, + "\robj=%016llx tids=%016llx:%016llx %11lld", + (long long)mirror.key_cur.obj_id, + (long long)mirror.tid_beg, + (long long)mirror.tid_end, total_bytes); fflush(stderr); } mirror.key_beg = mirror.key_cur; + + /* + * Deal with time limit option + */ if (TimeoutOpt && (unsigned)(time(NULL) - base_t) > (unsigned)TimeoutOpt) { fprintf(stderr, @@ -273,6 +337,19 @@ done: time_t t1 = time(NULL); time_t t2; + /* + * Two way streaming tries to break down large bulk + * transfers into smaller ones so it can sync the + * transaction id on the slave. This way if we get + * interrupted a restart doesn't have to start from + * scratch. 
+ */ + if (TwoWayPipeOpt && streaming && histogram > 0) { + if (VerboseOpt) + fprintf(stderr, " (bulk incremental)"); + goto again; + } + if (VerboseOpt) { fprintf(stderr, " W"); fflush(stderr); @@ -304,6 +381,91 @@ done: fprintf(stderr, "Mirror-read %s succeeded\n", filesystem); } +/* + * Ok, this isn't really a histogram. What we are trying to do + * here is find the first tid_end for the scan that returns + * at least some data. The frontend of the TID space will generally + * return nothing so we can't just divide out the full mirroring + * range. Once we find the point where a real data stream starts + * to get generated we can divide out the range from that point. + * + * When starting a new mirroring operation completely from scratch + * this code will take some time to run, but once some mirroring + * data is synchronized on the target you will be able to interrupt + * the stream and restart it and the later invocations of this + * code will be such that it should run much faster. 
+ */ +static int +generate_histogram(int fd, const char *filesystem, + hammer_tid_t **histogram_ary, + struct hammer_ioc_mirror_rw *mirror_base) +{ + struct hammer_ioc_mirror_rw mirror; + hammer_tid_t tid_beg; + hammer_tid_t tid_end; + hammer_tid_t tid_half; + int i; + + mirror = *mirror_base; + tid_beg = mirror.tid_beg; + tid_end = mirror.tid_end; + + if (*histogram_ary) + free(*histogram_ary); + if (tid_beg + BULK_MINIMUM >= tid_end) + return(0); + + if (VerboseOpt) + fprintf(stderr, "Doing Range Test\n"); + while (tid_end - tid_beg > BULK_MINIMUM) { + tid_half = tid_beg + (tid_end - tid_beg) * 2 / 3; + mirror.count = 0; + mirror.tid_beg = tid_beg; + mirror.tid_end = tid_half; + + if (VerboseOpt > 1) { + fprintf(stderr, "RangeTest %016llx/%016llx - %016llx (%lld) ", + (long long)tid_beg, + (long long)tid_end, + (long long)tid_half, + (long long)(tid_half - tid_beg)); + } + fflush(stderr); + if (ioctl(fd, HAMMERIOC_MIRROR_READ, &mirror) < 0) { + fprintf(stderr, "Mirror-read %s failed: %s\n", + filesystem, strerror(errno)); + exit(1); + } + if (mirror.head.flags & HAMMER_IOC_HEAD_ERROR) { + fprintf(stderr, + "Mirror-read %s fatal error %d\n", + filesystem, mirror.head.error); + exit(1); + } + if (VerboseOpt > 1) + fprintf(stderr, "%d\n", mirror.count); + if (mirror.count > SERIALBUF_SIZE / 2) { + tid_end = tid_half; + } else { + tid_beg = tid_half; + } + } + + tid_end = mirror_base->tid_end; + fprintf(stderr, "histogram range %016llx - %016llx\n", + (long long)tid_beg, (long long)tid_end); + + /* + * The final array generates our incremental ending tids in + * reverse order. The caller also picks them off in reverse order. 
+ */ + *histogram_ary = malloc(sizeof(hammer_tid_t) * 20); + for (i = 0; i < 20; ++i) { + (*histogram_ary)[i] = tid_end - (tid_end - tid_beg) / 20 * i; + } + return(20); +} + static void create_pfs(const char *filesystem, uuid_t *s_uuid) { @@ -439,8 +601,12 @@ again: */ mirror.tid_beg = 0; if (TwoWayPipeOpt) { - generate_mrec_header(fd, 1, pfs.pfs_id, - &mirror.tid_beg, &mirror.tid_end); + generate_mrec_header(fd, pfs.pfs_id, &mrec_tmp); + if (mirror.tid_beg < mrec_tmp.pfs.pfsd.sync_beg_tid) + mirror.tid_beg = mrec_tmp.pfs.pfsd.sync_beg_tid; + mirror.tid_end = mrec_tmp.pfs.pfsd.sync_end_tid; + write_mrecord(1, HAMMER_MREC_TYPE_PFSD, + &mrec_tmp, sizeof(mrec_tmp.pfs)); } /* @@ -658,13 +824,14 @@ hammer_cmd_mirror_copy(char **av, int ac, int streaming) if (ac != 2) mirror_usage(1); + TwoWayPipeOpt = 1; + +again: if (pipe(fds) < 0) { perror("pipe"); exit(1); } - TwoWayPipeOpt = 1; - /* * Source */ @@ -767,6 +934,19 @@ hammer_cmd_mirror_copy(char **av, int ac, int streaming) ; while (waitpid(pid2, NULL, 0) <= 0) ; + + /* + * If the link is lost restart + */ + if (streaming) { + if (VerboseOpt) { + fprintf(stderr, "\nLost Link\n"); + fflush(stderr); + } + sleep(DelayOpt); + goto again; + } + } /* @@ -1005,17 +1185,16 @@ write_mrecord(int fdout, u_int32_t type, hammer_ioc_mrecord_any_t mrec, * originating filesytem. 
*/ static void -generate_mrec_header(int fd, int fdout, int pfs_id, - hammer_tid_t *tid_begp, hammer_tid_t *tid_endp) +generate_mrec_header(int fd, int pfs_id, + union hammer_ioc_mrecord_any *mrec_tmp) { struct hammer_ioc_pseudofs_rw pfs; - union hammer_ioc_mrecord_any mrec_tmp; bzero(&pfs, sizeof(pfs)); - bzero(&mrec_tmp, sizeof(mrec_tmp)); + bzero(mrec_tmp, sizeof(*mrec_tmp)); pfs.pfs_id = pfs_id; - pfs.ondisk = &mrec_tmp.pfs.pfsd; - pfs.bytes = sizeof(mrec_tmp.pfs.pfsd); + pfs.ondisk = &mrec_tmp->pfs.pfsd; + pfs.bytes = sizeof(mrec_tmp->pfs.pfsd); if (ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs) != 0) { fprintf(stderr, "Mirror-read: not a HAMMER fs/pseudofs!\n"); exit(1); @@ -1024,20 +1203,7 @@ generate_mrec_header(int fd, int fdout, int pfs_id, fprintf(stderr, "Mirror-read: HAMMER pfs version mismatch!\n"); exit(1); } - - /* - * sync_beg_tid - lowest TID on source after which a full history - * is available. - * - * sync_end_tid - highest fully synchronized TID from source. - */ - if (tid_begp && *tid_begp < mrec_tmp.pfs.pfsd.sync_beg_tid) - *tid_begp = mrec_tmp.pfs.pfsd.sync_beg_tid; - if (tid_endp) - *tid_endp = mrec_tmp.pfs.pfsd.sync_end_tid; - mrec_tmp.pfs.version = pfs.version; - write_mrecord(fdout, HAMMER_MREC_TYPE_PFSD, - &mrec_tmp, sizeof(mrec_tmp.pfs)); + mrec_tmp->pfs.version = pfs.version; } /* diff --git a/sbin/hammer/cmd_pseudofs.c b/sbin/hammer/cmd_pseudofs.c index 67933ec7a1..ea91500189 100644 --- a/sbin/hammer/cmd_pseudofs.c +++ b/sbin/hammer/cmd_pseudofs.c @@ -41,6 +41,7 @@ static void init_pfsd(hammer_pseudofs_data_t pfsd, int is_slave); static void dump_pfsd(hammer_pseudofs_data_t pfsd); static void pseudofs_usage(int code); static int getyn(void); +static int timetosecs(char *str); /* * Calculate the pfs_id given a path to a directory or a @@PFS or @@%llx:%d @@ -484,6 +485,21 @@ dump_pfsd(hammer_pseudofs_data_t pfsd) printf(" label=\"%s\"\n", pfsd->label); if (pfsd->snapshots[0]) printf(" snapshots=\"%s\"\n", pfsd->snapshots); + if 
(pfsd->prune_min < (60 * 60 * 24)) { + printf(" prune-min=%02d:%02d:%02d\n", + pfsd->prune_min / 60 / 60 % 24, + pfsd->prune_min / 60 % 60, + pfsd->prune_min % 60); + } else if (pfsd->prune_min % (60 * 60 * 24)) { + printf(" prune-min=%dd/%02d:%02d:%02d\n", + pfsd->prune_min / 60 / 60 / 24, + pfsd->prune_min / 60 / 60 % 24, + pfsd->prune_min / 60 % 60, + pfsd->prune_min % 60); + } else { + printf(" prune-min=%dd\n", pfsd->prune_min / 60 / 60 / 24); + } + if (pfsd->mirror_flags & HAMMER_PFSD_SLAVE) { printf(" operating as a SLAVE\n"); if (pfsd->snapshots[0] == 0) @@ -564,6 +580,14 @@ parse_pfsd_options(char **av, int ac, hammer_pseudofs_data_t pfsd) "%s", ptr); } else if (strcmp(cmd, "snapshots-clear") == 0) { pfsd->snapshots[0] = 0; + } else if (strcmp(cmd, "prune-min") == 0) { + pfsd->prune_min = timetosecs(ptr); + if (pfsd->prune_min < 0) { + fprintf(stderr, + "option %s: illegal time spec, " + "use Nd or [Nd/]hh[:mm[:ss]]\n", ptr); + exit(1); + } } else { fprintf(stderr, "invalid option: %s\n", cmd); exit(1); @@ -599,6 +623,7 @@ pseudofs_usage(int code) " label=\"string\"\n" " snapshots=\"/path\"\n" " snapshots-clear\n" + " prune-min=[Nd/][hh[:mm[:ss]]]\n" ); exit(code); } @@ -625,3 +650,54 @@ getyn(void) return(0); } +/* + * Convert time in the form [Nd/][hh[:mm[:ss]]] to seconds. + * + * Return -1 if a parse error occurs. + * Return 0x7FFFFFFF if the time exceeds the maximum allowed. 
+ */ +static +int +timetosecs(char *str) +{ + int days = 0; + int hrs = 0; + int mins = 0; + int secs = 0; + int n; + long long v; + char *ptr; + + n = strtol(str, &ptr, 10); + if (n < 0) + return(-1); + if (*ptr == 'd') { + days = n; + ++ptr; + if (*ptr == '/') + n = strtol(ptr + 1, &ptr, 10); + else + n = 0; + } + if (n < 0) + return(-1); + hrs = n; + if (*ptr == ':') { + n = strtol(ptr + 1, &ptr, 10); + if (n < 0) + return(-1); + mins = n; + if (*ptr == ':') { + n = strtol(ptr + 1, &ptr, 10); + if (n < 0) + return(-1); + secs = n; + } + } + if (*ptr) + return(-1); + v = days * 24 * 60 * 60 + hrs * 60 * 60 + mins * 60 + secs; + if (v > 0x7FFFFFFF) + v = 0x7FFFFFFF; + return((int)v); +} diff --git a/sbin/hammer/cmd_show.c b/sbin/hammer/cmd_show.c index 8c1c082c7e..1e3953e2c0 100644 --- a/sbin/hammer/cmd_show.c +++ b/sbin/hammer/cmd_show.c @@ -41,13 +41,19 @@ #define FLAG_BADTYPE 0x0004 #define FLAG_BADCHILDPARENT 0x0008 -static void print_btree_node(hammer_off_t node_offset, int depth, int spike, +typedef struct btree_search { + u_int32_t lo; + int64_t obj_id; +} *btree_search_t; + +static void print_btree_node(hammer_off_t node_offset, btree_search_t search, + int depth, int spike, hammer_base_elm_t left_bound, hammer_base_elm_t right_bound); static const char *check_data_crc(hammer_btree_elm_t elm); static void print_record(hammer_btree_elm_t elm); static void print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type, - int flags, const char *label); + int flags, const char *label, const char *ext); static int print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset, hammer_btree_elm_t elm, u_int8_t btype, hammer_base_elm_t left_bound, @@ -55,10 +61,13 @@ static int print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset, static void print_bigblock_fill(hammer_off_t offset); void -hammer_cmd_show(hammer_off_t node_offset, int depth, +hammer_cmd_show(hammer_off_t node_offset, u_int32_t lo, int64_t obj_id, + int depth, hammer_base_elm_t 
left_bound, hammer_base_elm_t right_bound) { struct volume_info *volume; + struct btree_search search; + btree_search_t searchp; int zone; if (node_offset == (hammer_off_t)-1) { @@ -79,14 +88,27 @@ hammer_cmd_show(hammer_off_t node_offset, int depth, } rel_volume(volume); } - printf("show %016llx depth %d\n", node_offset, depth); - print_btree_node(node_offset, depth, 0, left_bound, right_bound); - print_btree_node(node_offset, depth, 1, left_bound, right_bound); + + if (lo == 0 && obj_id == (int64_t)HAMMER_MIN_OBJID) { + searchp = NULL; + printf("show %016llx depth %d\n", node_offset, depth); + } else { + search.lo = lo; + search.obj_id = obj_id; + searchp = &search; + printf("show %016llx lo %08x obj_id %016llx depth %d\n", + node_offset, lo, (long long)obj_id, depth); + } + print_btree_node(node_offset, searchp, depth, + 0, left_bound, right_bound); + print_btree_node(node_offset, searchp, depth, + 1, left_bound, right_bound); } static void -print_btree_node(hammer_off_t node_offset, int depth, int spike, - hammer_base_elm_t left_bound, hammer_base_elm_t right_bound) +print_btree_node(hammer_off_t node_offset, btree_search_t search, + int depth, int spike, + hammer_base_elm_t left_bound, hammer_base_elm_t right_bound) { struct buffer_info *buffer = NULL; hammer_node_ondisk_t node; @@ -95,6 +117,7 @@ print_btree_node(hammer_off_t node_offset, int depth, int spike, int flags; int maxcount; char badc; + const char *ext; node = get_node(node_offset, &buffer); @@ -121,17 +144,42 @@ print_btree_node(hammer_off_t node_offset, int depth, int spike, for (i = 0; i < node->count && i < maxcount; ++i) { elm = &node->elms[i]; + + if (node->type != HAMMER_BTREE_TYPE_INTERNAL) { + ext = NULL; + if (search && + elm->base.localization == search->lo && + elm->base.obj_id == search->obj_id) { + ext = " *"; + } + } else if (search) { + ext = " *"; + if (elm->base.localization > search->lo || + (elm->base.localization == search->lo && + elm->base.obj_id > search->obj_id)) { + ext = 
NULL; + } + if (elm[1].base.localization < search->lo || + (elm[1].base.localization == search->lo && + elm[1].base.obj_id < search->obj_id)) { + ext = NULL; + } + } else { + ext = NULL; + } + flags = print_elm_flags(node, node_offset, elm, elm->base.btype, left_bound, right_bound); - print_btree_elm(elm, i, node->type, flags, "ELM"); + print_btree_elm(elm, i, node->type, flags, "ELM", ext); } if (node->type == HAMMER_BTREE_TYPE_INTERNAL) { elm = &node->elms[i]; + flags = print_elm_flags(node, node_offset, elm, 'I', left_bound, right_bound); - print_btree_elm(elm, i, node->type, flags, "RBN"); + print_btree_elm(elm, i, node->type, flags, "RBN", NULL); } printf(" }\n"); } @@ -141,10 +189,27 @@ print_btree_node(hammer_off_t node_offset, int depth, int spike, switch(node->type) { case HAMMER_BTREE_TYPE_INTERNAL: + if (search) { + if (elm->base.localization > search->lo || + (elm->base.localization == search->lo && + elm->base.obj_id > search->obj_id)) { + break; + } + if (elm[1].base.localization < search->lo || + (elm[1].base.localization == search->lo && + elm[1].base.obj_id < search->obj_id)) { + break; + } + } if (elm->internal.subtree_offset) { print_btree_node(elm->internal.subtree_offset, - depth + 1, spike, + search, depth + 1, spike, &elm[0].base, &elm[1].base); + /* + * Cause show to iterate after seeking to + * the lo:objid + */ + search = NULL; } break; default: @@ -157,7 +222,7 @@ print_btree_node(hammer_off_t node_offset, int depth, int spike, static void print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type, - int flags, const char *label) + int flags, const char *label, const char *ext) { char flagstr[8] = { 0, '-', '-', '-', '-', '-', '-', 0 }; @@ -190,8 +255,12 @@ print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type, printf("suboff=%016llx", elm->internal.subtree_offset); if (QuietOpt < 3) printf(" mirror %016llx", elm->internal.mirror_tid); + if (ext) + printf(" %s", ext); break; case HAMMER_BTREE_TYPE_LEAF: + if (ext) + printf(" %s", 
ext); switch(elm->base.btype) { case HAMMER_BTREE_TYPE_RECORD: if (QuietOpt < 3) diff --git a/sbin/hammer/cmd_softprune.c b/sbin/hammer/cmd_softprune.c index ef0707a1b3..e5c6621cc6 100644 --- a/sbin/hammer/cmd_softprune.c +++ b/sbin/hammer/cmd_softprune.c @@ -42,6 +42,7 @@ struct softprune { char *filesystem; struct hammer_ioc_prune prune; int maxelms; + int prune_min; }; static void softprune_usage(int code); @@ -50,7 +51,8 @@ static void hammer_softprune_scandir(struct softprune **basep, const char *dirname); static struct softprune *hammer_softprune_addentry(struct softprune **basep, struct hammer_ioc_prune *template, - const char *dirpath, + const char *dirpath, const char *denname, + struct stat *st, const char *linkbuf, const char *tidptr); static void hammer_softprune_finalize(struct softprune *scan); @@ -62,6 +64,7 @@ void hammer_cmd_softprune(char **av, int ac, int everything_opt) { struct hammer_ioc_prune template; + struct hammer_ioc_pseudofs_rw pfs; struct softprune *base, *scan; int fd; int rcode; @@ -71,6 +74,12 @@ hammer_cmd_softprune(char **av, int ac, int everything_opt) if (TimeoutOpt > 0) alarm(TimeoutOpt); + bzero(&pfs, sizeof(pfs)); + pfs.bytes = sizeof(*pfs.ondisk); + pfs.ondisk = malloc(pfs.bytes); + bzero(pfs.ondisk, pfs.bytes); + pfs.pfs_id = -1; + /* * NOTE: To restrict to a single file XXX we have to set * the localization the same (not yet implemented). 
Typically @@ -97,13 +106,13 @@ hammer_cmd_softprune(char **av, int ac, int everything_opt) */ if (everything_opt) { const char *dummylink = ""; - scan = hammer_softprune_addentry(&base, &template, *av, + scan = hammer_softprune_addentry(&base, &template, + *av, NULL, NULL, dummylink, dummylink); if (scan == NULL) softprune_usage(1); scan->prune.nelms = 0; scan->prune.head.flags |= HAMMER_IOC_PRUNE_ALL; - } else { hammer_softprune_scandir(&base, &template, *av); ++av; @@ -127,6 +136,28 @@ hammer_cmd_softprune(char **av, int ac, int everything_opt) * Issue the prunes */ for (scan = base; scan; scan = scan->next) { + /* + * Open the filesystem for ioctl calls and extract the + * PFS. + */ + fd = open(scan->filesystem, O_RDONLY); + if (fd < 0) { + warn("Unable to open %s", scan->filesystem); + rcode = 1; + continue; + } + + if (ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs) < 0) { + warn("Filesystem %s is not HAMMER", scan->filesystem); + rcode = 1; + close(fd); + continue; + } + scan->prune_min = pfs.ondisk->prune_min; + + /* + * Finalize operations + */ hammer_softprune_finalize(scan); if (everything_opt) { printf("Prune %s: EVERYTHING\n", @@ -139,17 +170,21 @@ hammer_cmd_softprune(char **av, int ac, int everything_opt) (scan->prune.head.flags & HAMMER_IOC_PRUNE_ALL) == 0) { continue; } - fd = open(scan->filesystem, O_RDONLY); - if (fd < 0) { - warn("Unable to open %s", scan->filesystem); - rcode = 1; - continue; - } - printf("objspace %016llx:%04x %016llx:%04x\n", + + printf("Prune %s: objspace %016llx:%04x %016llx:%04x " + "pfs_id %d\n", + scan->filesystem, scan->prune.key_beg.obj_id, scan->prune.key_beg.localization, scan->prune.key_end.obj_id, - scan->prune.key_end.localization); + scan->prune.key_end.localization, + pfs.pfs_id); + printf("Prune %s: prune_min is %dd/%02d:%02d:%02d\n", + scan->filesystem, + pfs.ondisk->prune_min / (24 * 60 * 60), + pfs.ondisk->prune_min / 60 / 60 % 24, + pfs.ondisk->prune_min / 60 % 60, + pfs.ondisk->prune_min % 60); RunningIoctl = 1; if 
(ioctl(fd, HAMMERIOC_PRUNE, &scan->prune) < 0) { @@ -224,7 +259,8 @@ hammer_softprune_scandir(struct softprune **basep, if ((ptr = strrchr(linkbuf, '@')) && ptr > linkbuf && ptr[-1] == '@') { hammer_softprune_addentry(basep, template, - dirname, linkbuf, ptr - 1); + dirname, den->d_name, &st, + linkbuf, ptr - 1); } } free(linkbuf); @@ -234,13 +270,14 @@ hammer_softprune_scandir(struct softprune **basep, /* * Add the softlink to the appropriate softprune structure, creating a new - * if necessary. + * one if necessary. */ static struct softprune * hammer_softprune_addentry(struct softprune **basep, struct hammer_ioc_prune *template, - const char *dirpath, + const char *dirpath, const char *denname __unused, + struct stat *st, const char *linkbuf, const char *tidptr) { struct hammer_ioc_prune_elm *elm; @@ -248,6 +285,9 @@ hammer_softprune_addentry(struct softprune **basep, struct statfs fs; char *fspath; + /* + * Calculate filesystem path. + */ if (linkbuf[0] == '/') { asprintf(&fspath, "%*.*s", (tidptr - linkbuf), (tidptr - linkbuf), linkbuf); @@ -300,10 +340,15 @@ hammer_softprune_addentry(struct softprune **basep, scan->prune.elms = realloc(scan->prune.elms, sizeof(*elm) * scan->maxelms); } + + /* + * NOTE: Temporarily store the snapshot timestamp in mod_tid. + * This will be cleaned up in the finalization phase. + */ elm = &scan->prune.elms[scan->prune.nelms]; elm->beg_tid = strtoull(tidptr + 2, NULL, 0); elm->end_tid = 0; - elm->mod_tid = 0; + elm->mod_tid = (st) ? 
st->st_ctime : 0; ++scan->prune.nelms; return(scan); } @@ -332,6 +377,8 @@ static void hammer_softprune_finalize(struct softprune *scan) { struct hammer_ioc_prune_elm *elm; + time_t t; + long delta; int i; /* @@ -371,18 +418,66 @@ hammer_softprune_finalize(struct softprune *scan) */ elm->end_tid = elm[-1].beg_tid; } - elm->mod_tid = elm->end_tid - elm->beg_tid; + } + + /* + * If a minimum retention time (in seconds) is configured for the + * PFS, remove any snapshots from the pruning list that are within + * the period. + */ + if (scan->prune_min) { + t = time(NULL); + for (i = scan->prune.nelms - 1; i >= 0; --i) { + elm = &scan->prune.elms[i]; + if (elm->mod_tid == 0) + continue; + delta = (long)(t - (time_t)elm->mod_tid); + if (delta < scan->prune_min) + break; + } + ++i; + if (i) { + printf("Prune %s: prune_min: Will not clean between " + "the teeth of the first %d snapshots\n", + scan->filesystem, i); + bcopy(&scan->prune.elms[i], &scan->prune.elms[0], + (scan->prune.nelms - i) * sizeof(scan->prune.elms[0])); + scan->prune.elms[0].end_tid = HAMMER_MAX_TID; + scan->prune.nelms -= i; + } + } + + /* + * Remove the first entry. This entry represents the prune from + * the most recent snapshot to current. We wish to retain the + * fine-grained history for this region. + */ + if (scan->prune.nelms) { + bcopy(&scan->prune.elms[1], &scan->prune.elms[0], + (scan->prune.nelms - 1) * sizeof(scan->prune.elms[0])); + --scan->prune.nelms; } /* * Add a final element to prune everything from transaction id * 0 to the lowest transaction id (aka last so far). 
*/ - assert(scan->prune.nelms < scan->maxelms); - elm = &scan->prune.elms[scan->prune.nelms++]; - elm->beg_tid = 1; - elm->end_tid = elm[-1].beg_tid; - elm->mod_tid = elm->end_tid - elm->beg_tid; + if (scan->prune.nelms) { + assert(scan->prune.nelms < scan->maxelms); + elm = &scan->prune.elms[scan->prune.nelms]; + elm->beg_tid = 1; + elm->end_tid = elm[-1].beg_tid; + ++scan->prune.nelms; + } + + /* + * Adjust mod_tid to what the ioctl() expects. + */ + for (i = 0; i < scan->prune.nelms; ++i) { + elm = &scan->prune.elms[i]; + elm->mod_tid = elm->end_tid - elm->beg_tid; + printf("TID %016llx - %016llx\n", elm->beg_tid, elm->end_tid); + } } static diff --git a/sbin/hammer/hammer.8 b/sbin/hammer/hammer.8 index b41ed46750..2680651354 100644 --- a/sbin/hammer/hammer.8 +++ b/sbin/hammer/hammer.8 @@ -198,13 +198,17 @@ This command needs the .Fl f flag. .\" ==== show ==== -.It Ar show +.It Ar show Op Ar lo:objid Dump the B-tree. By default this command will validate all B-Tree linkages and CRCs, including data CRCs, and will report the most verbose information it can dig up. Any errors will show up with a 'B' in column 1 along with various other error flags. .Pp +If you specify a localization and object id field the dump will +search for the key printing nodes as it recurses down, and then +will iterate forwards. +.Pp If you use .Fl q the command will report less information about the inode contents. @@ -684,6 +688,14 @@ is an appropriate label. You can control snapshot retention on your slave independent of the master. .It snapshots-clear Zero out the snapshots directory path for this PFS. +.It prune-min=Nd +.It prune-min=Nd/hh[:mm[:ss]] +.It prune-min=hh[:mm[:ss]] +Set the minimum fine-grained data retention period. +.Nm HAMMER +always retains fine-grained history up to the first snapshot. +You can extend the retention period beyond the first snapshot by +specifying it with this option. 
.El .\" ==== pfs-upgrade ==== .It Ar pfs-upgrade Ar dirpath @@ -771,14 +783,24 @@ you want to create a compatible PFS slave for the target or not. .It Ar mirror-stream Ar [[user@]host:]filesystem Ar [[user@]host:]filesystem This command works similarly to .Ar mirror-copy -but does not exit unless the pipe is broken. -This command will resume the mirroring operation whenever the master is synced. +but does not exit after the initial mirroring completes. +The mirroring operation will resume as changes continue to be made to the +master. The command is commonly used with .Fl i Ar delay and .Fl b Ar bandwidth options to keep the mirroring target in sync with the source on a continuing basis. +.Pp +If the pipe is broken the command will automatically retry after sleeping +for a short while. +.Pp +This command also detects the initial-mirroring case and spends some +time scanning the B-Tree to find good break points, allowing the initial +bulk mirroring operation to be broken down into about 20 separate pieces. +This means that the user can kill and restart the operation and it will +not have to start from scratch once it has gotten past the first chunk. 
.\" ==== version ==== .It Ar version Ar filesystem This command returns the diff --git a/sbin/hammer/hammer.c b/sbin/hammer/hammer.c index ea18a3c60b..6d7ab5abc7 100644 --- a/sbin/hammer/hammer.c +++ b/sbin/hammer/hammer.c @@ -52,6 +52,7 @@ int TwoWayPipeOpt; int TimeoutOpt; int DelayOpt = 5; int ForceYesOpt = 0; +int ForceOpt; int RunningIoctl; int DidInterrupt; u_int64_t BandwidthOpt; @@ -67,7 +68,7 @@ main(int ac, char **av) int ch; int cacheSize = 0; - while ((ch = getopt(ac, av, "b:c:dhf:i:qrs:t:v2yC:")) != -1) { + while ((ch = getopt(ac, av, "b:c:dhf:i:qrs:t:v2yC:F")) != -1) { switch(ch) { case '2': TwoWayPipeOpt = 1; @@ -166,6 +167,9 @@ main(int ac, char **av) } hammer_cache_set(cacheSize); break; + case 'F': + ForceOpt = 1; + break; default: usage(1); /* not reached */ @@ -369,12 +373,13 @@ main(int ac, char **av) } if (strcmp(av[0], "show") == 0) { - hammer_off_t node_offset = (hammer_off_t)-1; + u_int32_t lo = 0; + int64_t obj_id = (int64_t)HAMMER_MIN_OBJID; hammer_parsedevs(blkdevs); if (ac > 1) - sscanf(av[1], "%llx", &node_offset); - hammer_cmd_show(node_offset, 0, NULL, NULL); + sscanf(av[1], "%08x:%llx", &lo, &obj_id); + hammer_cmd_show(-1, lo, obj_id, 0, NULL, NULL); exit(0); } if (strcmp(av[0], "blockmap") == 0) { diff --git a/sbin/hammer/hammer.h b/sbin/hammer/hammer.h index d055888534..1084e8af10 100644 --- a/sbin/hammer/hammer.h +++ b/sbin/hammer/hammer.h @@ -69,11 +69,13 @@ extern int DelayOpt; extern int ForceYesOpt; extern int RunningIoctl; extern int DidInterrupt; +extern int ForceOpt; extern u_int64_t BandwidthOpt; extern const char *LinkPath; extern const char *CyclePath; -void hammer_cmd_show(hammer_tid_t node_offset, int depth, +void hammer_cmd_show(hammer_tid_t node_offset, u_int32_t lo, + int64_t obj_id, int depth, hammer_base_elm_t left_bound, hammer_base_elm_t right_bound); void hammer_cmd_prune(char **av, int ac); void hammer_cmd_softprune(char **av, int ac, int everything_opt); -- 2.41.0