HAMMER Util - Add new features, fix history retention bug in prune
authorMatthew Dillon <dillon@apollo.backplane.com>
Mon, 17 Aug 2009 18:05:09 +0000 (11:05 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Mon, 17 Aug 2009 18:05:09 +0000 (11:05 -0700)
* The prune code was not retaining the fine-grained history between
  the last snapshot and current.

* Add a new PFS config variable called 'prune-min' which may be used
  to set the minimum fine-grained history retention, in seconds.  The
  hammer prune code will not prune in between any snapshots that are
  within the fine-grained retention period.

* The mirror-stream directive now automatically loops and reconnects
  if the network connection fails.

* The mirror-stream directive now attempts to break up a large initial
  bulk transfer into smaller transfers, inserting synchronization points
  so a failure in the middle of the large bulk transfer does not require
  starting from scratch again.

* The show directive (show btree) now takes an optional localization[:objid]
  argument (specified in hex).  If specified the directive will search
  the B-Tree for the key, printing nodes as it goes, and then continue
  with a normal iteration.

sbin/hammer/cmd_mirror.c
sbin/hammer/cmd_pseudofs.c
sbin/hammer/cmd_show.c
sbin/hammer/cmd_softprune.c
sbin/hammer/hammer.8
sbin/hammer/hammer.c
sbin/hammer/hammer.h

index 36192b7..9c6b712 100644 (file)
 
 static int read_mrecords(int fd, char *buf, u_int size,
                         hammer_ioc_mrecord_head_t pickup);
+static int generate_histogram(int fd, const char *filesystem,
+                        hammer_tid_t **histogram_ary,
+                        struct hammer_ioc_mirror_rw *mirror_base);
 static hammer_ioc_mrecord_any_t read_mrecord(int fdin, int *errorp,
                         hammer_ioc_mrecord_head_t pickup);
 static void write_mrecord(int fdout, u_int32_t type,
                         hammer_ioc_mrecord_any_t mrec, int bytes);
-static void generate_mrec_header(int fd, int fdout, int pfs_id,
-                        hammer_tid_t *tid_begp, hammer_tid_t *tid_endp);
+static void generate_mrec_header(int fd, int pfs_id,
+                        union hammer_ioc_mrecord_any *mrec_tmp);
 static int validate_mrec_header(int fd, int fdin, int is_target, int pfs_id,
                         struct hammer_ioc_mrecord_head *pickup,
                         hammer_tid_t *tid_begp, hammer_tid_t *tid_endp);
@@ -55,6 +58,8 @@ static ssize_t writebw(int fd, const void *buf, size_t nbytes,
 static int getyn(void);
 static void mirror_usage(int code);
 
+#define BULK_MINIMUM   20000
+
 /*
  * Generate a mirroring data stream from the specific source over the
  * entire key range, but restricted to the specified transaction range.
@@ -72,6 +77,7 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming)
        struct hammer_ioc_mrecord_head pickup;
        hammer_ioc_mrecord_any_t mrec;
        hammer_tid_t sync_tid;
+       hammer_tid_t *histogram_ary;
        const char *filesystem;
        char *buf = malloc(SERIALBUF_SIZE);
        int interrupted = 0;
@@ -79,6 +85,7 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming)
        int fd;
        int n;
        int didwork;
+       int histogram;
        int64_t total_bytes;
        time_t base_t = time(NULL);
        struct timeval bwtv;
@@ -90,6 +97,8 @@ hammer_cmd_mirror_read(char **av, int ac, int streaming)
 
        pickup.signature = 0;
        pickup.type = 0;
+       histogram = -1;
+       histogram_ary = NULL;
 
 again:
        bzero(&mirror, sizeof(mirror));
@@ -107,9 +116,12 @@ again:
        bwcount = 0;
 
        /*
-        * Send initial header for the purpose of determining shared-uuid.
+        * Send initial header for the purpose of determining the
+        * shared-uuid.
         */
-       generate_mrec_header(fd, 1, pfs.pfs_id, NULL, NULL);
+       generate_mrec_header(fd, pfs.pfs_id, &mrec_tmp);
+       write_mrecord(1, HAMMER_MREC_TYPE_PFSD,
+                     &mrec_tmp, sizeof(mrec_tmp.pfs));
 
        /*
         * In 2-way mode the target will send us a PFS info packet
@@ -132,15 +144,60 @@ again:
         * has a larger begin sync.  tid_end is set to the latest source
         * TID whos flush cycle has completed.
         */
-       generate_mrec_header(fd, 1, pfs.pfs_id,
-                            &mirror.tid_beg, &mirror.tid_end);
+       generate_mrec_header(fd, pfs.pfs_id, &mrec_tmp);
+       if (mirror.tid_beg < mrec_tmp.pfs.pfsd.sync_beg_tid)
+               mirror.tid_beg = mrec_tmp.pfs.pfsd.sync_beg_tid;
+       mirror.tid_end = mrec_tmp.pfs.pfsd.sync_end_tid;
+       mirror.ubuf = buf;
+       mirror.size = SERIALBUF_SIZE;
+       mirror.pfs_id = pfs.pfs_id;
+       mirror.shared_uuid = pfs.ondisk->shared_uuid;
 
-       /* XXX streaming mode support w/ cycle or command line arg */
        /*
-        * A cycle file overrides the beginning TID
+        * XXX If the histogram is exhausted and the TID delta is large
+        *     the stream might have been offline for a while and is
+        *     now picking it up again.  Do another histogram.
         */
-       hammer_get_cycle(&mirror.key_beg, &mirror.tid_beg);
+#if 0
+       if (TwoWayPipeOpt && streaming && histogram == 0) {
+               if (mirror.tid_end - mirror.tid_beg > BULK_MINIMUM)
+                       histogram = -1;
+       }
+#endif
 
+       /*
+        * Initial bulk startup control, try to do some incremental
+        * mirroring in order to allow the stream to be killed and
+        * restarted without having to start over.
+        */
+       if (histogram < 0) {
+               if (VerboseOpt)
+                       fprintf(stderr, "\n");
+               histogram = generate_histogram(fd, filesystem,
+                                              &histogram_ary, &mirror);
+       }
+
+       if (TwoWayPipeOpt && streaming && histogram > 0) {
+               mirror.tid_end = histogram_ary[--histogram];
+               mrec_tmp.pfs.pfsd.sync_end_tid = mirror.tid_end;
+       }
+
+       write_mrecord(1, HAMMER_MREC_TYPE_PFSD,
+                     &mrec_tmp, sizeof(mrec_tmp.pfs));
+
+       /*
+        * A cycle file overrides the beginning TID only if we are
+        * not operating in two-way mode.
+        */
+       if (TwoWayPipeOpt == 0) {
+               hammer_get_cycle(&mirror.key_beg, &mirror.tid_beg);
+       }
+
+       /*
+        * An additional argument overrides the beginning TID regardless
+        * of what mode we are in.  This is not recommended when operating
+        * in two-way mode.
+        */
        if (ac == 2)
                mirror.tid_beg = strtoull(av[1], NULL, 0);
 
@@ -202,12 +259,19 @@ again:
                }
                total_bytes += mirror.count;
                if (streaming && VerboseOpt) {
-                       fprintf(stderr, "\r%016llx %11lld",
-                               mirror.key_cur.obj_id,
+                       fprintf(stderr,
+                               "\robj=%016llx tids=%016llx:%016llx %11lld",
+                               (long long)mirror.key_cur.obj_id,
+                               (long long)mirror.tid_beg,
+                               (long long)mirror.tid_end,
                                total_bytes);
                        fflush(stderr);
                }
                mirror.key_beg = mirror.key_cur;
+
+               /*
+                * Deal with time limit option
+                */
                if (TimeoutOpt &&
                    (unsigned)(time(NULL) - base_t) > (unsigned)TimeoutOpt) {
                        fprintf(stderr,
@@ -273,6 +337,19 @@ done:
                time_t t1 = time(NULL);
                time_t t2;
 
+               /*
+                * Two way streaming tries to break down large bulk
+                * transfers into smaller ones so it can sync the
+                * transaction id on the slave.  This way if we get
+                * interrupted a restart doesn't have to start from
+                * scratch.
+                */
+               if (TwoWayPipeOpt && streaming && histogram > 0) {
+                       if (VerboseOpt)
+                               fprintf(stderr, " (bulk incremental)");
+                       goto again;
+               }
+
                if (VerboseOpt) {
                        fprintf(stderr, " W");
                        fflush(stderr);
@@ -304,6 +381,91 @@ done:
        fprintf(stderr, "Mirror-read %s succeeded\n", filesystem);
 }
 
+/*
+ * Ok, this isn't really a histogram.  What we are trying to do
+ * here is find the first tid_end for the scan that returns
+ * at least some data.  The frontend of the TID space will generally
+ * return nothing so we can't just divide out the full mirroring
+ * range.  Once we find the point where a real data stream starts
+ * to get generated we can divide out the range from that point.
+ *
+ * When starting a new mirroring operation completely from scratch
+ * this code will take some time to run, but once some mirroring
+ * data is synchronized on the target you will be able to interrupt
+ * the stream and restart it and the later invocations of this
+ * code will be such that it should run much faster.
+ *
+ * fd            - descriptor the HAMMERIOC_MIRROR_READ probes are issued on
+ * filesystem    - path, used only in error messages
+ * histogram_ary - in/out; any previous array is freed and replaced
+ * mirror_base   - template request; copied, never modified
+ *
+ * Returns the number of ending TIDs stored in *histogram_ary, or 0
+ * (with *histogram_ary set to NULL) when the TID range in mirror_base
+ * spans no more than BULK_MINIMUM.  Exits the program if a probe ioctl
+ * fails or memory cannot be allocated.
+ */
+static int
+generate_histogram(int fd, const char *filesystem,
+                  hammer_tid_t **histogram_ary,
+                  struct hammer_ioc_mirror_rw *mirror_base)
+{
+       struct hammer_ioc_mirror_rw mirror;
+       hammer_tid_t tid_beg;
+       hammer_tid_t tid_end;
+       hammer_tid_t tid_half;
+       const int nelms = 20;
+       int i;
+
+       mirror = *mirror_base;
+       tid_beg = mirror.tid_beg;
+       tid_end = mirror.tid_end;
+
+       /*
+        * Discard any previous array and clear the caller's pointer so
+        * the early return below cannot leave it dangling (a later call
+        * would otherwise free the same memory a second time).
+        */
+       free(*histogram_ary);
+       *histogram_ary = NULL;
+       if (tid_beg + BULK_MINIMUM >= tid_end)
+               return(0);
+
+       /*
+        * Narrow [tid_beg, tid_end) by repeatedly probing the lower
+        * two-thirds of the range.  If the probe fills more than half
+        * the buffer the data starts inside it, so pull tid_end in;
+        * otherwise advance tid_beg past the (mostly) empty region.
+        */
+       if (VerboseOpt)
+               fprintf(stderr, "Doing Range Test\n");
+       while (tid_end - tid_beg > BULK_MINIMUM) {
+               tid_half = tid_beg + (tid_end - tid_beg) * 2 / 3;
+               mirror.count = 0;
+               mirror.tid_beg = tid_beg;
+               mirror.tid_end = tid_half;
+
+               if (VerboseOpt > 1) {
+                       fprintf(stderr, "RangeTest %016llx/%016llx - %016llx (%lld) ",
+                               (long long)tid_beg,
+                               (long long)tid_end,
+                               (long long)tid_half,
+                               (long long)(tid_half - tid_beg));
+               }
+               fflush(stderr);
+               if (ioctl(fd, HAMMERIOC_MIRROR_READ, &mirror) < 0) {
+                       fprintf(stderr, "Mirror-read %s failed: %s\n",
+                               filesystem, strerror(errno));
+                       exit(1);
+               }
+               if (mirror.head.flags & HAMMER_IOC_HEAD_ERROR) {
+                       fprintf(stderr,
+                               "Mirror-read %s fatal error %d\n",
+                               filesystem, mirror.head.error);
+                       exit(1);
+               }
+               if (VerboseOpt > 1)
+                       fprintf(stderr, "%d\n", mirror.count);
+               if (mirror.count > SERIALBUF_SIZE / 2) {
+                       tid_end = tid_half;
+               } else {
+                       tid_beg = tid_half;
+               }
+       }
+
+       /*
+        * tid_beg now marks where data starts; divide everything from
+        * there up to the caller's original ending TID.
+        */
+       tid_end = mirror_base->tid_end;
+       fprintf(stderr, "histogram range %016llx - %016llx\n",
+               (long long)tid_beg, (long long)tid_end);
+
+       /*
+        * The final array generates our incremental ending tids in
+        * reverse order.  The caller also picks them off in reverse order.
+        * Element 0 is therefore the full tid_end, consumed last.
+        */
+       *histogram_ary = malloc(sizeof(hammer_tid_t) * nelms);
+       if (*histogram_ary == NULL) {
+               fprintf(stderr, "Mirror-read %s: out of memory\n",
+                       filesystem);
+               exit(1);
+       }
+       for (i = 0; i < nelms; ++i) {
+               (*histogram_ary)[i] = tid_end -
+                                     (tid_end - tid_beg) / nelms * i;
+       }
+       return(nelms);
+}
+
 static void
 create_pfs(const char *filesystem, uuid_t *s_uuid)
 {
@@ -439,8 +601,12 @@ again:
         */
        mirror.tid_beg = 0;
        if (TwoWayPipeOpt) {
-               generate_mrec_header(fd, 1, pfs.pfs_id,
-                                    &mirror.tid_beg, &mirror.tid_end);
+               generate_mrec_header(fd, pfs.pfs_id, &mrec_tmp);
+               if (mirror.tid_beg < mrec_tmp.pfs.pfsd.sync_beg_tid)
+                       mirror.tid_beg = mrec_tmp.pfs.pfsd.sync_beg_tid;
+               mirror.tid_end = mrec_tmp.pfs.pfsd.sync_end_tid;
+               write_mrecord(1, HAMMER_MREC_TYPE_PFSD,
+                             &mrec_tmp, sizeof(mrec_tmp.pfs));
        }
 
        /*
@@ -658,13 +824,14 @@ hammer_cmd_mirror_copy(char **av, int ac, int streaming)
        if (ac != 2)
                mirror_usage(1);
 
+       TwoWayPipeOpt = 1;
+
+again:
        if (pipe(fds) < 0) {
                perror("pipe");
                exit(1);
        }
 
-       TwoWayPipeOpt = 1;
-
        /*
         * Source
         */
@@ -767,6 +934,19 @@ hammer_cmd_mirror_copy(char **av, int ac, int streaming)
                ;
        while (waitpid(pid2, NULL, 0) <= 0)
                ;
+
+       /*
+        * If the link is lost restart
+        */
+       if (streaming) {
+               if (VerboseOpt) {
+                       fprintf(stderr, "\nLost Link\n");
+                       fflush(stderr);
+               }
+               sleep(DelayOpt);
+               goto again;
+       }
+
 }
 
 /*
@@ -1005,17 +1185,16 @@ write_mrecord(int fdout, u_int32_t type, hammer_ioc_mrecord_any_t mrec,
  * originating filesytem.
  */
 static void
-generate_mrec_header(int fd, int fdout, int pfs_id,
-                    hammer_tid_t *tid_begp, hammer_tid_t *tid_endp)
+generate_mrec_header(int fd, int pfs_id,
+                    union hammer_ioc_mrecord_any *mrec_tmp)
 {
        struct hammer_ioc_pseudofs_rw pfs;
-       union hammer_ioc_mrecord_any mrec_tmp;
 
        bzero(&pfs, sizeof(pfs));
-       bzero(&mrec_tmp, sizeof(mrec_tmp));
+       bzero(mrec_tmp, sizeof(*mrec_tmp));
        pfs.pfs_id = pfs_id;
-       pfs.ondisk = &mrec_tmp.pfs.pfsd;
-       pfs.bytes = sizeof(mrec_tmp.pfs.pfsd);
+       pfs.ondisk = &mrec_tmp->pfs.pfsd;
+       pfs.bytes = sizeof(mrec_tmp->pfs.pfsd);
        if (ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs) != 0) {
                fprintf(stderr, "Mirror-read: not a HAMMER fs/pseudofs!\n");
                exit(1);
@@ -1024,20 +1203,7 @@ generate_mrec_header(int fd, int fdout, int pfs_id,
                fprintf(stderr, "Mirror-read: HAMMER pfs version mismatch!\n");
                exit(1);
        }
-
-       /*
-        * sync_beg_tid - lowest TID on source after which a full history
-        *                is available.
-        *
-        * sync_end_tid - highest fully synchronized TID from source.
-        */
-       if (tid_begp && *tid_begp < mrec_tmp.pfs.pfsd.sync_beg_tid)
-               *tid_begp = mrec_tmp.pfs.pfsd.sync_beg_tid;
-       if (tid_endp)
-               *tid_endp = mrec_tmp.pfs.pfsd.sync_end_tid;
-       mrec_tmp.pfs.version = pfs.version;
-       write_mrecord(fdout, HAMMER_MREC_TYPE_PFSD,
-                     &mrec_tmp, sizeof(mrec_tmp.pfs));
+       mrec_tmp->pfs.version = pfs.version;
 }
 
 /*
index 67933ec..ea91500 100644 (file)
@@ -41,6 +41,7 @@ static void init_pfsd(hammer_pseudofs_data_t pfsd, int is_slave);
 static void dump_pfsd(hammer_pseudofs_data_t pfsd);
 static void pseudofs_usage(int code);
 static int getyn(void);
+static int timetosecs(char *str);
 
 /*
  * Calculate the pfs_id given a path to a directory or a @@PFS or @@%llx:%d
@@ -484,6 +485,21 @@ dump_pfsd(hammer_pseudofs_data_t pfsd)
        printf("    label=\"%s\"\n", pfsd->label);
        if (pfsd->snapshots[0])
                printf("    snapshots=\"%s\"\n", pfsd->snapshots);
+       if (pfsd->prune_min < (60 * 60 * 24)) {
+               printf("    prune-min=%02d:%02d:%02d\n",
+                       pfsd->prune_min / 60 / 60 % 24,
+                       pfsd->prune_min / 60 % 60,
+                       pfsd->prune_min % 60);
+       } else if (pfsd->prune_min % (60 * 60 * 24)) {
+               printf("    prune-min=%dd/%02d:%02d:%02d\n",
+                       pfsd->prune_min / 60 / 60 / 24,
+                       pfsd->prune_min / 60 / 60 % 24,
+                       pfsd->prune_min / 60 % 60,
+                       pfsd->prune_min % 60);
+       } else {
+               printf("    prune-min=%dd\n", pfsd->prune_min / 60 / 60 / 24);
+       }
+
        if (pfsd->mirror_flags & HAMMER_PFSD_SLAVE) {
                printf("    operating as a SLAVE\n");
                if (pfsd->snapshots[0] == 0)
@@ -564,6 +580,14 @@ parse_pfsd_options(char **av, int ac, hammer_pseudofs_data_t pfsd)
                                 "%s", ptr);
                } else if (strcmp(cmd, "snapshots-clear") == 0) {
                        pfsd->snapshots[0] = 0;
+               } else if (strcmp(cmd, "prune-min") == 0) {
+                       pfsd->prune_min = timetosecs(ptr);
+                       if (pfsd->prune_min < 0) {
+                               fprintf(stderr,
+                                       "option %s: illegal time spec, "
+                                       "use Nd or [Nd/]hh[:mm[:ss]]\n", ptr);
+                               exit(1);
+                       }
                } else {
                        fprintf(stderr, "invalid option: %s\n", cmd);
                        exit(1);
@@ -599,6 +623,7 @@ pseudofs_usage(int code)
                "    label=\"string\"\n"
                "    snapshots=\"/path\"\n"
                "    snapshots-clear\n"
+               "    prune-min=[Nd/][hh[:mm[:ss]]]\n"
        );
        exit(code);
 }
@@ -625,3 +650,54 @@ getyn(void)
        return(0);
 }
 
+/*
+ * Parse one non-negative decimal field of a time specification.
+ *
+ * *endp is set to the first character following the number, as with
+ * strtol().  Returns -1 if the number is negative.  Values larger than
+ * 0x7FFFFFFF are clamped to 0x7FFFFFFF so the caller can combine the
+ * fields in 64-bit arithmetic without overflowing.
+ */
+static
+long long
+timetosecs_field(const char *str, char **endp)
+{
+       long n;
+
+       n = strtol(str, endp, 10);
+       if (n < 0)
+               return(-1);
+       if (n > 0x7FFFFFFF)
+               n = 0x7FFFFFFF;
+       return(n);
+}
+
+/*
+ * Convert time in the form [Nd/][hh[:mm[:ss]]] to seconds.
+ *
+ * Return -1 if a parse error occurs.
+ * Return 0x7FFFFFFF if the time exceeds the maximum allowed.
+ *
+ * The total is accumulated in 64 bits; doing it in int arithmetic
+ * would overflow before the clamp could ever take effect.
+ */
+static
+int
+timetosecs(char *str)
+{
+       long long days = 0;
+       long long hrs = 0;
+       long long mins = 0;
+       long long secs = 0;
+       long long n;
+       long long v;
+       char *ptr;
+
+       n = timetosecs_field(str, &ptr);
+       if (n < 0)
+               return(-1);
+       if (*ptr == 'd') {
+               /*
+                * Leading number was a day count; an hours field only
+                * follows if a '/' separator is present.
+                */
+               days = n;
+               ++ptr;
+               if (*ptr == '/') {
+                       n = timetosecs_field(ptr + 1, &ptr);
+                       if (n < 0)
+                               return(-1);
+               } else {
+                       n = 0;
+               }
+       }
+       hrs = n;
+       if (*ptr == ':') {
+               n = timetosecs_field(ptr + 1, &ptr);
+               if (n < 0)
+                       return(-1);
+               mins = n;
+               if (*ptr == ':') {
+                       n = timetosecs_field(ptr + 1, &ptr);
+                       if (n < 0)
+                               return(-1);
+                       secs = n;
+               }
+       }
+
+       /* Anything left over is not part of a valid time spec */
+       if (*ptr)
+               return(-1);
+       v = days * 24 * 60 * 60 + hrs * 60 * 60 + mins * 60 + secs;
+       if (v > 0x7FFFFFFF)
+               v = 0x7FFFFFFF;
+       return((int)v);
+}
index 8c1c082..1e3953e 100644 (file)
 #define FLAG_BADTYPE           0x0004
 #define FLAG_BADCHILDPARENT    0x0008
 
-static void print_btree_node(hammer_off_t node_offset, int depth, int spike,
+typedef struct btree_search {
+       u_int32_t       lo;
+       int64_t         obj_id;
+} *btree_search_t;
+
+static void print_btree_node(hammer_off_t node_offset, btree_search_t search,
+                       int depth, int spike,
                        hammer_base_elm_t left_bound,
                        hammer_base_elm_t right_bound);
 static const char *check_data_crc(hammer_btree_elm_t elm);
 static void print_record(hammer_btree_elm_t elm);
 static void print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type,
-                       int flags, const char *label);
+                       int flags, const char *label, const char *ext);
 static int print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset,
                        hammer_btree_elm_t elm, u_int8_t btype,
                        hammer_base_elm_t left_bound,
@@ -55,10 +61,13 @@ static int print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset,
 static void print_bigblock_fill(hammer_off_t offset);
 
 void
-hammer_cmd_show(hammer_off_t node_offset, int depth,
+hammer_cmd_show(hammer_off_t node_offset, u_int32_t lo, int64_t obj_id,
+               int depth,
                hammer_base_elm_t left_bound, hammer_base_elm_t right_bound)
 {
        struct volume_info *volume;
+       struct btree_search search;
+       btree_search_t searchp;
        int zone;
 
        if (node_offset == (hammer_off_t)-1) {
@@ -79,14 +88,27 @@ hammer_cmd_show(hammer_off_t node_offset, int depth,
                }
                rel_volume(volume);
        }
-       printf("show %016llx depth %d\n", node_offset, depth);
-       print_btree_node(node_offset, depth, 0, left_bound, right_bound);
-       print_btree_node(node_offset, depth, 1, left_bound, right_bound);
+
+       if (lo == 0 && obj_id == (int64_t)HAMMER_MIN_OBJID) {
+               searchp = NULL;
+               printf("show %016llx depth %d\n", node_offset, depth);
+       } else {
+               search.lo = lo;
+               search.obj_id = obj_id;
+               searchp = &search;
+               printf("show %016llx lo %08x obj_id %016llx depth %d\n",
+                       node_offset, lo, (long long)obj_id, depth);
+       }
+       print_btree_node(node_offset, searchp, depth,
+                       0, left_bound, right_bound);
+       print_btree_node(node_offset, searchp, depth,
+                       1, left_bound, right_bound);
 }
 
 static void
-print_btree_node(hammer_off_t node_offset, int depth, int spike,
-                hammer_base_elm_t left_bound, hammer_base_elm_t right_bound)
+print_btree_node(hammer_off_t node_offset, btree_search_t search,
+               int depth, int spike,
+               hammer_base_elm_t left_bound, hammer_base_elm_t right_bound)
 {
        struct buffer_info *buffer = NULL;
        hammer_node_ondisk_t node;
@@ -95,6 +117,7 @@ print_btree_node(hammer_off_t node_offset, int depth, int spike,
        int flags;
        int maxcount;
        char badc;
+       const char *ext;
 
        node = get_node(node_offset, &buffer);
 
@@ -121,17 +144,42 @@ print_btree_node(hammer_off_t node_offset, int depth, int spike,
 
                for (i = 0; i < node->count && i < maxcount; ++i) {
                        elm = &node->elms[i];
+
+                       if (node->type != HAMMER_BTREE_TYPE_INTERNAL) {
+                               ext = NULL;
+                               if (search &&
+                                   elm->base.localization == search->lo &&
+                                    elm->base.obj_id == search->obj_id) {
+                                       ext = " *";
+                               }
+                       } else if (search) {
+                               ext = " *";
+                               if (elm->base.localization > search->lo ||
+                                   (elm->base.localization == search->lo &&
+                                    elm->base.obj_id > search->obj_id)) {
+                                       ext = NULL;
+                               }
+                               if (elm[1].base.localization < search->lo ||
+                                   (elm[1].base.localization == search->lo &&
+                                    elm[1].base.obj_id < search->obj_id)) {
+                                       ext = NULL;
+                               }
+                       } else {
+                               ext = NULL;
+                       }
+
                        flags = print_elm_flags(node, node_offset,
                                                elm, elm->base.btype,
                                                left_bound, right_bound);
-                       print_btree_elm(elm, i, node->type, flags, "ELM");
+                       print_btree_elm(elm, i, node->type, flags, "ELM", ext);
                }
                if (node->type == HAMMER_BTREE_TYPE_INTERNAL) {
                        elm = &node->elms[i];
+
                        flags = print_elm_flags(node, node_offset,
                                                elm, 'I',
                                                left_bound, right_bound);
-                       print_btree_elm(elm, i, node->type, flags, "RBN");
+                       print_btree_elm(elm, i, node->type, flags, "RBN", NULL);
                }
                printf("    }\n");
        }
@@ -141,10 +189,27 @@ print_btree_node(hammer_off_t node_offset, int depth, int spike,
 
                switch(node->type) {
                case HAMMER_BTREE_TYPE_INTERNAL:
+                       if (search) {
+                               if (elm->base.localization > search->lo ||
+                                   (elm->base.localization == search->lo &&
+                                    elm->base.obj_id > search->obj_id)) {
+                                       break;
+                               }
+                               if (elm[1].base.localization < search->lo ||
+                                   (elm[1].base.localization == search->lo &&
+                                    elm[1].base.obj_id < search->obj_id)) {
+                                       break;
+                               }
+                       }
                        if (elm->internal.subtree_offset) {
                                print_btree_node(elm->internal.subtree_offset,
-                                                depth + 1, spike,
+                                                search, depth + 1, spike,
                                                 &elm[0].base, &elm[1].base);
+                               /*
+                                * Cause show to iterate after seeking to
+                                * the lo:objid
+                                */
+                               search = NULL;
                        }
                        break;
                default:
@@ -157,7 +222,7 @@ print_btree_node(hammer_off_t node_offset, int depth, int spike,
 static
 void
 print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type,
-               int flags, const char *label)
+               int flags, const char *label, const char *ext)
 {
        char flagstr[8] = { 0, '-', '-', '-', '-', '-', '-', 0 };
 
@@ -190,8 +255,12 @@ print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type,
                printf("suboff=%016llx", elm->internal.subtree_offset);
                if (QuietOpt < 3)
                        printf(" mirror %016llx", elm->internal.mirror_tid);
+               if (ext)
+                       printf(" %s", ext);
                break;
        case HAMMER_BTREE_TYPE_LEAF:
+               if (ext)
+                       printf(" %s", ext);
                switch(elm->base.btype) {
                case HAMMER_BTREE_TYPE_RECORD:
                        if (QuietOpt < 3)
index ef0707a..e5c6621 100644 (file)
@@ -42,6 +42,7 @@ struct softprune {
        char *filesystem;
        struct hammer_ioc_prune prune;
        int maxelms;
+       int prune_min;
 };
 
 static void softprune_usage(int code);
@@ -50,7 +51,8 @@ static void hammer_softprune_scandir(struct softprune **basep,
                         const char *dirname);
 static struct softprune *hammer_softprune_addentry(struct softprune **basep,
                         struct hammer_ioc_prune *template,
-                        const char *dirpath,
+                        const char *dirpath, const char *denname,
+                        struct stat *st,
                         const char *linkbuf, const char *tidptr);
 static void hammer_softprune_finalize(struct softprune *scan);
 
@@ -62,6 +64,7 @@ void
 hammer_cmd_softprune(char **av, int ac, int everything_opt)
 {
        struct hammer_ioc_prune template;
+       struct hammer_ioc_pseudofs_rw pfs;
        struct softprune *base, *scan;
        int fd;
        int rcode;
@@ -71,6 +74,12 @@ hammer_cmd_softprune(char **av, int ac, int everything_opt)
        if (TimeoutOpt > 0)
                alarm(TimeoutOpt);
 
+       bzero(&pfs, sizeof(pfs));
+       pfs.bytes = sizeof(*pfs.ondisk);
+       pfs.ondisk = malloc(pfs.bytes);
+       bzero(pfs.ondisk, pfs.bytes);
+       pfs.pfs_id = -1;
+
        /*
         * NOTE: To restrict to a single file XXX we have to set
         * the localization the same (not yet implemented).  Typically
@@ -97,13 +106,13 @@ hammer_cmd_softprune(char **av, int ac, int everything_opt)
         */
        if (everything_opt) {
                const char *dummylink = "";
-               scan = hammer_softprune_addentry(&base, &template, *av,
+               scan = hammer_softprune_addentry(&base, &template,
+                                                *av, NULL, NULL,
                                                 dummylink, dummylink);
                if (scan == NULL)
                        softprune_usage(1);
                scan->prune.nelms = 0;
                scan->prune.head.flags |= HAMMER_IOC_PRUNE_ALL;
-
        } else {
                hammer_softprune_scandir(&base, &template, *av);
                ++av;
@@ -127,6 +136,28 @@ hammer_cmd_softprune(char **av, int ac, int everything_opt)
         * Issue the prunes
         */
        for (scan = base; scan; scan = scan->next) {
+               /*
+                * Open the filesystem for ioctl calls and extract the
+                * PFS.
+                */
+               fd = open(scan->filesystem, O_RDONLY);
+               if (fd < 0) {
+                       warn("Unable to open %s", scan->filesystem);
+                       rcode = 1;
+                       continue;
+               }
+
+               if (ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs) < 0) {
+                       warn("Filesystem %s is not HAMMER", scan->filesystem);
+                       rcode = 1;
+                       close(fd);
+                       continue;
+               }
+               scan->prune_min = pfs.ondisk->prune_min;
+
+               /*
+                * Finalize operations
+                */
                hammer_softprune_finalize(scan);
                if (everything_opt) {
                        printf("Prune %s: EVERYTHING\n",
@@ -139,17 +170,21 @@ hammer_cmd_softprune(char **av, int ac, int everything_opt)
                    (scan->prune.head.flags & HAMMER_IOC_PRUNE_ALL) == 0) {
                        continue;
                }
-               fd = open(scan->filesystem, O_RDONLY);
-               if (fd < 0) {
-                       warn("Unable to open %s", scan->filesystem);
-                       rcode = 1;
-                       continue;
-               }
-               printf("objspace %016llx:%04x %016llx:%04x\n",
+
+               printf("Prune %s: objspace %016llx:%04x %016llx:%04x "
+                      "pfs_id %d\n",
+                      scan->filesystem,
                       scan->prune.key_beg.obj_id,
                       scan->prune.key_beg.localization,
                       scan->prune.key_end.obj_id,
-                      scan->prune.key_end.localization);
+                      scan->prune.key_end.localization,
+                      pfs.pfs_id);
+               printf("Prune %s: prune_min is %dd/%02d:%02d:%02d\n",
+                      scan->filesystem,
+                       pfs.ondisk->prune_min / (24 * 60 * 60),
+                       pfs.ondisk->prune_min / 60 / 60 % 24,
+                       pfs.ondisk->prune_min / 60 % 60,
+                       pfs.ondisk->prune_min % 60);
 
                RunningIoctl = 1;
                if (ioctl(fd, HAMMERIOC_PRUNE, &scan->prune) < 0) {
@@ -224,7 +259,8 @@ hammer_softprune_scandir(struct softprune **basep,
                if ((ptr = strrchr(linkbuf, '@')) &&
                    ptr > linkbuf && ptr[-1] == '@') {
                        hammer_softprune_addentry(basep, template,
-                                                 dirname, linkbuf, ptr - 1);
+                                                 dirname, den->d_name, &st,
+                                                 linkbuf, ptr - 1);
                }
        }
        free(linkbuf);
@@ -234,13 +270,14 @@ hammer_softprune_scandir(struct softprune **basep,
 
 /*
  * Add the softlink to the appropriate softprune structure, creating a new
- * if necessary.
+ * one if necessary.
  */
 static
 struct softprune *
 hammer_softprune_addentry(struct softprune **basep,
                         struct hammer_ioc_prune *template,
-                        const char *dirpath,
+                        const char *dirpath, const char *denname __unused,
+                        struct stat *st,
                         const char *linkbuf, const char *tidptr)
 {
        struct hammer_ioc_prune_elm *elm;
@@ -248,6 +285,9 @@ hammer_softprune_addentry(struct softprune **basep,
        struct statfs fs;
        char *fspath;
 
+       /*
+        * Calculate filesystem path.
+        */
        if (linkbuf[0] == '/') {
                asprintf(&fspath, "%*.*s",
                         (tidptr - linkbuf), (tidptr - linkbuf), linkbuf);
@@ -300,10 +340,15 @@ hammer_softprune_addentry(struct softprune **basep,
                scan->prune.elms = realloc(scan->prune.elms,
                                           sizeof(*elm) * scan->maxelms);
        }
+
+       /*
+        * NOTE: Temporarily store the snapshot timestamp in mod_tid.
+        *       This will be cleaned up in the finalization phase.
+        */
        elm = &scan->prune.elms[scan->prune.nelms];
        elm->beg_tid = strtoull(tidptr + 2, NULL, 0);
        elm->end_tid = 0;
-       elm->mod_tid = 0;
+       elm->mod_tid = (st) ? st->st_ctime : 0;
        ++scan->prune.nelms;
        return(scan);
 }
@@ -332,6 +377,8 @@ static void
 hammer_softprune_finalize(struct softprune *scan)
 {
        struct hammer_ioc_prune_elm *elm;
+       time_t t;
+       long delta;
        int i;
 
        /*
@@ -371,18 +418,66 @@ hammer_softprune_finalize(struct softprune *scan)
                         */
                        elm->end_tid = elm[-1].beg_tid;
                }
-               elm->mod_tid = elm->end_tid - elm->beg_tid;
+       }
+
+       /*
+        * If a minimum retention time (in seconds) is configured for the
+        * PFS, remove any snapshots from the pruning list that are within
+        * the period.
+        */
+       if (scan->prune_min) {
+               t = time(NULL);
+               for (i = scan->prune.nelms - 1; i >= 0; --i) {
+                       elm = &scan->prune.elms[i];
+                       if (elm->mod_tid == 0)
+                               continue;
+                       delta = (long)(t - (time_t)elm->mod_tid);
+                       if (delta < scan->prune_min)
+                               break;
+               }
+               ++i;
+               if (i) {
+                       printf("Prune %s: prune_min: Will not clean between "
+                              "the teeth of the first %d snapshots\n",
+                              scan->filesystem, i);
+                       bcopy(&scan->prune.elms[i], &scan->prune.elms[0],
+                             (scan->prune.nelms - i) * sizeof(scan->prune.elms[0]));
+                       scan->prune.elms[0].end_tid = HAMMER_MAX_TID;
+                       scan->prune.nelms -= i;
+               }
+       }
+
+       /*
+        * Remove the first entry.  This entry represents the prune from
+        * the most recent snapshot to current.  We wish to retain the
+        * fine-grained history for this region.
+        */
+       if (scan->prune.nelms) {
+               bcopy(&scan->prune.elms[1], &scan->prune.elms[0],
+                     (scan->prune.nelms - 1) * sizeof(scan->prune.elms[0]));
+               --scan->prune.nelms;
        }
 
        /*
         * Add a final element to prune everything from transaction id
         * 0 to the lowest transaction id (aka last so far).
         */
-       assert(scan->prune.nelms < scan->maxelms);
-       elm = &scan->prune.elms[scan->prune.nelms++];
-       elm->beg_tid = 1;
-       elm->end_tid = elm[-1].beg_tid;
-       elm->mod_tid = elm->end_tid - elm->beg_tid;
+       if (scan->prune.nelms) {
+               assert(scan->prune.nelms < scan->maxelms);
+               elm = &scan->prune.elms[scan->prune.nelms];
+               elm->beg_tid = 1;
+               elm->end_tid = elm[-1].beg_tid;
+               ++scan->prune.nelms;
+       }
+
+       /*
+        * Adjust mod_tid to what the ioctl() expects.
+        */
+       for (i = 0; i < scan->prune.nelms; ++i) {
+               elm = &scan->prune.elms[i];
+               elm->mod_tid = elm->end_tid - elm->beg_tid;
+               printf("TID %016llx - %016llx\n", elm->beg_tid, elm->end_tid);
+       }
 }
 
 static
index b41ed46..2680651 100644 (file)
@@ -198,13 +198,17 @@ This command needs the
 .Fl f
 flag.
 .\" ==== show ====
-.It Ar show
+.It Ar show Op Ar lo:objid
 Dump the B-tree.  By default this command will validate all B-Tree
 linkages and CRCs, including data CRCs, and will report the most verbose
 information it can dig up.
 Any errors will show up with a 'B' in column 1 along with various
 other error flags.
 .Pp
+If you specify a localization and object id, the dump will first
+search for that key, printing nodes as it recurses down, and then
+iterate forwards from there.
+.Pp
 If you use
 .Fl q
 the command will report less information about the inode contents.
@@ -684,6 +688,14 @@ is an appropriate label.
 You can control snapshot retention on your slave independent of the master.
 .It snapshots-clear
 Zero out the snapshots directory path for this PFS.
+.It prune-min=Nd
+.It prune-min=Nd/hh[:mm[:ss]]
+.It prune-min=hh[:mm[:ss]]
+Set the minimum fine-grained data retention period.
+.Nm HAMMER
+always retains fine-grained history between the most recent snapshot and
+the present.  You can extend fine-grained retention to older snapshots by
+specifying a minimum retention period with this option.
 .El
 .\" ==== pfs-upgrade ====
 .It Ar pfs-upgrade Ar dirpath
@@ -771,14 +783,24 @@ you want to create a compatible PFS slave for the target or not.
 .It Ar mirror-stream Ar [[user@]host:]filesystem Ar [[user@]host:]filesystem
 This command works similarly to
 .Ar mirror-copy
-but does not exit unless the pipe is broken.
-This command will resume the mirroring operation whenever the master is synced.
+but does not exit after the initial mirroring completes.
+The mirroring operation will resume as changes continue to be made to the
+master.
 The command is commonly used with
 .Fl i Ar delay
 and
 .Fl b Ar bandwidth
 options to keep the mirroring target in sync with the source on a continuing
 basis.
+.Pp
+If the pipe is broken, the command will automatically retry after sleeping
+for a short while.
+.Pp
+This command also detects the initial-mirroring case and spends some
+time scanning the B-Tree to find good break points, allowing the initial
+bulk mirroring operation to be broken down into about 20 separate pieces.
+This means that the user can kill and restart the operation and it will
+not have to start from scratch once it has gotten past the first chunk.
 .\" ==== version ====
 .It Ar version Ar filesystem
 This command returns the
index ea18a3c..6d7ab5a 100644 (file)
@@ -52,6 +52,7 @@ int TwoWayPipeOpt;
 int TimeoutOpt;
 int DelayOpt = 5;
 int ForceYesOpt = 0;
+int ForceOpt;
 int RunningIoctl;
 int DidInterrupt;
 u_int64_t BandwidthOpt;
@@ -67,7 +68,7 @@ main(int ac, char **av)
        int ch;
        int cacheSize = 0;
 
-       while ((ch = getopt(ac, av, "b:c:dhf:i:qrs:t:v2yC:")) != -1) {
+       while ((ch = getopt(ac, av, "b:c:dhf:i:qrs:t:v2yC:F")) != -1) {
                switch(ch) {
                case '2':
                        TwoWayPipeOpt = 1;
@@ -166,6 +167,9 @@ main(int ac, char **av)
                        }
                        hammer_cache_set(cacheSize);
                        break;
+               case 'F':
+                       ForceOpt = 1;
+                       break;
                default:
                        usage(1);
                        /* not reached */
@@ -369,12 +373,13 @@ main(int ac, char **av)
        }
 
        if (strcmp(av[0], "show") == 0) {
-               hammer_off_t node_offset = (hammer_off_t)-1;
+               u_int32_t lo = 0;
+               int64_t obj_id = (int64_t)HAMMER_MIN_OBJID;
 
                hammer_parsedevs(blkdevs);
                if (ac > 1)
-                       sscanf(av[1], "%llx", &node_offset);
-               hammer_cmd_show(node_offset, 0, NULL, NULL);
+                       sscanf(av[1], "%08x:%llx", &lo, &obj_id);
+               hammer_cmd_show(-1, lo, obj_id, 0, NULL, NULL);
                exit(0);
        }
        if (strcmp(av[0], "blockmap") == 0) {
index d055888..1084e8a 100644 (file)
@@ -69,11 +69,13 @@ extern int DelayOpt;
 extern int ForceYesOpt;
 extern int RunningIoctl;
 extern int DidInterrupt;
+extern int ForceOpt;
 extern u_int64_t BandwidthOpt;
 extern const char *LinkPath;
 extern const char *CyclePath;
 
-void hammer_cmd_show(hammer_tid_t node_offset, int depth,
+void hammer_cmd_show(hammer_tid_t node_offset, u_int32_t lo,
+               int64_t obj_id, int depth,
                hammer_base_elm_t left_bound, hammer_base_elm_t right_bound);
 void hammer_cmd_prune(char **av, int ac);
 void hammer_cmd_softprune(char **av, int ac, int everything_opt);