device mapper - implement dumping
author: Alex Hornung <ahornung@gmail.com>
Thu, 21 Oct 2010 06:47:46 +0000 (07:47 +0100)
committer: Matthew Dillon <dillon@apollo.backplane.com>
Sun, 24 Oct 2010 16:34:56 +0000 (09:34 -0700)
* Implement dmdump and dump routines for the three main targets (linear,
  stripe and crypt).

* The top-level dmdump will call all the required dump() methods in the
  targets just as it does with strategy() calls. The lower level
  target-specific dump routines will then redirect (after processing,
  etc) these requests to the underlying device's dump routines.

* This should provide quite reliable dumping even through device mapper,
  although it is more error-prone than the equivalent dumping on normal
  disks as there's a lot more going on behind the scenes.

sys/dev/disk/dm/device-mapper.c
sys/dev/disk/dm/dm.h
sys/dev/disk/dm/dm_pdev.c
sys/dev/disk/dm/dm_target.c
sys/dev/disk/dm/dm_target_crypt.c
sys/dev/disk/dm/dm_target_linear.c
sys/dev/disk/dm/dm_target_stripe.c

index df64705..38c84d7 100644 (file)
@@ -55,6 +55,7 @@ static        d_open_t        dmopen;
 static d_close_t       dmclose;
 static d_psize_t       dmsize;
 static d_strategy_t    dmstrategy;
+static d_dump_t        dmdump;
 
 /* attach and detach routines */
 void dmattach(int);
@@ -80,6 +81,7 @@ struct dev_ops dm_ops = {
        .d_ioctl        = dmioctl,
        .d_strategy     = dmstrategy,
        .d_psize        = dmsize,
+       .d_dump         = dmdump,
 /* D_DISK */
 };
 
@@ -499,6 +501,89 @@ dmstrategy(struct dev_strategy_args *ap)
 }
 
 static int
+dmdump(struct dev_dump_args *ap)
+{
+       cdev_t dev = ap->a_head.a_dev;
+       dm_dev_t *dmv;
+       dm_table_t  *tbl;
+       dm_table_entry_t *table_en;
+       uint32_t dev_type;
+       uint64_t buf_start, buf_len, issued_len;
+       uint64_t table_start, table_end;
+       uint64_t start, end, data_offset;
+       off_t offset;
+       size_t length;
+       int error = 0;
+
+       buf_start = ap->a_offset;
+       buf_len = ap->a_length;
+
+       tbl = NULL; 
+
+       table_end = 0;
+       dev_type = 0;
+       issued_len = 0;
+
+       if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) {
+               return EIO;
+       } 
+
+       /* Select active table */
+       tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE);
+
+
+       /*
+        * Find out what tables I want to select.
+        */
+       SLIST_FOREACH(table_en, tbl, next) {
+               /*
+                * I need need number of bytes not blocks.
+                */
+               table_start = table_en->start * DEV_BSIZE;
+               table_end = table_start + (table_en->length) * DEV_BSIZE;
+
+               /*
+                * Calculate the start and end
+                */
+               start = MAX(table_start, buf_start);
+               end = MIN(table_end, buf_start + buf_len);
+
+               if (ap->a_length == 0) {
+                       if (table_en->target->dump == NULL) {
+                               error = ENXIO;
+                               goto out;
+                       }
+
+                       table_en->target->dump(table_en, NULL, 0, 0);
+               } else if (start < end) {
+                       data_offset = start - buf_start;
+                       offset = start - table_start;
+                       length = end - start;
+
+                       if (table_en->target->dump == NULL) {
+                               error = ENXIO;
+                               goto out;
+                       }
+
+                       table_en->target->dump(table_en,
+                           (char *)ap->a_virtual + data_offset,
+                           length, offset);
+
+                       issued_len += end - start;
+               }
+       }
+
+       if (issued_len < buf_len)
+               error = EINVAL;
+
+out:
+       dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE);
+       dm_dev_unbusy(dmv);
+
+       return error;
+}
+
+static int
 dmsize(struct dev_psize_args *ap)
 {
        cdev_t dev = ap->a_head.a_dev;
index 4c6abe7..9d8819f 100644 (file)
@@ -46,6 +46,7 @@
 #include <sys/queue.h>
 
 #include <sys/device.h>
+#include <sys/diskslice.h>
 #include <sys/disklabel.h>
 
 #include <libprop/proplib.h>
@@ -108,6 +109,7 @@ typedef struct dm_table_head {
 
 typedef struct dm_pdev {
        char name[MAX_DEV_NAME];
+       struct partinfo pdev_pinfo; /* partinfo of the underlying device */
 
        struct vnode *pdev_vnode;
        int ref_cnt; /* reference counter for users ofthis pdev */
@@ -232,7 +234,8 @@ typedef struct dm_target {
        char * (*status)(void *);
        int (*strategy)(dm_table_entry_t *, struct buf *);
        int (*upcall)(dm_table_entry_t *, struct buf *);
-       
+       int (*dump)(dm_table_entry_t *, void *data, size_t length, off_t offset);
+
        uint32_t version[3];
        int ref_cnt;
        
@@ -315,6 +318,7 @@ int dm_target_linear_strategy(dm_table_entry_t *, struct buf *);
 int dm_target_linear_deps(dm_table_entry_t *, prop_array_t);
 int dm_target_linear_destroy(dm_table_entry_t *);
 int dm_target_linear_upcall(dm_table_entry_t *, struct buf *);
+int dm_target_linear_dump(dm_table_entry_t *, void *, size_t, off_t);
 
 /* dm_target_crypt.c */
 int dm_target_crypt_init(dm_dev_t *, void**, char *);
@@ -323,6 +327,7 @@ int dm_target_crypt_strategy(dm_table_entry_t *, struct buf *);
 int dm_target_crypt_deps(dm_table_entry_t *, prop_array_t);
 int dm_target_crypt_destroy(dm_table_entry_t *);
 int dm_target_crypt_upcall(dm_table_entry_t *, struct buf *);
+int dm_target_crypt_dump(dm_table_entry_t *, void *, size_t, off_t);
 
 /* Generic function used to convert char to string */
 uint64_t atoi64(const char *);
@@ -342,6 +347,7 @@ int dm_target_stripe_strategy(dm_table_entry_t *, struct buf *);
 int dm_target_stripe_deps(dm_table_entry_t *, prop_array_t);
 int dm_target_stripe_destroy(dm_table_entry_t *);
 int dm_target_stripe_upcall(dm_table_entry_t *, struct buf *);
+int dm_target_stripe_dump(dm_table_entry_t *, void *, size_t, off_t);
 
 /* dm_target_snapshot.c */
 int dm_target_snapshot_init(dm_dev_t *, void**, char *);
@@ -391,6 +397,7 @@ int dm_pdev_decr(dm_pdev_t *);
 int dm_pdev_destroy(void);
 int dm_pdev_init(void);
 dm_pdev_t* dm_pdev_insert(const char *);
+off_t dm_pdev_correct_dump_offset(dm_pdev_t *, off_t);
 
 extern int dm_debug_level;
 MALLOC_DECLARE(M_DM);
index 0dede14..5631808 100644 (file)
@@ -101,6 +101,19 @@ dm_dk_lookup(const char *dev_name, struct vnode **vpp)
        return 0;
 }
 
+/*
+ * Translate a byte offset relative to the underlying device into the
+ * offset handed to that device's dump routine, by adding the reserved
+ * blocks and the media offset recorded in the pdev's partinfo.
+ *
+ * Returns the corrected byte offset, or the offset unchanged when no
+ * block size is recorded for the pdev.
+ */
+off_t
+dm_pdev_correct_dump_offset(dm_pdev_t *pdev, off_t offset)
+{
+       off_t noffset;
+
+       /*
+        * pdev_pinfo is zeroed before it is queried; if the query left it
+        * untouched the block size is still 0.  Every term of the
+        * correction is then 0 as well, so skip it instead of dividing by
+        * zero in the middle of a dump.
+        */
+       if (pdev->pdev_pinfo.media_blksize == 0)
+               return offset;
+
+       noffset = pdev->pdev_pinfo.reserved_blocks +
+           pdev->pdev_pinfo.media_offset / pdev->pdev_pinfo.media_blksize;
+       noffset *= DEV_BSIZE;
+       noffset += offset;
+
+       return noffset;
+}
+
 /*
  * Create entry for device with name dev_name and open vnode for it.
  * If entry already exists in global SLIST I will only increment
@@ -138,6 +151,14 @@ dm_pdev_insert(const char *dev_name)
        }
        dmp->ref_cnt = 1;
 
+       /*
+        * Get us the partinfo from the underlying device, it's needed for
+        * dumps.
+        */
+       bzero(&dmp->pdev_pinfo, sizeof(dmp->pdev_pinfo));
+       error = dev_dioctl(dmp->pdev_vnode->v_rdev, DIOCGPART,
+           (void *)&dmp->pdev_pinfo, 0, proc0.p_ucred, NULL);
+
        lockmgr(&dm_pdev_mutex, LK_EXCLUSIVE);
        SLIST_INSERT_HEAD(&dm_pdev_list, dmp, next_pdev);
        lockmgr(&dm_pdev_mutex, LK_RELEASE);
index 507c6d3..326a01c 100644 (file)
@@ -297,6 +297,7 @@ dm_target_init(void)
        dmt->deps = &dm_target_linear_deps;
        dmt->destroy = &dm_target_linear_destroy;
        dmt->upcall = &dm_target_linear_upcall;
+       dmt->dump = &dm_target_linear_dump;
 
        r = dm_target_insert(dmt);
 
@@ -310,6 +311,7 @@ dm_target_init(void)
        dmt3->deps = &dm_target_stripe_deps;
        dmt3->destroy = &dm_target_stripe_destroy;
        dmt3->upcall = &dm_target_stripe_upcall;
+       dmt3->dump = &dm_target_stripe_dump;
 
        r = dm_target_insert(dmt3);
        
@@ -323,6 +325,7 @@ dm_target_init(void)
        dmt5->deps = &dm_target_crypt_deps;
        dmt5->destroy = &dm_target_crypt_destroy;
        dmt5->upcall = &dm_target_crypt_upcall;
+       dmt5->dump = &dm_target_crypt_dump;
 
        r = dm_target_insert(dmt5);
 
index da9b8cd..c1ff6f0 100644 (file)
 
 #include <sys/bio.h>
 #include <sys/buf.h>
+#include <sys/globaldata.h>
+#include <sys/kerneldump.h>
 #include <sys/malloc.h>
 #include <sys/md5.h>
+#include <sys/mutex2.h>
 #include <sys/vnode.h>
 #include <crypto/sha1.h>
 #include <crypto/sha2/sha2.h>
@@ -84,7 +87,7 @@ typedef struct target_crypt_config {
        int     crypto_alg;
        int     crypto_klen;
        u_int8_t        crypto_key[512>>3];
-       
+
        u_int64_t       crypto_sid;
        u_int64_t       block_offset;
        int64_t         iv_offset;
@@ -102,13 +105,30 @@ struct dmtc_helper {
        caddr_t data_buf;
 };
 
+struct dmtc_dump_helper {      /* state of one crypt dump request */
+       dm_target_crypt_config_t *priv; /* config of the crypt target */
+       void *data;             /* caller's buffer to be dumped */
+       size_t length;          /* size of the request in bytes */
+       off_t offset;           /* byte offset, target block_offset included */
+
+       int sectors;            /* sectors left; counted down by the callback */
+       int *ident;             /* completion flag, bumped by the last callback */
+
+       struct cryptodesc crd[128];     /* one descriptor per sector */
+       struct cryptop crp[128];        /* one crypto request per sector */
+       u_char space[65536];    /* copy of data handed to the crypto requests */
+};
+
 static void dmtc_crypto_dispatch(void *arg);
+static void dmtc_crypto_dump_start(dm_target_crypt_config_t *priv,
+                               struct dmtc_dump_helper *dump_helper);
 static void dmtc_crypto_read_start(dm_target_crypt_config_t *priv,
                                struct bio *bio);
 static void dmtc_crypto_write_start(dm_target_crypt_config_t *priv,
                                struct bio *bio);
 static void dmtc_bio_read_done(struct bio *bio);
 static void dmtc_bio_write_done(struct bio *bio);
+static int dmtc_crypto_cb_dump_done(struct cryptop *crp);
 static int dmtc_crypto_cb_read_done(struct cryptop *crp);
 static int dmtc_crypto_cb_write_done(struct cryptop *crp);
 
@@ -269,6 +289,7 @@ essiv_ivgen_done(struct cryptop *crp)
        void *free_addr;
        void *opaque;
 
+
        if (crp->crp_etype == EAGAIN)
                return crypto_dispatch(crp);
 
@@ -286,7 +307,7 @@ essiv_ivgen_done(struct cryptop *crp)
        ivpriv = *((struct essiv_ivgen_priv **)crp->crp_opaque);
        crp->crp_opaque += sizeof(void *);
        opaque = *((void **)crp->crp_opaque);
-       
+
        objcache_put(ivpriv->crp_crd_cache, free_addr);
        dmtc_crypto_dispatch(opaque);
        return 0;
@@ -1186,3 +1207,169 @@ dmtc_bio_write_done(struct bio *bio)
        biodone(obio);
 }
 /* END OF STRATEGY WRITE SECTION */
+
+
+
+/* DUMPING MAGIC */
+
+extern int tsleep_crypto_dump;
+
+/*
+ * Dump routine of the crypt target.
+ *
+ * Copies and encrypts length bytes of data through a static helper and
+ * writes the result to the underlying device at the corrected offset.
+ * A zero length is passed straight through to the underlying device.
+ *
+ * Returns ENXIO if the underlying device has no rdev, otherwise the
+ * result of the underlying dev_ddump() call.
+ */
+int
+dm_target_crypt_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset)
+{
+       static struct dmtc_dump_helper dump_helper;
+       static int first_call = 1;
+       dm_target_crypt_config_t *priv;
+       int error;
+       int id;
+
+       priv = table_en->target_config;
+
+       if (first_call) {
+               first_call = 0;
+               dump_reactivate_cpus();
+       }
+
+       /*
+        * Check the device once for both the flush and the data path,
+        * before any crypto work is started.
+        */
+       if (priv->pdev->pdev_vnode->v_rdev == NULL)
+               return ENXIO;
+
+       /* Magically enable tsleep */
+       tsleep_crypto_dump = 1;
+       id = 0;
+
+       /*
+        * 0 length means flush buffers and return
+        */
+       if (length == 0) {
+               error = dev_ddump(priv->pdev->pdev_vnode->v_rdev,
+                   data, 0, offset, 0);
+               tsleep_crypto_dump = 0;
+               return error;
+       }
+
+       bzero(&dump_helper, sizeof(dump_helper));
+       dump_helper.priv = priv;
+       dump_helper.data = data;
+       dump_helper.length = length;
+       dump_helper.offset = offset +
+           priv->block_offset * DEV_BSIZE;
+       dump_helper.ident = &id;
+       dmtc_crypto_dump_start(priv, &dump_helper);
+
+       /*
+        * Hackery to make stuff appear synchronous. The crypto callback will
+        * set id to 1 and call wakeup on it. If the request completed
+        * synchronously, id will be 1 and we won't bother to sleep. If not,
+        * the crypto request will complete asynchronously and we sleep until
+        * it's done.
+        *
+        * NOTE(review): nothing interlocks the check with the sleep, and the
+        * callback does not wake us when a request fails - confirm neither
+        * can leave the dump sleeping forever.
+        */
+       if (id == 0)
+               tsleep(&dump_helper, 0, "cryptdump", 0);
+
+       dump_helper.offset = dm_pdev_correct_dump_offset(priv->pdev,
+           dump_helper.offset);
+
+       /* Write the staged copy, never the caller's plaintext buffer. */
+       error = dev_ddump(priv->pdev->pdev_vnode->v_rdev,
+           dump_helper.space, 0, dump_helper.offset,
+           dump_helper.length);
+
+       tsleep_crypto_dump = 0;
+       return error;
+}
+
+/*
+ * Stage a dump request for encryption.
+ *
+ * Copies the caller's data into dump_helper->space and sets up one crypto
+ * request per DEV_BSIZE sector of that copy, each completing through
+ * dmtc_crypto_cb_dump_done().  Every request is handed to the configured
+ * IV generator together with its sector number.
+ *
+ * The length must be a non-zero multiple of DEV_BSIZE that fits into the
+ * staging buffer: with no sectors no callback would ever wake the caller,
+ * and a partial trailing sector would be left unencrypted.
+ */
+static void
+dmtc_crypto_dump_start(dm_target_crypt_config_t *priv, struct dmtc_dump_helper *dump_helper)
+{
+       struct cryptodesc *crd;
+       struct cryptop *crp;
+       int i, bytes, sectors;
+       off_t isector;
+
+       KKASSERT(dump_helper->length <= sizeof(dump_helper->space));
+       KKASSERT(dump_helper->length > 0);
+       KKASSERT(dump_helper->length % DEV_BSIZE == 0);
+
+       bytes = dump_helper->length;
+
+       isector = dump_helper->offset / DEV_BSIZE;      /* ivgen salt base? */
+       sectors = bytes / DEV_BSIZE;            /* Number of sectors */
+       dump_helper->sectors = sectors;
+
+       memcpy(dump_helper->space, dump_helper->data, bytes);
+
+       cpu_sfence();
+
+       for (i = 0; i < sectors; i++) {
+               crp = &dump_helper->crp[i];
+               crd = &dump_helper->crd[i];
+
+               crp->crp_buf = dump_helper->space + i * DEV_BSIZE;
+
+               crp->crp_sid = priv->crypto_sid;
+               crp->crp_ilen = crp->crp_olen = DEV_BSIZE;
+
+               crp->crp_opaque = (void *)dump_helper;
+
+               crp->crp_callback = dmtc_crypto_cb_dump_done;
+               crp->crp_desc = crd;
+               crp->crp_etype = 0;
+               crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_REL |
+                                CRYPTO_F_BATCH;
+
+               crd->crd_alg = priv->crypto_alg;
+
+               crd->crd_skip = 0;
+               crd->crd_len = DEV_BSIZE /* XXX */;
+               crd->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
+               crd->crd_next = NULL;
+
+               /* Dumps only ever write, so always encrypt. */
+               crd->crd_flags |= CRD_F_ENCRYPT;
+
+               /*
+                * Note: last argument is used to generate salt(?) and is
+                *       a 64 bit value, but the original code passed an
+                *       int.  Changing it now will break pre-existing
+                *       crypt volumes.
+                */
+               priv->ivgen->gen_iv(priv, crd->crd_iv, sizeof(crd->crd_iv),
+                                   isector + i, crp);
+       }
+}
+
+/*
+ * Completion callback of the per-sector crypto requests of a dump.
+ *
+ * Re-dispatches the request on EAGAIN.  Otherwise counts the sector as
+ * done and, on the last one, bumps the completion flag and wakes up the
+ * thread sleeping on the dump helper.
+ *
+ * Returns 0, the result of crypto_dispatch() on a retry, or the error
+ * type of a failed request.
+ */
+static int
+dmtc_crypto_cb_dump_done(struct cryptop *crp)
+{
+       struct dmtc_dump_helper *dump_helper;
+       int n;
+
+       if (crp->crp_etype == EAGAIN)
+               return crypto_dispatch(crp);
+
+       dump_helper = (struct dmtc_dump_helper *)crp->crp_opaque;
+       KKASSERT(dump_helper != NULL);
+
+       /*
+        * NOTE(review): a failed request is not counted as done, so the
+        * sector counter never reaches zero and nobody is woken up -
+        * confirm how the sleeping dump thread is meant to recover.
+        */
+       if (crp->crp_etype != 0) {
+               kprintf("dm_target_crypt: dmtc_crypto_cb_dump_done "
+                       "crp_etype = %d\n",
+                       crp->crp_etype);
+               return crp->crp_etype;
+       }
+
+       /*
+        * On the last chunk of the encryption we return control
+        */
+       n = atomic_fetchadd_int(&dump_helper->sectors, -1);
+
+       if (n == 1) {
+               atomic_add_int(dump_helper->ident, 1);
+               wakeup(dump_helper);
+       }
+
+       return 0;
+}
index 9b2b261..f929384 100644 (file)
@@ -147,6 +147,23 @@ dm_target_linear_strategy(dm_table_entry_t * table_en, struct buf * bp)
        return 0;
 
 }
+
+/*
+ * Dump routine of the linear target: shift the request by the target's
+ * offset on the underlying device, apply the partition correction and pass
+ * it on to that device's dump routine.
+ *
+ * Returns ENXIO if the underlying device has no rdev, otherwise the
+ * result of dev_ddump().
+ */
+int
+dm_target_linear_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset)
+{
+       dm_target_linear_config_t *cfg = table_en->target_config;
+       off_t dev_off;
+
+       /* the target offset is kept in blocks, the dump offset in bytes */
+       dev_off = offset + cfg->offset * DEV_BSIZE;
+       dev_off = dm_pdev_correct_dump_offset(cfg->pdev, dev_off);
+
+       if (cfg->pdev->pdev_vnode->v_rdev != NULL) {
+               return dev_ddump(cfg->pdev->pdev_vnode->v_rdev, data, 0,
+                   dev_off, length);
+       }
+
+       return ENXIO;
+}
+
+
 /*
  * Destroy target specific data. Decrement table pdevs.
  */
index f354181..c010f4b 100644 (file)
@@ -313,6 +313,84 @@ dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp)
        return 0;
 }
 
+
+/*
+ * Dump routine of the stripe target.
+ *
+ * Cuts the request into pieces that do not cross a chunk boundary and
+ * writes each piece through the dump routine of the stripe device that
+ * holds it.  A zero length is forwarded to every stripe device.
+ *
+ * Returns 0 on success (or when the target has no config), ENXIO if a
+ * stripe device has no rdev, or the first error reported by dev_ddump().
+ */
+int
+dm_target_stripe_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset)
+{
+       dm_target_stripe_config_t *tsc;
+       uint64_t blkno, blkoff;
+       uint64_t stripe, blknr;
+       uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
+       uint64_t off2, len2;
+       int devnr;
+       int error, derr;
+
+       tsc = table_en->target_config;
+       if (tsc == NULL)
+               return 0;
+
+       /* calculate extent of request */
+       KKASSERT(length % DEV_BSIZE == 0);
+
+       blkno = offset / DEV_BSIZE;
+       blkoff = 0;
+       num_blks = length / DEV_BSIZE;
+       error = 0;
+
+       /*
+        * 0 length means flush buffers and return.  Every device gets its
+        * flush even if another one fails; the first error is reported.
+        */
+       if (length == 0) {
+               for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
+                       if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL)
+                               derr = ENXIO;
+                       else
+                               derr = dev_ddump(
+                                   tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev,
+                                   data, 0, offset, 0);
+
+                       if (derr != 0 && error == 0)
+                               error = derr;
+               }
+               return error;
+       }
+
+       while (num_blks > 0) {
+               /* blockno to strip piece nr */
+               stripe = blkno / tsc->stripe_chunksize;
+               stripe_off = blkno % tsc->stripe_chunksize;
+
+               /* where we are inside the strip */
+               devnr = stripe % tsc->stripe_num;
+               blknr = stripe / tsc->stripe_num;
+
+               /* how much is left before we hit a boundary */
+               stripe_rest = tsc->stripe_chunksize - stripe_off;
+
+               /* issue this piece on stripe `stripe' */
+               issue_blks = MIN(stripe_rest, num_blks);
+               len2 = issue_blks * DEV_BSIZE;
+
+               /* I need number of bytes. */
+               off2 = blknr * tsc->stripe_chunksize + stripe_off;
+               off2 += tsc->stripe_devs[devnr].offset;
+               off2 *= DEV_BSIZE;
+               off2 = dm_pdev_correct_dump_offset(tsc->stripe_devs[devnr].pdev,
+                   off2);
+
+               if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL)
+                       return ENXIO;
+
+               /* A failed write must not be reported as success. */
+               error = dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev,
+                   (char *)data + blkoff, 0, off2, len2);
+               if (error != 0)
+                       return error;
+
+               blkno += issue_blks;
+               blkoff += issue_blks * DEV_BSIZE;
+               num_blks -= issue_blks;
+       }
+
+       return 0;
+}
+
 /*
  * Destroy a dm table entry for stripes.
  */