From 5ce955316546b3a8b09bb7573648567368548b04 Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Thu, 21 Oct 2010 07:47:46 +0100 Subject: [PATCH] device mapper - implement dumping * Implement dmdump and dump routines for the three main targets (linear, stripe and crypt). * The top-level dmdump will call all the required dump() methods in the targets just as it does with strategy() calls. The lower level target-specific dump routines will then redirect (after processing, etc.) these requests to the underlying device's dump routines. * This should provide quite reliable dumping even through device mapper, although it is more error-prone than the equivalent dumping on normal disks as there's a lot more going on behind the scenes. --- sys/dev/disk/dm/device-mapper.c | 85 +++++++++++++ sys/dev/disk/dm/dm.h | 9 +- sys/dev/disk/dm/dm_pdev.c | 21 ++++ sys/dev/disk/dm/dm_target.c | 3 + sys/dev/disk/dm/dm_target_crypt.c | 191 ++++++++++++++++++++++++++++- sys/dev/disk/dm/dm_target_linear.c | 17 +++ sys/dev/disk/dm/dm_target_stripe.c | 78 ++++++++++++ 7 files changed, 401 insertions(+), 3 deletions(-) diff --git a/sys/dev/disk/dm/device-mapper.c b/sys/dev/disk/dm/device-mapper.c index df64705c33..38c84d791e 100644 --- a/sys/dev/disk/dm/device-mapper.c +++ b/sys/dev/disk/dm/device-mapper.c @@ -55,6 +55,7 @@ static d_open_t dmopen; static d_close_t dmclose; static d_psize_t dmsize; static d_strategy_t dmstrategy; +static d_dump_t dmdump; /* attach and detach routines */ void dmattach(int); @@ -80,6 +81,7 @@ struct dev_ops dm_ops = { .d_ioctl = dmioctl, .d_strategy = dmstrategy, .d_psize = dmsize, + .d_dump = dmdump, /* D_DISK */ }; @@ -498,6 +500,89 @@ dmstrategy(struct dev_strategy_args *ap) return 0; } +static int +dmdump(struct dev_dump_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + dm_dev_t *dmv; + dm_table_t *tbl; + dm_table_entry_t *table_en; + uint32_t dev_type; + uint64_t buf_start, buf_len, issued_len; + uint64_t table_start, table_end; + uint64_t start, end,
data_offset; + off_t offset; + size_t length; + int error = 0; + + buf_start = ap->a_offset; + buf_len = ap->a_length; + + tbl = NULL; + + table_end = 0; + dev_type = 0; + issued_len = 0; + + if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) { + return EIO; + } + + /* Select active table */ + tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE); + + + /* + * Find out what tables I want to select. + */ + SLIST_FOREACH(table_en, tbl, next) { + /* + * I need the number of bytes, not blocks. + */ + table_start = table_en->start * DEV_BSIZE; + table_end = table_start + (table_en->length) * DEV_BSIZE; + + /* + * Calculate the start and end + */ + start = MAX(table_start, buf_start); + end = MIN(table_end, buf_start + buf_len); + + if (ap->a_length == 0) { + if (table_en->target->dump == NULL) { + error = ENXIO; + goto out; + } + + table_en->target->dump(table_en, NULL, 0, 0); + } else if (start < end) { + data_offset = start - buf_start; + offset = start - table_start; + length = end - start; + + if (table_en->target->dump == NULL) { + error = ENXIO; + goto out; + } + + table_en->target->dump(table_en, + (char *)ap->a_virtual + data_offset, + length, offset); + + issued_len += end - start; + } + } + + if (issued_len < buf_len) + error = EINVAL; + +out: + dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE); + dm_dev_unbusy(dmv); + + return error; +} + static int dmsize(struct dev_psize_args *ap) { diff --git a/sys/dev/disk/dm/dm.h b/sys/dev/disk/dm/dm.h index 4c6abe7fa3..9d8819f9eb 100644 --- a/sys/dev/disk/dm/dm.h +++ b/sys/dev/disk/dm/dm.h @@ -46,6 +46,7 @@ #include #include +#include #include #include @@ -108,6 +109,7 @@ typedef struct dm_table_head { typedef struct dm_pdev { char name[MAX_DEV_NAME]; + struct partinfo pdev_pinfo; /* partinfo of the underlying device */ struct vnode *pdev_vnode; int ref_cnt; /* reference counter for users ofthis pdev */ @@ -232,7 +234,8 @@ typedef struct dm_target { char * (*status)(void *); int
(*strategy)(dm_table_entry_t *, struct buf *); int (*upcall)(dm_table_entry_t *, struct buf *); - + int (*dump)(dm_table_entry_t *, void *data, size_t length, off_t offset); + uint32_t version[3]; int ref_cnt; @@ -315,6 +318,7 @@ int dm_target_linear_strategy(dm_table_entry_t *, struct buf *); int dm_target_linear_deps(dm_table_entry_t *, prop_array_t); int dm_target_linear_destroy(dm_table_entry_t *); int dm_target_linear_upcall(dm_table_entry_t *, struct buf *); +int dm_target_linear_dump(dm_table_entry_t *, void *, size_t, off_t); /* dm_target_crypt.c */ int dm_target_crypt_init(dm_dev_t *, void**, char *); @@ -323,6 +327,7 @@ int dm_target_crypt_strategy(dm_table_entry_t *, struct buf *); int dm_target_crypt_deps(dm_table_entry_t *, prop_array_t); int dm_target_crypt_destroy(dm_table_entry_t *); int dm_target_crypt_upcall(dm_table_entry_t *, struct buf *); +int dm_target_crypt_dump(dm_table_entry_t *, void *, size_t, off_t); /* Generic function used to convert char to string */ uint64_t atoi64(const char *); @@ -342,6 +347,7 @@ int dm_target_stripe_strategy(dm_table_entry_t *, struct buf *); int dm_target_stripe_deps(dm_table_entry_t *, prop_array_t); int dm_target_stripe_destroy(dm_table_entry_t *); int dm_target_stripe_upcall(dm_table_entry_t *, struct buf *); +int dm_target_stripe_dump(dm_table_entry_t *, void *, size_t, off_t); /* dm_target_snapshot.c */ int dm_target_snapshot_init(dm_dev_t *, void**, char *); @@ -391,6 +397,7 @@ int dm_pdev_decr(dm_pdev_t *); int dm_pdev_destroy(void); int dm_pdev_init(void); dm_pdev_t* dm_pdev_insert(const char *); +off_t dm_pdev_correct_dump_offset(dm_pdev_t *, off_t); extern int dm_debug_level; MALLOC_DECLARE(M_DM); diff --git a/sys/dev/disk/dm/dm_pdev.c b/sys/dev/disk/dm/dm_pdev.c index 0dede1497d..5631808403 100644 --- a/sys/dev/disk/dm/dm_pdev.c +++ b/sys/dev/disk/dm/dm_pdev.c @@ -101,6 +101,19 @@ dm_dk_lookup(const char *dev_name, struct vnode **vpp) return 0; } +off_t +dm_pdev_correct_dump_offset(dm_pdev_t *pdev, 
off_t offset) +{ + off_t noffset; + + noffset = pdev->pdev_pinfo.reserved_blocks + + pdev->pdev_pinfo.media_offset / pdev->pdev_pinfo.media_blksize; + noffset *= DEV_BSIZE; + noffset += offset; + + return noffset; +} + /* * Create entry for device with name dev_name and open vnode for it. * If entry already exists in global SLIST I will only increment @@ -138,6 +151,14 @@ dm_pdev_insert(const char *dev_name) } dmp->ref_cnt = 1; + /* + * Get us the partinfo from the underlying device, it's needed for + * dumps. + */ + bzero(&dmp->pdev_pinfo, sizeof(dmp->pdev_pinfo)); + error = dev_dioctl(dmp->pdev_vnode->v_rdev, DIOCGPART, + (void *)&dmp->pdev_pinfo, 0, proc0.p_ucred, NULL); + lockmgr(&dm_pdev_mutex, LK_EXCLUSIVE); SLIST_INSERT_HEAD(&dm_pdev_list, dmp, next_pdev); lockmgr(&dm_pdev_mutex, LK_RELEASE); diff --git a/sys/dev/disk/dm/dm_target.c b/sys/dev/disk/dm/dm_target.c index 507c6d33b4..326a01c6fb 100644 --- a/sys/dev/disk/dm/dm_target.c +++ b/sys/dev/disk/dm/dm_target.c @@ -297,6 +297,7 @@ dm_target_init(void) dmt->deps = &dm_target_linear_deps; dmt->destroy = &dm_target_linear_destroy; dmt->upcall = &dm_target_linear_upcall; + dmt->dump = &dm_target_linear_dump; r = dm_target_insert(dmt); @@ -310,6 +311,7 @@ dm_target_init(void) dmt3->deps = &dm_target_stripe_deps; dmt3->destroy = &dm_target_stripe_destroy; dmt3->upcall = &dm_target_stripe_upcall; + dmt3->dump = &dm_target_stripe_dump; r = dm_target_insert(dmt3); @@ -323,6 +325,7 @@ dm_target_init(void) dmt5->deps = &dm_target_crypt_deps; dmt5->destroy = &dm_target_crypt_destroy; dmt5->upcall = &dm_target_crypt_upcall; + dmt5->dump = &dm_target_crypt_dump; r = dm_target_insert(dmt5); diff --git a/sys/dev/disk/dm/dm_target_crypt.c b/sys/dev/disk/dm/dm_target_crypt.c index da9b8cdc6b..c1ff6f07f0 100644 --- a/sys/dev/disk/dm/dm_target_crypt.c +++ b/sys/dev/disk/dm/dm_target_crypt.c @@ -41,8 +41,11 @@ #include #include +#include +#include #include #include +#include #include #include #include @@ -84,7 +87,7 @@ 
typedef struct target_crypt_config { int crypto_alg; int crypto_klen; u_int8_t crypto_key[512>>3]; - + u_int64_t crypto_sid; u_int64_t block_offset; int64_t iv_offset; @@ -102,13 +105,30 @@ struct dmtc_helper { caddr_t data_buf; }; +struct dmtc_dump_helper { + dm_target_crypt_config_t *priv; + void *data; + size_t length; + off_t offset; + + int sectors; + int *ident; + + struct cryptodesc crd[128]; + struct cryptop crp[128]; + u_char space[65536]; +}; + static void dmtc_crypto_dispatch(void *arg); +static void dmtc_crypto_dump_start(dm_target_crypt_config_t *priv, + struct dmtc_dump_helper *dump_helper); static void dmtc_crypto_read_start(dm_target_crypt_config_t *priv, struct bio *bio); static void dmtc_crypto_write_start(dm_target_crypt_config_t *priv, struct bio *bio); static void dmtc_bio_read_done(struct bio *bio); static void dmtc_bio_write_done(struct bio *bio); +static int dmtc_crypto_cb_dump_done(struct cryptop *crp); static int dmtc_crypto_cb_read_done(struct cryptop *crp); static int dmtc_crypto_cb_write_done(struct cryptop *crp); @@ -269,6 +289,7 @@ essiv_ivgen_done(struct cryptop *crp) void *free_addr; void *opaque; + if (crp->crp_etype == EAGAIN) return crypto_dispatch(crp); @@ -286,7 +307,7 @@ essiv_ivgen_done(struct cryptop *crp) ivpriv = *((struct essiv_ivgen_priv **)crp->crp_opaque); crp->crp_opaque += sizeof(void *); opaque = *((void **)crp->crp_opaque); - + objcache_put(ivpriv->crp_crd_cache, free_addr); dmtc_crypto_dispatch(opaque); return 0; @@ -1186,3 +1207,169 @@ dmtc_bio_write_done(struct bio *bio) biodone(obio); } /* END OF STRATEGY WRITE SECTION */ + + + +/* DUMPING MAGIC */ + +extern int tsleep_crypto_dump; + +int +dm_target_crypt_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset) +{ + static struct dmtc_dump_helper dump_helper; + dm_target_crypt_config_t *priv; + int id; + static int first_call = 1; + + priv = table_en->target_config; + + if (first_call) { + first_call = 0; + dump_reactivate_cpus(); + } + + /* 
Magically enable tsleep */ + tsleep_crypto_dump = 1; + id = 0; + + /* + * 0 length means flush buffers and return + */ + if (length == 0) { + if (priv->pdev->pdev_vnode->v_rdev == NULL) { + tsleep_crypto_dump = 0; + return ENXIO; + } + dev_ddump(priv->pdev->pdev_vnode->v_rdev, + data, 0, offset, 0); + tsleep_crypto_dump = 0; + return 0; + } + + bzero(&dump_helper, sizeof(dump_helper)); + dump_helper.priv = priv; + dump_helper.data = data; + dump_helper.length = length; + dump_helper.offset = offset + + priv->block_offset * DEV_BSIZE; + dump_helper.ident = &id; + dmtc_crypto_dump_start(priv, &dump_helper); + + /* + * Hackery to make stuff appear synchronous. The crypto callback will + * set id to 1 and call wakeup on it. If the request completed + * synchronously, id will be 1 and we won't bother to sleep. If not, + * the crypto request will complete asynchronously and we sleep until + * it's done. + */ + if (id == 0) + tsleep(&dump_helper, 0, "cryptdump", 0); + + dump_helper.offset = dm_pdev_correct_dump_offset(priv->pdev, + dump_helper.offset); + + dev_ddump(priv->pdev->pdev_vnode->v_rdev, + dump_helper.space, 0, dump_helper.offset, + dump_helper.length); + + tsleep_crypto_dump = 0; + return 0; +} + +static void +dmtc_crypto_dump_start(dm_target_crypt_config_t *priv, struct dmtc_dump_helper *dump_helper) +{ + struct cryptodesc *crd; + struct cryptop *crp; + struct cryptoini *cri; + int i, bytes, sectors; + off_t isector; + + cri = &priv->crypto_session; + + bytes = dump_helper->length; + + isector = dump_helper->offset / DEV_BSIZE; /* ivgen salt base? 
*/ + sectors = bytes / DEV_BSIZE; /* Number of sectors */ + dump_helper->sectors = sectors; +#if 0 + kprintf("Dump, bytes = %d, " + "sectors = %d, LENGTH=%zu\n", bytes, sectors, dump_helper->length); +#endif + KKASSERT(dump_helper->length <= 65536); + + memcpy(dump_helper->space, dump_helper->data, bytes); + + cpu_sfence(); + + for (i = 0; i < sectors; i++) { + crp = &dump_helper->crp[i]; + crd = &dump_helper->crd[i]; + + crp->crp_buf = dump_helper->space + i * DEV_BSIZE; + + crp->crp_sid = priv->crypto_sid; + crp->crp_ilen = crp->crp_olen = DEV_BSIZE; + + crp->crp_opaque = (void *)dump_helper; + + crp->crp_callback = dmtc_crypto_cb_dump_done; + crp->crp_desc = crd; + crp->crp_etype = 0; + crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_REL | + CRYPTO_F_BATCH; + + crd->crd_alg = priv->crypto_alg; + + crd->crd_skip = 0; + crd->crd_len = DEV_BSIZE /* XXX */; + crd->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; + crd->crd_next = NULL; + + crd->crd_flags |= CRD_F_ENCRYPT; + + /* + * Note: last argument is used to generate salt(?) and is + * a 64 bit value, but the original code passed an + * int. Changing it now will break pre-existing + * crypt volumes. 
+ */ + priv->ivgen->gen_iv(priv, crd->crd_iv, sizeof(crd->crd_iv), + isector + i, crp); + } +} + +static int +dmtc_crypto_cb_dump_done(struct cryptop *crp) +{ + struct dmtc_dump_helper *dump_helper; + dm_target_crypt_config_t *priv; + int n; + + if (crp->crp_etype == EAGAIN) + return crypto_dispatch(crp); + + dump_helper = (struct dmtc_dump_helper *)crp->crp_opaque; + KKASSERT(dump_helper != NULL); + + if (crp->crp_etype != 0) { + kprintf("dm_target_crypt: dmtc_crypto_cb_dump_done " + "crp_etype = %d\n", + crp->crp_etype); + return crp->crp_etype; + } + + /* + * On the last chunk of the encryption we return control + */ + n = atomic_fetchadd_int(&dump_helper->sectors, -1); + + if (n == 1) { + priv = (dm_target_crypt_config_t *)dump_helper->priv; + atomic_add_int(dump_helper->ident, 1); + wakeup(dump_helper); + } + + return 0; +} diff --git a/sys/dev/disk/dm/dm_target_linear.c b/sys/dev/disk/dm/dm_target_linear.c index 9b2b261f6c..f929384f60 100644 --- a/sys/dev/disk/dm/dm_target_linear.c +++ b/sys/dev/disk/dm/dm_target_linear.c @@ -147,6 +147,23 @@ dm_target_linear_strategy(dm_table_entry_t * table_en, struct buf * bp) return 0; } + +int +dm_target_linear_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset) +{ + dm_target_linear_config_t *tlc; + + tlc = table_en->target_config; + + offset += tlc->offset * DEV_BSIZE; + offset = dm_pdev_correct_dump_offset(tlc->pdev, offset); + + if (tlc->pdev->pdev_vnode->v_rdev == NULL) + return ENXIO; + + return dev_ddump(tlc->pdev->pdev_vnode->v_rdev, data, 0, offset, length); +} + /* * Destroy target specific data. Decrement table pdevs. 
*/ diff --git a/sys/dev/disk/dm/dm_target_stripe.c b/sys/dev/disk/dm/dm_target_stripe.c index f35418183a..c010f4bcb3 100644 --- a/sys/dev/disk/dm/dm_target_stripe.c +++ b/sys/dev/disk/dm/dm_target_stripe.c @@ -313,6 +313,84 @@ dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp) return 0; } + +int +dm_target_stripe_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset) +{ + dm_target_stripe_config_t *tsc; + uint64_t blkno, blkoff; + uint64_t stripe, blknr; + uint32_t stripe_off, stripe_rest, num_blks, issue_blks; + uint64_t off2, len2; + int devnr; + + tsc = table_en->target_config; + if (tsc == NULL) + return 0; + + /* calculate extent of request */ + KKASSERT(length % DEV_BSIZE == 0); + + blkno = offset / DEV_BSIZE; + blkoff = 0; + num_blks = length / DEV_BSIZE; + + /* + * 0 length means flush buffers and return + */ + if (length == 0) { + for (devnr = 0; devnr < tsc->stripe_num; ++devnr) { + if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL) + return ENXIO; + + dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev, + data, 0, offset, 0); + } + return 0; + } + + while (num_blks > 0) { + /* blockno to strip piece nr */ + stripe = blkno / tsc->stripe_chunksize; + stripe_off = blkno % tsc->stripe_chunksize; + + /* where we are inside the strip */ + devnr = stripe % tsc->stripe_num; + blknr = stripe / tsc->stripe_num; + + /* how much is left before we hit a boundary */ + stripe_rest = tsc->stripe_chunksize - stripe_off; + + /* issue this piece on stripe `stripe' */ + issue_blks = MIN(stripe_rest, num_blks); + +#if 0 + nestiobuf_add(bio, nestbuf, blkoff, + issue_blks * DEV_BSIZE); +#endif + len2 = issue_blks * DEV_BSIZE; + + /* I need number of bytes. 
*/ + off2 = blknr * tsc->stripe_chunksize + stripe_off; + off2 += tsc->stripe_devs[devnr].offset; + off2 *= DEV_BSIZE; + off2 = dm_pdev_correct_dump_offset(tsc->stripe_devs[devnr].pdev, + off2); + + if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL) + return ENXIO; + + dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev, + (char *)data + blkoff, 0, off2, len2); + + blkno += issue_blks; + blkoff += issue_blks * DEV_BSIZE; + num_blks -= issue_blks; + } + + return 0; +} + /* * Destroy a dm table entry for stripes. */ -- 2.41.0