dm - misc minor cleanup
[dragonfly.git] / sys / dev / disk / dm / device-mapper.c
1 /*        $NetBSD: device-mapper.c,v 1.22 2010/03/26 15:46:04 jakllsch Exp $ */
2
3 /*
4  * Copyright (c) 2010 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Adam Hamsik.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31
32 /*
33  * I want to say thank you to all people who helped me with this project.
34  */
35
36 #include <sys/types.h>
37 #include <sys/param.h>
38
39 #include <sys/buf.h>
40 #include <sys/conf.h>
41 #include <sys/device.h>
42 #include <sys/devfs.h>
43 #include <sys/disk.h>
44 #include <sys/disklabel.h>
45 #include <sys/dtype.h>
46 #include <sys/ioccom.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/sysctl.h>
50
51 #include "netbsd-dm.h"
52 #include "dm.h"
53
/* Device entry points; these are installed in dm_ops further down. */
static  d_ioctl_t       dmioctl;
static  d_open_t        dmopen;
static  d_close_t       dmclose;
static  d_psize_t       dmsize;
static  d_strategy_t    dmstrategy;
static  d_dump_t        dmdump;

/* attach and detach routines */
/* NOTE(review): dmattach() is declared here but not defined in this file - confirm it is still needed. */
void dmattach(int);
static int dm_modcmd(module_t mod, int cmd, void *unused);
static int dmdestroy(void);

static void dm_doinit(void);

/* ioctl helpers used by dmioctl() */
static int dm_cmd_to_fun(prop_dictionary_t);
static int disk_ioctl_switch(cdev_t, u_long, void *);
static int dm_ioctl_switch(u_long);
#if 0
static void dmminphys(struct buf *);
#endif

/* Minor number bitmap: initialized in dm_modcmd(), minors are returned to it in dm_detach(). */
struct devfs_bitmap dm_minor_bitmap;
76
/* ***Variable-definitions*** */
/*
 * Device operations vector for the dm driver.  Reads and writes go
 * through physread/physwrite; every other entry point is implemented
 * in this file.
 */
struct dev_ops dm_ops = {
        { "dm", 0, D_DISK | D_MPSAFE },
        .d_open         = dmopen,
        .d_close        = dmclose,
        .d_read         = physread,
        .d_write        = physwrite,
        .d_ioctl        = dmioctl,
        .d_strategy     = dmstrategy,
        .d_psize        = dmsize,
        .d_dump         = dmdump,
/* D_DISK */
};
90
/* Malloc type used for device-mapper allocations. */
MALLOC_DEFINE(M_DM, "dm", "Device Mapper allocations");

/* Debug level; exported as the debug.dm_debug tunable/sysctl at the end of this file. */
int dm_debug_level = 0;

/* Defined elsewhere; checked on module unload and decremented in dm_detach(). */
extern uint64_t dm_dev_counter;

/* The "mapper/control" device node created in dm_doinit(). */
static cdev_t dmcdev;

/* Module description: name and event handler. */
static moduledata_t dm_mod = {
    "dm",
    dm_modcmd,
    NULL
};
DECLARE_MODULE(dm, dm_mod, SI_SUB_RAID, SI_ORDER_ANY);
105
106 /*
107  * This array is used to translate cmd to function pointer.
108  *
109  * Interface between libdevmapper and lvm2tools uses different
110  * names for one IOCTL call because libdevmapper do another thing
111  * then. When I run "info" or "mknodes" libdevmapper will send same
112  * ioctl to kernel but will do another things in userspace.
113  *
114  */
115 static struct cmd_function cmd_fn[] = {
116                 { .cmd = "version", .fn = dm_get_version_ioctl},
117                 { .cmd = "targets", .fn = dm_list_versions_ioctl},
118                 { .cmd = "create",  .fn = dm_dev_create_ioctl},
119                 { .cmd = "info",    .fn = dm_dev_status_ioctl},
120                 { .cmd = "mknodes", .fn = dm_dev_status_ioctl},
121                 { .cmd = "names",   .fn = dm_dev_list_ioctl},
122                 { .cmd = "suspend", .fn = dm_dev_suspend_ioctl},
123                 { .cmd = "remove",  .fn = dm_dev_remove_ioctl},
124                 { .cmd = "rename",  .fn = dm_dev_rename_ioctl},
125                 { .cmd = "resume",  .fn = dm_dev_resume_ioctl},
126                 { .cmd = "clear",   .fn = dm_table_clear_ioctl},
127                 { .cmd = "deps",    .fn = dm_table_deps_ioctl},
128                 { .cmd = "reload",  .fn = dm_table_load_ioctl},
129                 { .cmd = "status",  .fn = dm_table_status_ioctl},
130                 { .cmd = "table",   .fn = dm_table_status_ioctl},
131                 {NULL, NULL}
132 };
133
134 /* New module handle routine */
135 static int
136 dm_modcmd(module_t mod, int cmd, void *unused)
137 {
138         int error, bmajor, cmajor;
139
140         error = 0;
141         bmajor = -1;
142         cmajor = -1;
143
144         switch (cmd) {
145         case MOD_LOAD:
146                 devfs_clone_bitmap_init(&dm_minor_bitmap);
147                 dm_doinit();
148                 kprintf("Device Mapper version %d.%d.%d loaded\n",
149                     DM_VERSION_MAJOR, DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL);
150                 break;
151
152         case MOD_UNLOAD:
153                 /*
154                  * Disable unloading of dm module if there are any devices
155                  * defined in driver. This is probably too strong we need
156                  * to disable auto-unload only if there is mounted dm device
157                  * present.
158                  */
159                 if (dm_dev_counter > 0)
160                         return EBUSY;
161
162                 error = dmdestroy();
163                 if (error)
164                         break;
165                 kprintf("Device Mapper unloaded\n");
166                 break;
167
168         default:
169                 break;
170         }
171
172         return error;
173 }
174
175 /*
176  * dm_detach is called to completely destroy & remove a dm disk device.
177  */
178 int
179 dm_detach(dm_dev_t *dmv)
180 {
181         int minor;
182
183         /* Remove device from list and wait for refcnt to drop to zero */
184         dm_dev_rem(dmv, NULL, NULL, -1);
185
186         /* Destroy active table first.  */
187         dm_table_destroy(&dmv->table_head, DM_TABLE_ACTIVE);
188
189         /* Destroy inactive table if exits, too. */
190         dm_table_destroy(&dmv->table_head, DM_TABLE_INACTIVE);
191
192         dm_table_head_destroy(&dmv->table_head);
193
194         minor = dkunit(dmv->devt);
195         disk_destroy(dmv->diskp);
196         devstat_remove_entry(&dmv->stats);
197
198         release_dev(dmv->devt);
199         devfs_clone_bitmap_put(&dm_minor_bitmap, minor);
200
201         /* Destroy device */
202         (void)dm_dev_free(dmv);
203
204         /* Decrement device counter After removing device */
205         --dm_dev_counter; /* XXX: was atomic 64 */
206
207         return 0;
208 }
209
210 static void
211 dm_doinit(void)
212 {
213         dm_target_init();
214         dm_dev_init();
215         dm_pdev_init();
216         dmcdev = make_dev(&dm_ops, 0, UID_ROOT, GID_OPERATOR, 0640, "mapper/control");
217 }
218
219 /* Destroy routine */
220 static int
221 dmdestroy(void)
222 {
223         destroy_dev(dmcdev);
224
225         dm_dev_destroy();
226         dm_pdev_destroy();
227         dm_target_destroy();
228
229         return 0;
230 }
231
232 static int
233 dmopen(struct dev_open_args *ap)
234 {
235         cdev_t dev = ap->a_head.a_dev;
236         dm_dev_t *dmv;
237
238         /* Shortcut for the control device */
239         if (minor(dev) == 0)
240                 return 0;
241
242         if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL)
243                 return ENXIO;
244
245         dmv->is_open = 1;
246         dm_dev_unbusy(dmv);
247
248         aprint_debug("dm open routine called %" PRIu32 "\n",
249             minor(ap->a_head.a_dev));
250         return 0;
251 }
252
253 static int
254 dmclose(struct dev_close_args *ap)
255 {
256         cdev_t dev = ap->a_head.a_dev;
257         dm_dev_t *dmv;
258
259         /* Shortcut for the control device */
260         if (minor(dev) == 0)
261                 return 0;
262
263         if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL)
264                 return ENXIO;
265
266         dmv->is_open = 0;
267         dm_dev_unbusy(dmv);
268
269         aprint_debug("dm close routine called %" PRIu32 "\n",
270             minor(ap->a_head.a_dev));
271         return 0;
272 }
273
274
275 static int
276 dmioctl(struct dev_ioctl_args *ap)
277 {
278         cdev_t dev = ap->a_head.a_dev;
279         u_long cmd = ap->a_cmd;
280         void *data = ap->a_data;
281
282         int r, err;
283         prop_dictionary_t dm_dict_in;
284
285         err = r = 0;
286
287         aprint_debug("dmioctl called\n");
288
289         KKASSERT(data != NULL);
290
291         if (( r = disk_ioctl_switch(dev, cmd, data)) == ENOTTY) {
292                 struct plistref *pref = (struct plistref *) data;
293
294                 /* Check if we were called with NETBSD_DM_IOCTL ioctl
295                    otherwise quit. */
296                 if ((r = dm_ioctl_switch(cmd)) != 0)
297                         return r;
298
299                 if((r = prop_dictionary_copyin_ioctl(pref, cmd, &dm_dict_in)) != 0)
300                         return r;
301
302                 if ((r = dm_check_version(dm_dict_in)) != 0)
303                         goto cleanup_exit;
304
305                 /* run ioctl routine */
306                 if ((err = dm_cmd_to_fun(dm_dict_in)) != 0)
307                         goto cleanup_exit;
308
309 cleanup_exit:
310                 r = prop_dictionary_copyout_ioctl(pref, cmd, dm_dict_in);
311                 prop_object_release(dm_dict_in);
312         }
313
314         /*
315          * Return the error of the actual command if one one has
316          * happened. Otherwise return 'r' which indicates errors
317          * that occurred during helper operations.
318          */
319         return (err != 0)?err:r;
320 }
321
322 /*
323  * Translate command sent from libdevmapper to func.
324  */
325 static int
326 dm_cmd_to_fun(prop_dictionary_t dm_dict){
327         int i, r;
328         prop_string_t command;
329
330         r = 0;
331
332         if ((command = prop_dictionary_get(dm_dict, DM_IOCTL_COMMAND)) == NULL)
333                 return EINVAL;
334
335         for(i = 0; cmd_fn[i].cmd != NULL; i++)
336                 if (prop_string_equals_cstring(command, cmd_fn[i].cmd))
337                         break;
338
339         if (cmd_fn[i].cmd == NULL)
340                 return EINVAL;
341
342         aprint_debug("ioctl %s called\n", cmd_fn[i].cmd);
343         r = cmd_fn[i].fn(dm_dict);
344
345         return r;
346 }
347
348 /* Call apropriate ioctl handler function. */
349 static int
350 dm_ioctl_switch(u_long cmd)
351 {
352
353         switch(cmd) {
354
355         case NETBSD_DM_IOCTL:
356                 aprint_debug("dm NetBSD_DM_IOCTL called\n");
357                 break;
358         default:
359                  aprint_debug("dm unknown ioctl called\n");
360                  return ENOTTY;
361                  break; /* NOT REACHED */
362         }
363
364          return 0;
365 }
366
367  /*
368   * Check for disk specific ioctls.
369   */
370
371 static int
372 disk_ioctl_switch(cdev_t dev, u_long cmd, void *data)
373 {
374         dm_dev_t *dmv;
375
376         /* disk ioctls make sense only on block devices */
377         if (minor(dev) == 0)
378                 return ENOTTY;
379
380         switch(cmd) {
381         case DIOCGPART:
382         {
383                 struct partinfo *dpart;
384                 u_int64_t size;
385                 dpart = (void *)data;
386                 bzero(dpart, sizeof(*dpart));
387
388                 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL)
389                         return ENODEV;
390                 if (dmv->diskp->d_info.d_media_blksize == 0) {
391                         dm_dev_unbusy(dmv);
392                         return ENOTSUP;
393                 } else {
394                         size = dm_table_size(&dmv->table_head);
395                         dpart->media_offset  = 0;
396                         dpart->media_size    = size * DEV_BSIZE;
397                         dpart->media_blocks  = size;
398                         dpart->media_blksize = DEV_BSIZE;
399                         dpart->fstype = FS_BSDFFS;
400                 }
401                 dm_dev_unbusy(dmv);
402                 break;
403         }
404
405         default:
406                 aprint_debug("unknown disk_ioctl called\n");
407                 return ENOTTY;
408                 break; /* NOT REACHED */
409         }
410
411         return 0;
412 }
413
414 /*
415  * Do all IO operations on dm logical devices.
416  */
417 static int
418 dmstrategy(struct dev_strategy_args *ap)
419 {
420         cdev_t dev = ap->a_head.a_dev;
421         struct bio *bio = ap->a_bio;
422         struct buf *bp = bio->bio_buf;
423         int bypass;
424
425         dm_dev_t *dmv;
426         dm_table_t  *tbl;
427         dm_table_entry_t *table_en;
428         struct buf *nestbuf;
429
430         uint32_t dev_type;
431
432         uint64_t buf_start, buf_len, issued_len;
433         uint64_t table_start, table_end;
434         uint64_t start, end;
435
436         buf_start = bio->bio_offset;
437         buf_len = bp->b_bcount;
438
439         tbl = NULL;
440
441         table_end = 0;
442         dev_type = 0;
443         issued_len = 0;
444
445         if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) {
446                 bp->b_error = EIO;
447                 bp->b_resid = bp->b_bcount;
448                 biodone(bio);
449                 return 0;
450         }
451
452         switch(bp->b_cmd) {
453         case BUF_CMD_READ:
454         case BUF_CMD_WRITE:
455         case BUF_CMD_FREEBLKS:
456                 bypass = 0;
457                 break;
458         case BUF_CMD_FLUSH:
459                 bypass = 1;
460                 KKASSERT(buf_len == 0);
461                 break;
462         default:
463                 dm_dev_unbusy(dmv);
464                 bp->b_error = EIO;
465                 bp->b_resid = bp->b_bcount;
466                 biodone(bio);
467                 return 0;
468         }
469
470         if (bypass == 0 &&
471             bounds_check_with_mediasize(bio, DEV_BSIZE,
472                                         dm_table_size(&dmv->table_head)) <= 0) {
473                 dm_dev_unbusy(dmv);
474                 bp->b_resid = bp->b_bcount;
475                 biodone(bio);
476                 return 0;
477         }
478
479         /* Select active table */
480         tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE);
481
482         nestiobuf_init(bio);
483         devstat_start_transaction(&dmv->stats);
484
485         /*
486          * Find out what tables I want to select.
487          */
488         SLIST_FOREACH(table_en, tbl, next) {
489                 /*
490                  * I need need number of bytes not blocks.
491                  */
492                 table_start = table_en->start * DEV_BSIZE;
493                 table_end = table_start + (table_en->length) * DEV_BSIZE;
494
495                 /*
496                  * Calculate the start and end
497                  */
498                 start = MAX(table_start, buf_start);
499                 end = MIN(table_end, buf_start + buf_len);
500
501                 aprint_debug("----------------------------------------\n");
502                 aprint_debug("table_start %010" PRIu64", table_end %010"
503                     PRIu64 "\n", table_start, table_end);
504                 aprint_debug("buf_start %010" PRIu64", buf_len %010"
505                     PRIu64"\n", buf_start, buf_len);
506                 aprint_debug("start-buf_start %010"PRIu64", end %010"
507                     PRIu64"\n", start - buf_start, end);
508                 aprint_debug("start %010" PRIu64" , end %010"
509                     PRIu64"\n", start, end);
510                 aprint_debug("\n----------------------------------------\n");
511
512                 if (bypass) {
513                         nestbuf = getpbuf(NULL);
514                         nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;
515
516                         nestiobuf_add(bio, nestbuf, 0, 0, &dmv->stats);
517                         nestbuf->b_bio1.bio_offset = 0;
518                         table_en->target->strategy(table_en, nestbuf);
519                 } else if (start < end) {
520                         nestbuf = getpbuf(NULL);
521                         nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;
522
523                         nestiobuf_add(bio, nestbuf,
524                                       start - buf_start, (end - start),
525                                       &dmv->stats);
526                         issued_len += end - start;
527
528                         nestbuf->b_bio1.bio_offset = (start - table_start);
529                         table_en->target->strategy(table_en, nestbuf);
530                 }
531         }
532
533         if (issued_len < buf_len)
534                 nestiobuf_error(bio, EINVAL);
535         nestiobuf_start(bio);
536         dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE);
537         dm_dev_unbusy(dmv);
538
539         return 0;
540 }
541
542 static int
543 dmdump(struct dev_dump_args *ap)
544 {
545         cdev_t dev = ap->a_head.a_dev;
546         dm_dev_t *dmv;
547         dm_table_t  *tbl;
548         dm_table_entry_t *table_en;
549         uint32_t dev_type;
550         uint64_t buf_start, buf_len, issued_len;
551         uint64_t table_start, table_end;
552         uint64_t start, end, data_offset;
553         off_t offset;
554         size_t length;
555         int error = 0;
556
557         buf_start = ap->a_offset;
558         buf_len = ap->a_length;
559
560         tbl = NULL;
561
562         table_end = 0;
563         dev_type = 0;
564         issued_len = 0;
565
566         if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) {
567                 return EIO;
568         }
569
570         /* Select active table */
571         tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE);
572
573
574         /*
575          * Find out what tables I want to select.
576          */
577         SLIST_FOREACH(table_en, tbl, next) {
578                 /*
579                  * I need need number of bytes not blocks.
580                  */
581                 table_start = table_en->start * DEV_BSIZE;
582                 table_end = table_start + (table_en->length) * DEV_BSIZE;
583
584                 /*
585                  * Calculate the start and end
586                  */
587                 start = MAX(table_start, buf_start);
588                 end = MIN(table_end, buf_start + buf_len);
589
590                 if (ap->a_length == 0) {
591                         if (table_en->target->dump == NULL) {
592                                 error = ENXIO;
593                                 goto out;
594                         }
595
596                         table_en->target->dump(table_en, NULL, 0, 0);
597                 } else if (start < end) {
598                         data_offset = start - buf_start;
599                         offset = start - table_start;
600                         length = end - start;
601
602                         if (table_en->target->dump == NULL) {
603                                 error = ENXIO;
604                                 goto out;
605                         }
606
607                         table_en->target->dump(table_en,
608                             (char *)ap->a_virtual + data_offset,
609                             length, offset);
610
611                         issued_len += end - start;
612                 }
613         }
614
615         if (issued_len < buf_len)
616                 error = EINVAL;
617
618 out:
619         dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE);
620         dm_dev_unbusy(dmv);
621
622         return error;
623 }
624
625 static int
626 dmsize(struct dev_psize_args *ap)
627 {
628         cdev_t dev = ap->a_head.a_dev;
629         dm_dev_t *dmv;
630         uint64_t size;
631
632         size = 0;
633
634         if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL)
635                         return ENOENT;
636
637         size = dm_table_size(&dmv->table_head);
638         dm_dev_unbusy(dmv);
639
640         ap->a_result = (int64_t)size;
641
642         return 0;
643 }
644
#if 0
/*
 * Disabled code: clamps the transfer size of a buffer to MAXPHYS.
 * Compiled out together with its prototype near the top of the file.
 */
static void
dmminphys(struct buf *bp)
{

        bp->b_bcount = MIN(bp->b_bcount, MAXPHYS);
}
#endif
653
654 void
655 dmsetdiskinfo(struct disk *disk, dm_table_head_t *head)
656 {
657         struct disk_info info;
658         uint64_t dmp_size;
659
660         dmp_size = dm_table_size(head);
661
662         bzero(&info, sizeof(struct disk_info));
663         info.d_media_blksize = DEV_BSIZE;
664         info.d_media_blocks = dmp_size;
665 #if 0
666         /* this is set by disk_setdiskinfo */
667         info.d_media_size = dmp_size * DEV_BSIZE;
668 #endif
669         info.d_dsflags = DSO_MBRQUIET | DSO_DEVICEMAPPER;
670
671         info.d_secpertrack = 32;
672         info.d_nheads = 64;
673         info.d_secpercyl = info.d_secpertrack * info.d_nheads;
674         info.d_ncylinders = dmp_size / info.d_secpercyl;
675
676         disk_setdiskinfo(disk, &info);
677 }
678
679 TUNABLE_INT("debug.dm_debug", &dm_debug_level);
680 SYSCTL_INT(_debug, OID_AUTO, dm_debug, CTLFLAG_RW, &dm_debug_level,
681                0, "Eanble device mapper debugging");
682