dd637e608045e9f36421fd2afa3711bd4f6b8388
[dragonfly.git] / sys / dev / disk / dm / device-mapper.c
1 /*        $NetBSD: device-mapper.c,v 1.22 2010/03/26 15:46:04 jakllsch Exp $ */
2
3 /*
4  * Copyright (c) 2010 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Adam Hamsik.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31
32 /*
33  * I want to say thank you to all people who helped me with this project.
34  */
35
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/ctype.h>
39
40 #include <sys/buf.h>
41 #include <sys/conf.h>
42 #include <sys/device.h>
43 #include <sys/disk.h>
44 #include <sys/disklabel.h>
45 #include <sys/dtype.h>
46 #include <sys/ioccom.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/sysctl.h>
50 #include <dev/disk/dm/dm.h>
51
52 #include "netbsd-dm.h"
53
54 static  d_ioctl_t       dmioctl;
55 static  d_open_t        dmopen;
56 static  d_close_t       dmclose;
57 static  d_psize_t       dmsize;
58 static  d_strategy_t    dmstrategy;
59 static  d_dump_t        dmdump;
60
61 /* attach and detach routines */
62 void dmattach(int);
63 static int dm_modcmd(module_t mod, int cmd, void *unused);
64 static int dmdestroy(void);
65
66 static void dm_doinit(void);
67
68 static int dm_cmd_to_fun(prop_dictionary_t);
69 static int disk_ioctl_switch(cdev_t, u_long, void *);
70 static int dm_ioctl_switch(u_long);
71 #if 0
72 static void dmminphys(struct buf *);
73 #endif
74
75 /* ***Variable-definitions*** */
76 struct dev_ops dm_ops = {
77         { "dm", 0, D_DISK | D_MPSAFE },
78         .d_open         = dmopen,
79         .d_close        = dmclose,
80         .d_read         = physread,
81         .d_write        = physwrite,
82         .d_ioctl        = dmioctl,
83         .d_strategy     = dmstrategy,
84         .d_psize        = dmsize,
85         .d_dump         = dmdump,
86 /* D_DISK */
87 };
88
89 MALLOC_DEFINE(M_DM, "dm", "Device Mapper allocations");
90
91 int dm_debug_level = 0;
92
93 extern uint64_t dm_dev_counter;
94
95 static cdev_t dmcdev;
96
97 static moduledata_t dm_mod = {
98     "dm",
99     dm_modcmd,
100     NULL
101 };
102 DECLARE_MODULE(dm, dm_mod, SI_SUB_RAID, SI_ORDER_ANY);
103 MODULE_VERSION(dm, 1);
104
105 /*
106  * This array is used to translate cmd to function pointer.
107  *
108  * Interface between libdevmapper and lvm2tools uses different
109  * names for one IOCTL call because libdevmapper do another thing
110  * then. When I run "info" or "mknodes" libdevmapper will send same
111  * ioctl to kernel but will do another things in userspace.
112  *
113  */
114 static struct cmd_function cmd_fn[] = {
115                 { .cmd = "version", .fn = dm_get_version_ioctl},
116                 { .cmd = "targets", .fn = dm_list_versions_ioctl},
117                 { .cmd = "create",  .fn = dm_dev_create_ioctl},
118                 { .cmd = "info",    .fn = dm_dev_status_ioctl},
119                 { .cmd = "mknodes", .fn = dm_dev_status_ioctl},
120                 { .cmd = "names",   .fn = dm_dev_list_ioctl},
121                 { .cmd = "suspend", .fn = dm_dev_suspend_ioctl},
122                 { .cmd = "remove",  .fn = dm_dev_remove_ioctl},
123                 { .cmd = "remove_all", .fn = dm_dev_remove_all_ioctl},
124                 { .cmd = "rename",  .fn = dm_dev_rename_ioctl},
125                 { .cmd = "resume",  .fn = dm_dev_resume_ioctl},
126                 { .cmd = "clear",   .fn = dm_table_clear_ioctl},
127                 { .cmd = "deps",    .fn = dm_table_deps_ioctl},
128                 { .cmd = "reload",  .fn = dm_table_load_ioctl},
129                 { .cmd = "status",  .fn = dm_table_status_ioctl},
130                 { .cmd = "table",   .fn = dm_table_status_ioctl},
131                 { .cmd = "message", .fn = dm_message_ioctl},
132                 {NULL, NULL}
133 };
134
135 /* New module handle routine */
136 static int
137 dm_modcmd(module_t mod, int cmd, void *unused)
138 {
139         int error, bmajor, cmajor;
140
141         error = 0;
142         bmajor = -1;
143         cmajor = -1;
144
145         switch (cmd) {
146         case MOD_LOAD:
147                 dm_doinit();
148                 kprintf("Device Mapper version %d.%d.%d loaded\n",
149                     DM_VERSION_MAJOR, DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL);
150                 break;
151
152         case MOD_UNLOAD:
153                 /*
154                  * Disable unloading of dm module if there are any devices
155                  * defined in driver. This is probably too strong we need
156                  * to disable auto-unload only if there is mounted dm device
157                  * present.
158                  */
159                 if (dm_dev_counter > 0)
160                         return EBUSY;
161
162                 error = dmdestroy();
163                 if (error)
164                         break;
165                 kprintf("Device Mapper unloaded\n");
166                 break;
167
168         default:
169                 break;
170         }
171
172         return error;
173 }
174
175 static void
176 dm_doinit(void)
177 {
178         dm_target_init();
179         dm_dev_init();
180         dm_pdev_init();
181         dmcdev = make_dev(&dm_ops, 0, UID_ROOT, GID_OPERATOR, 0640, "mapper/control");
182 }
183
184 /* Destroy routine */
185 static int
186 dmdestroy(void)
187 {
188         destroy_dev(dmcdev);
189
190         dm_dev_uninit();
191         dm_pdev_uninit();
192         dm_target_uninit();
193
194         return 0;
195 }
196
197 static int
198 dmopen(struct dev_open_args *ap)
199 {
200         cdev_t dev = ap->a_head.a_dev;
201         dm_dev_t *dmv;
202
203         /* Shortcut for the control device */
204         if (minor(dev) == 0)
205                 return 0;
206
207         if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL)
208                 return ENXIO;
209
210         dmv->is_open = 1;
211         dm_dev_unbusy(dmv);
212
213         aprint_debug("dm open routine called %" PRIu32 "\n",
214             minor(ap->a_head.a_dev));
215         return 0;
216 }
217
218 static int
219 dmclose(struct dev_close_args *ap)
220 {
221         cdev_t dev = ap->a_head.a_dev;
222         dm_dev_t *dmv;
223
224         /* Shortcut for the control device */
225         if (minor(dev) == 0)
226                 return 0;
227
228         if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL)
229                 return ENXIO;
230
231         dmv->is_open = 0;
232         dm_dev_unbusy(dmv);
233
234         aprint_debug("dm close routine called %" PRIu32 "\n",
235             minor(ap->a_head.a_dev));
236         return 0;
237 }
238
239
240 static int
241 dmioctl(struct dev_ioctl_args *ap)
242 {
243         cdev_t dev = ap->a_head.a_dev;
244         u_long cmd = ap->a_cmd;
245         void *data = ap->a_data;
246
247         int r, err;
248         prop_dictionary_t dm_dict_in;
249
250         err = r = 0;
251
252         aprint_debug("dmioctl called\n");
253
254         KKASSERT(data != NULL);
255
256         if (( r = disk_ioctl_switch(dev, cmd, data)) == ENOTTY) {
257                 struct plistref *pref = (struct plistref *) data;
258
259                 /* Check if we were called with NETBSD_DM_IOCTL ioctl
260                    otherwise quit. */
261                 if ((r = dm_ioctl_switch(cmd)) != 0)
262                         return r;
263
264                 if((r = prop_dictionary_copyin_ioctl(pref, cmd, &dm_dict_in)) != 0)
265                         return r;
266
267                 if ((r = dm_check_version(dm_dict_in)) != 0)
268                         goto cleanup_exit;
269
270                 /* run ioctl routine */
271                 if ((err = dm_cmd_to_fun(dm_dict_in)) != 0)
272                         goto cleanup_exit;
273
274 cleanup_exit:
275                 r = prop_dictionary_copyout_ioctl(pref, cmd, dm_dict_in);
276                 prop_object_release(dm_dict_in);
277         }
278
279         /*
280          * Return the error of the actual command if one one has
281          * happened. Otherwise return 'r' which indicates errors
282          * that occurred during helper operations.
283          */
284         return (err != 0)?err:r;
285 }
286
287 /*
288  * Translate command sent from libdevmapper to func.
289  */
290 static int
291 dm_cmd_to_fun(prop_dictionary_t dm_dict){
292         int i, r;
293         prop_string_t command;
294
295         r = 0;
296
297         if ((command = prop_dictionary_get(dm_dict, DM_IOCTL_COMMAND)) == NULL)
298                 return EINVAL;
299
300         for(i = 0; cmd_fn[i].cmd != NULL; i++)
301                 if (prop_string_equals_cstring(command, cmd_fn[i].cmd))
302                         break;
303
304         if (cmd_fn[i].cmd == NULL)
305                 return EINVAL;
306
307         aprint_debug("ioctl %s called\n", cmd_fn[i].cmd);
308         r = cmd_fn[i].fn(dm_dict);
309
310         return r;
311 }
312
313 /* Call apropriate ioctl handler function. */
314 static int
315 dm_ioctl_switch(u_long cmd)
316 {
317
318         switch(cmd) {
319
320         case NETBSD_DM_IOCTL:
321                 aprint_debug("dm NetBSD_DM_IOCTL called\n");
322                 break;
323         default:
324                  aprint_debug("dm unknown ioctl called\n");
325                  return ENOTTY;
326                  break; /* NOT REACHED */
327         }
328
329          return 0;
330 }
331
332  /*
333   * Check for disk specific ioctls.
334   */
335
336 static int
337 disk_ioctl_switch(cdev_t dev, u_long cmd, void *data)
338 {
339         dm_dev_t *dmv;
340
341         /* disk ioctls make sense only on block devices */
342         if (minor(dev) == 0)
343                 return ENOTTY;
344
345         switch(cmd) {
346         case DIOCGPART:
347         {
348                 struct partinfo *dpart;
349                 u_int64_t size;
350                 dpart = (void *)data;
351                 bzero(dpart, sizeof(*dpart));
352
353                 if ((dmv = dev->si_drv1) == NULL)
354                         return ENODEV;
355                 if (dmv->diskp->d_info.d_media_blksize == 0) {
356                         return ENOTSUP;
357                 } else {
358                         size = dm_table_size(&dmv->table_head);
359                         dpart->media_offset  = 0;
360                         dpart->media_size    = size * DEV_BSIZE;
361                         dpart->media_blocks  = size;
362                         dpart->media_blksize = DEV_BSIZE;
363                         dpart->fstype = FS_BSDFFS;
364                 }
365                 break;
366         }
367
368         default:
369                 aprint_debug("unknown disk_ioctl called\n");
370                 return ENOTTY;
371                 break; /* NOT REACHED */
372         }
373
374         return 0;
375 }
376
377 /*
378  * Do all IO operations on dm logical devices.
379  */
380 static int
381 dmstrategy(struct dev_strategy_args *ap)
382 {
383         cdev_t dev = ap->a_head.a_dev;
384         struct bio *bio = ap->a_bio;
385         struct buf *bp = bio->bio_buf;
386         int bypass;
387
388         dm_dev_t *dmv;
389         dm_table_t  *tbl;
390         dm_table_entry_t *table_en;
391         struct buf *nestbuf;
392
393         uint32_t dev_type;
394
395         uint64_t buf_start, buf_len, issued_len;
396         uint64_t table_start, table_end;
397         uint64_t start, end;
398
399         buf_start = bio->bio_offset;
400         buf_len = bp->b_bcount;
401
402         tbl = NULL;
403
404         table_end = 0;
405         dev_type = 0;
406         issued_len = 0;
407
408         dmv = dev->si_drv1;
409
410         switch(bp->b_cmd) {
411         case BUF_CMD_READ:
412         case BUF_CMD_WRITE:
413         case BUF_CMD_FREEBLKS:
414                 bypass = 0;
415                 break;
416         case BUF_CMD_FLUSH:
417                 bypass = 1;
418                 KKASSERT(buf_len == 0);
419                 break;
420         default:
421                 bp->b_error = EIO;
422                 bp->b_resid = bp->b_bcount;
423                 biodone(bio);
424                 return 0;
425         }
426
427         if (bypass == 0 &&
428             bounds_check_with_mediasize(bio, DEV_BSIZE,
429                                         dm_table_size(&dmv->table_head)) <= 0) {
430                 bp->b_resid = bp->b_bcount;
431                 biodone(bio);
432                 return 0;
433         }
434
435         /* Select active table */
436         tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE);
437
438         nestiobuf_init(bio);
439         devstat_start_transaction(&dmv->stats);
440
441         /*
442          * Find out what tables I want to select.
443          */
444         SLIST_FOREACH(table_en, tbl, next) {
445                 /*
446                  * I need need number of bytes not blocks.
447                  */
448                 table_start = table_en->start * DEV_BSIZE;
449                 table_end = table_start + (table_en->length) * DEV_BSIZE;
450
451                 /*
452                  * Calculate the start and end
453                  */
454                 start = MAX(table_start, buf_start);
455                 end = MIN(table_end, buf_start + buf_len);
456
457                 aprint_debug("----------------------------------------\n");
458                 aprint_debug("table_start %010" PRIu64", table_end %010"
459                     PRIu64 "\n", table_start, table_end);
460                 aprint_debug("buf_start %010" PRIu64", buf_len %010"
461                     PRIu64"\n", buf_start, buf_len);
462                 aprint_debug("start-buf_start %010"PRIu64", end %010"
463                     PRIu64"\n", start - buf_start, end);
464                 aprint_debug("start %010" PRIu64" , end %010"
465                     PRIu64"\n", start, end);
466                 aprint_debug("\n----------------------------------------\n");
467
468                 if (bypass) {
469                         nestbuf = getpbuf(NULL);
470                         nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;
471
472                         nestiobuf_add(bio, nestbuf, 0, 0, &dmv->stats);
473                         nestbuf->b_bio1.bio_offset = 0;
474                         table_en->target->strategy(table_en, nestbuf);
475                 } else if (start < end) {
476                         nestbuf = getpbuf(NULL);
477                         nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;
478
479                         nestiobuf_add(bio, nestbuf,
480                                       start - buf_start, (end - start),
481                                       &dmv->stats);
482                         issued_len += end - start;
483
484                         nestbuf->b_bio1.bio_offset = (start - table_start);
485                         table_en->target->strategy(table_en, nestbuf);
486                 }
487         }
488
489         if (issued_len < buf_len)
490                 nestiobuf_error(bio, EINVAL);
491         nestiobuf_start(bio);
492         dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE);
493
494         return 0;
495 }
496
497 static int
498 dmdump(struct dev_dump_args *ap)
499 {
500         cdev_t dev = ap->a_head.a_dev;
501         dm_dev_t *dmv;
502         dm_table_t  *tbl;
503         dm_table_entry_t *table_en;
504         uint32_t dev_type;
505         uint64_t buf_start, buf_len, issued_len;
506         uint64_t table_start, table_end;
507         uint64_t start, end, data_offset;
508         off_t offset;
509         size_t length;
510         int error = 0;
511
512         buf_start = ap->a_offset;
513         buf_len = ap->a_length;
514
515         tbl = NULL;
516
517         table_end = 0;
518         dev_type = 0;
519         issued_len = 0;
520
521         dmv = dev->si_drv1;
522
523         /* Select active table */
524         tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE);
525
526
527         /*
528          * Find out what tables I want to select.
529          */
530         SLIST_FOREACH(table_en, tbl, next) {
531                 /*
532                  * I need need number of bytes not blocks.
533                  */
534                 table_start = table_en->start * DEV_BSIZE;
535                 table_end = table_start + (table_en->length) * DEV_BSIZE;
536
537                 /*
538                  * Calculate the start and end
539                  */
540                 start = MAX(table_start, buf_start);
541                 end = MIN(table_end, buf_start + buf_len);
542
543                 if (ap->a_length == 0) {
544                         if (table_en->target->dump == NULL) {
545                                 error = ENXIO;
546                                 goto out;
547                         }
548
549                         table_en->target->dump(table_en, NULL, 0, 0);
550                 } else if (start < end) {
551                         data_offset = start - buf_start;
552                         offset = start - table_start;
553                         length = end - start;
554
555                         if (table_en->target->dump == NULL) {
556                                 error = ENXIO;
557                                 goto out;
558                         }
559
560                         table_en->target->dump(table_en,
561                             (char *)ap->a_virtual + data_offset,
562                             length, offset);
563
564                         issued_len += end - start;
565                 }
566         }
567
568         if (issued_len < buf_len)
569                 error = EINVAL;
570
571 out:
572         dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE);
573
574         return error;
575 }
576
577 static int
578 dmsize(struct dev_psize_args *ap)
579 {
580         cdev_t dev = ap->a_head.a_dev;
581         dm_dev_t *dmv;
582         uint64_t size;
583
584         size = 0;
585
586         if ((dmv = dev->si_drv1) == NULL)
587                 return ENXIO;
588
589         size = dm_table_size(&dmv->table_head);
590         ap->a_result = (int64_t)size;
591
592         return 0;
593 }
594
595 #if 0
596 static void
597 dmminphys(struct buf *bp)
598 {
599
600         bp->b_bcount = MIN(bp->b_bcount, MAXPHYS);
601 }
602 #endif
603
604 void
605 dmsetdiskinfo(struct disk *disk, dm_table_head_t *head)
606 {
607         struct disk_info info;
608         uint64_t dmp_size;
609
610         dmp_size = dm_table_size(head);
611
612         bzero(&info, sizeof(struct disk_info));
613         info.d_media_blksize = DEV_BSIZE;
614         info.d_media_blocks = dmp_size;
615 #if 0
616         /* this is set by disk_setdiskinfo */
617         info.d_media_size = dmp_size * DEV_BSIZE;
618 #endif
619         info.d_dsflags = DSO_MBRQUIET | DSO_DEVICEMAPPER;
620
621         info.d_secpertrack = 32;
622         info.d_nheads = 64;
623         info.d_secpercyl = info.d_secpertrack * info.d_nheads;
624         info.d_ncylinders = dmp_size / info.d_secpercyl;
625
626         disk_setdiskinfo(disk, &info);
627 }
628
/*
 * Convert the leading run of decimal digits in 's' to a uint64_t.
 *
 * Conversion stops at the first character that is not a digit, so an
 * empty string or one that does not start with a digit yields 0.  No
 * sign or leading whitespace is accepted, and overflow is not detected.
 */
uint64_t
atoi64(const char *s)
{
	uint64_t value = 0;
	const char *p;

	for (p = s; *p != '\0' && isdigit(*p); p++)
		value = (10 * value) + (*p - '0');

	return value;
}
648
649 void
650 dm_builtin_init(void *arg)
651 {
652         modeventhand_t evh = (modeventhand_t)arg;
653
654         KKASSERT(evh != NULL);
655         evh(NULL, MOD_LOAD, NULL);
656 }
657
658 void
659 dm_builtin_uninit(void *arg)
660 {
661         modeventhand_t evh = (modeventhand_t)arg;
662
663         KKASSERT(evh != NULL);
664         evh(NULL, MOD_UNLOAD, NULL);
665 }
666
667 TUNABLE_INT("debug.dm_debug", &dm_debug_level);
668 SYSCTL_INT(_debug, OID_AUTO, dm_debug, CTLFLAG_RW, &dm_debug_level,
669                0, "Eanble device mapper debugging");
670