Merge branch 'vendor/OPENSSH'
[dragonfly.git] / sys / dev / disk / md / md.c
1 /*
2  * ----------------------------------------------------------------------------
3  * "THE BEER-WARE LICENSE" (Revision 42):
4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5  * can do whatever you want with this stuff. If we meet some day, and you think
6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7  * ----------------------------------------------------------------------------
8  *
9  * $FreeBSD: src/sys/dev/md/md.c,v 1.8.2.2 2002/08/19 17:43:34 jdp Exp $
10  * $DragonFly: src/sys/dev/disk/md/md.c,v 1.20 2008/09/07 08:09:39 swildner Exp $
11  *
12  */
13
14 #include "opt_md.h"             /* We have adopted some tasks from MFS */
15
16 #include <sys/param.h>
17 #include <sys/systm.h>
18 #include <sys/buf.h>
19 #include <sys/conf.h>
20 #include <sys/devicestat.h>
21 #include <sys/disk.h>
22 #include <sys/kernel.h>
23 #include <sys/malloc.h>
24 #include <sys/sysctl.h>
25 #include <sys/linker.h>
26 #include <sys/proc.h>
27 #include <sys/buf2.h>
28 #include <sys/thread2.h>
29 #include <sys/queue.h>
30
31 #ifndef MD_NSECT
32 #define MD_NSECT (10000 * 2)
33 #endif
34
35 MALLOC_DEFINE(M_MD, "MD disk", "Memory Disk");
36 MALLOC_DEFINE(M_MDSECT, "MD sectors", "Memory Disk Sectors");
37
38 static int md_debug;
39 SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");
40
41 #if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
42 /* Image gets put here: */
43 static u_char mfs_root[MD_ROOT_SIZE*1024] = "MFS Filesystem goes here";
44 static u_char end_mfs_root[] __unused = "MFS Filesystem had better STOP here";
45 #endif
46
47 static int mdrootready;
48
49 #define CDEV_MAJOR      95
50
51 static d_strategy_t mdstrategy;
52 static d_strategy_t mdstrategy_preload;
53 static d_strategy_t mdstrategy_malloc;
54 static d_open_t mdopen;
55 static d_close_t mdclose;
56 static d_ioctl_t mdioctl;
57
58 static struct dev_ops md_ops = {
59         { "md", CDEV_MAJOR, D_DISK | D_CANFREE | D_MEMDISK | D_TRACKCLOSE},
60         .d_open =       mdopen,
61         .d_close =      mdclose,
62         .d_read =       physread,
63         .d_write =      physwrite,
64         .d_ioctl =      mdioctl,
65         .d_strategy =   mdstrategy,
66 };
67
68 struct md_s {
69         int unit;
70         struct devstat stats;
71         struct bio_queue_head bio_queue;
72         struct disk disk;
73         cdev_t dev;
74         int busy;
75         enum {                  /* Memory disk type */
76                 MD_MALLOC,
77                 MD_PRELOAD
78         } type;
79         unsigned nsect;
80
81         /* MD_MALLOC related fields */
82         unsigned nsecp;
83         u_char **secp;
84
85         /* MD_PRELOAD related fields */
86         u_char *pl_ptr;
87         unsigned pl_len;
88         TAILQ_ENTRY(md_s) link;
89 };
90 TAILQ_HEAD(mdshead, md_s) mdlist = TAILQ_HEAD_INITIALIZER(mdlist);
91
92 static int mdunits;
93 static int refcnt;
94
95 static struct md_s *mdcreate(unsigned);
96 static void mdcreate_malloc(void);
97 static int mdinit(module_t, int, void *);
98 static void md_drvinit(void *);
99 static int md_drvcleanup(void);
100
101 static int
102 mdinit(module_t mod, int cmd, void *arg)
103 {
104     int ret = 0;
105
106     switch(cmd) {
107         case MOD_LOAD:
108                 TAILQ_INIT(&mdlist);
109                 md_drvinit(NULL);
110                 break;
111         case MOD_UNLOAD:
112                 ret = md_drvcleanup();
113                 break;
114         default:
115                 ret = EINVAL;
116                 break;
117     }
118
119     return (ret);
120 }
121
122 static int
123 mdopen(struct dev_open_args *ap)
124 {
125         cdev_t dev = ap->a_head.a_dev;
126         struct md_s *sc;
127
128         if (md_debug)
129                 kprintf("mdopen(%s %x %x)\n",
130                         devtoname(dev), ap->a_oflags, ap->a_devtype);
131
132         sc = dev->si_drv1;
133         if (sc->unit + 1 == mdunits)
134                 mdcreate_malloc();
135
136         atomic_add_int(&refcnt, 1);
137         return (0);
138 }
139
140 static int
141 mdclose(struct dev_close_args *ap)
142 {
143         cdev_t dev = ap->a_head.a_dev;
144         struct md_s *sc;
145
146         if (md_debug)
147                 kprintf("mdclose(%s %x %x)\n",
148                         devtoname(dev), ap->a_fflag, ap->a_devtype);
149         sc = dev->si_drv1;
150         atomic_add_int(&refcnt, -1);
151
152         return (0);
153 }
154
155 static int
156 mdioctl(struct dev_ioctl_args *ap)
157 {
158         cdev_t dev = ap->a_head.a_dev;
159
160         if (md_debug)
161                 kprintf("mdioctl(%s %lx %p %x)\n",
162                         devtoname(dev), ap->a_cmd, ap->a_data, ap->a_fflag);
163
164         return (ENOIOCTL);
165 }
166
167 static int
168 mdstrategy(struct dev_strategy_args *ap)
169 {
170         cdev_t dev = ap->a_head.a_dev;
171         struct bio *bio = ap->a_bio;
172         struct buf *bp = bio->bio_buf;
173         struct md_s *sc;
174
175         if (md_debug > 1) {
176                 kprintf("mdstrategy(%p) %s %08x, %lld, %d, %p)\n",
177                     bp, devtoname(dev), bp->b_flags,
178                     (long long)bio->bio_offset,
179                     bp->b_bcount, bp->b_data);
180         }
181         bio->bio_driver_info = dev;
182         sc = dev->si_drv1;
183         if (sc->type == MD_MALLOC) {
184                 mdstrategy_malloc(ap);
185         } else {
186                 mdstrategy_preload(ap);
187         }
188         return(0);
189 }
190
191
192 static int
193 mdstrategy_malloc(struct dev_strategy_args *ap)
194 {
195         cdev_t dev = ap->a_head.a_dev;
196         struct bio *bio = ap->a_bio;
197         struct buf *bp = bio->bio_buf;
198         unsigned secno, nsec, secval, uc;
199         u_char *secp, **secpp, *dst;
200         struct md_s *sc;
201         int i;
202
203         if (md_debug > 1)
204                 kprintf("mdstrategy_malloc(%p) %s %08xx, %lld, %d, %p)\n",
205                     bp, devtoname(dev), bp->b_flags,
206                     (long long)bio->bio_offset,
207                     bp->b_bcount, bp->b_data);
208
209         sc = dev->si_drv1;
210
211         crit_enter();
212
213         bioqdisksort(&sc->bio_queue, bio);
214
215         if (sc->busy) {
216                 crit_exit();
217                 return(0);
218         }
219
220         sc->busy++;
221         
222         while (1) {
223                 bio = bioq_first(&sc->bio_queue);
224                 if (bio == NULL) {
225                         crit_exit();
226                         break;
227                 }
228                 crit_exit();
229                 bioq_remove(&sc->bio_queue, bio);
230                 bp = bio->bio_buf;
231
232                 devstat_start_transaction(&sc->stats);
233
234                 switch (bp->b_cmd) {
235                 case BUF_CMD_FREEBLKS:
236                 case BUF_CMD_READ:
237                 case BUF_CMD_WRITE:
238                         break;
239                 default:
240                         panic("md: bad b_cmd %d", bp->b_cmd);
241                 }
242
243                 nsec = bp->b_bcount >> DEV_BSHIFT;
244                 secno = (unsigned)(bio->bio_offset >> DEV_BSHIFT);
245                 dst = bp->b_data;
246                 while (nsec--) {
247                         if (secno < sc->nsecp) {
248                                 secpp = &sc->secp[secno];
249                                 if ((u_int)(uintptr_t)*secpp > 255) {
250                                         secp = *secpp;
251                                         secval = 0;
252                                 } else {
253                                         secp = 0;
254                                         secval = (u_int)(uintptr_t)*secpp;
255                                 }
256                         } else {
257                                 secpp = 0;
258                                 secp = 0;
259                                 secval = 0;
260                         }
261                         if (md_debug > 2)
262                                 kprintf("%08x %p %p %d\n", bp->b_flags, secpp, secp, secval);
263
264                         switch (bp->b_cmd) {
265                         case BUF_CMD_FREEBLKS:
266                                 if (secpp) {
267                                         if (secp)
268                                                 FREE(secp, M_MDSECT);
269                                         *secpp = 0;
270                                 }
271                                 break;
272                         case BUF_CMD_READ:
273                                 if (secp) {
274                                         bcopy(secp, dst, DEV_BSIZE);
275                                 } else if (secval) {
276                                         for (i = 0; i < DEV_BSIZE; i++)
277                                                 dst[i] = secval;
278                                 } else {
279                                         bzero(dst, DEV_BSIZE);
280                                 }
281                                 break;
282                         case BUF_CMD_WRITE:
283                                 uc = dst[0];
284                                 for (i = 1; i < DEV_BSIZE; i++) 
285                                         if (dst[i] != uc)
286                                                 break;
287                                 if (i == DEV_BSIZE && !uc) {
288                                         if (secp)
289                                                 FREE(secp, M_MDSECT);
290                                         if (secpp)
291                                                 *secpp = (u_char *)(uintptr_t)uc;
292                                 } else {
293                                         if (!secpp) {
294                                                 MALLOC(secpp, u_char **, (secno + nsec + 1) * sizeof(u_char *), M_MD, M_WAITOK | M_ZERO);
295                                                 bcopy(sc->secp, secpp, sc->nsecp * sizeof(u_char *));
296                                                 FREE(sc->secp, M_MD);
297                                                 sc->secp = secpp;
298                                                 sc->nsecp = secno + nsec + 1;
299                                                 secpp = &sc->secp[secno];
300                                         }
301                                         if (i == DEV_BSIZE) {
302                                                 if (secp)
303                                                         FREE(secp, M_MDSECT);
304                                                 *secpp = (u_char *)(uintptr_t)uc;
305                                         } else {
306                                                 if (!secp) 
307                                                         MALLOC(secp, u_char *, DEV_BSIZE, M_MDSECT, M_WAITOK);
308                                                 bcopy(dst, secp, DEV_BSIZE);
309
310                                                 *secpp = secp;
311                                         }
312                                 }
313                                 break;
314                         default:
315                                 panic("md: bad b_cmd %d", bp->b_cmd);
316
317                         }
318                         secno++;
319                         dst += DEV_BSIZE;
320                 }
321                 bp->b_resid = 0;
322                 devstat_end_transaction_buf(&sc->stats, bp);
323                 biodone(bio);
324                 crit_enter();
325         }
326         sc->busy = 0;
327         return(0);
328 }
329
330
331 static int
332 mdstrategy_preload(struct dev_strategy_args *ap)
333 {
334         cdev_t dev = ap->a_head.a_dev;
335         struct bio *bio = ap->a_bio;
336         struct buf *bp = bio->bio_buf;
337         struct md_s *sc;
338
339         if (md_debug > 1)
340                 kprintf("mdstrategy_preload(%p) %s %08x, %lld, %d, %p)\n",
341                     bp, devtoname(dev), bp->b_flags,
342                     (long long)bio->bio_offset,
343                     bp->b_bcount, bp->b_data);
344
345         sc = dev->si_drv1;
346
347         crit_enter();
348
349         bioqdisksort(&sc->bio_queue, bio);
350
351         if (sc->busy) {
352                 crit_exit();
353                 return(0);
354         }
355
356         sc->busy++;
357         
358         while (1) {
359                 bio = bioq_first(&sc->bio_queue);
360                 if (bio)
361                         bioq_remove(&sc->bio_queue, bio);
362                 crit_exit();
363                 if (bio == NULL)
364                         break;
365
366                 devstat_start_transaction(&sc->stats);
367
368                 switch (bp->b_cmd) {
369                 case BUF_CMD_FREEBLKS:
370                         break;
371                 case BUF_CMD_READ:
372                         bcopy(sc->pl_ptr + bio->bio_offset, 
373                                bp->b_data, bp->b_bcount);
374                         break;
375                 case BUF_CMD_WRITE:
376                         bcopy(bp->b_data, sc->pl_ptr + bio->bio_offset,
377                               bp->b_bcount);
378                         break;
379                 default:
380                         panic("md: bad cmd %d\n", bp->b_cmd);
381                 }
382                 bp->b_resid = 0;
383                 devstat_end_transaction_buf(&sc->stats, bp);
384                 biodone(bio);
385                 crit_enter();
386         }
387         sc->busy = 0;
388         return(0);
389 }
390
391 static struct md_s *
392 mdcreate(unsigned length)
393 {
394         struct md_s *sc;
395         struct disk_info info;
396
397         MALLOC(sc, struct md_s *,sizeof(*sc), M_MD, M_WAITOK | M_ZERO);
398         sc->unit = mdunits++;
399         bioq_init(&sc->bio_queue);
400         devstat_add_entry(&sc->stats, "md", sc->unit, DEV_BSIZE,
401                 DEVSTAT_NO_ORDERED_TAGS, 
402                 DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
403                 DEVSTAT_PRIORITY_OTHER);
404         sc->dev = disk_create(sc->unit, &sc->disk, &md_ops);
405         sc->dev->si_drv1 = sc;
406         sc->dev->si_iosize_max = DFLTPHYS;
407
408         bzero(&info, sizeof(info));
409         info.d_media_blksize = DEV_BSIZE;       /* mandatory */
410         info.d_media_blocks = length / DEV_BSIZE;
411
412         info.d_secpertrack = 1024;              /* optional */
413         info.d_nheads = 1;
414         info.d_secpercyl = info.d_secpertrack * info.d_nheads;
415         info.d_ncylinders = (u_int)(info.d_media_blocks / info.d_secpercyl);
416         disk_setdiskinfo(&sc->disk, &info);
417         TAILQ_INSERT_HEAD(&mdlist, sc, link);
418
419         return (sc);
420 }
421
422
423 static void
424 mdcreate_preload(u_char *image, unsigned length)
425 {
426         struct md_s *sc;
427
428         sc = mdcreate(length);
429         sc->type = MD_PRELOAD;
430         sc->nsect = length / DEV_BSIZE;
431         sc->pl_ptr = image;
432         sc->pl_len = length;
433
434         if (sc->unit == 0) 
435                 mdrootready = 1;
436 }
437
438 static void
439 mdcreate_malloc(void)
440 {
441         struct md_s *sc;
442
443         sc = mdcreate(MD_NSECT*DEV_BSIZE);
444         sc->type = MD_MALLOC;
445
446         sc->nsect = MD_NSECT;   /* for now */
447         MALLOC(sc->secp, u_char **, sizeof(u_char *), M_MD, M_WAITOK | M_ZERO);
448         sc->nsecp = 1;
449         kprintf("md%d: Malloc disk\n", sc->unit);
450 }
451
452 static int
453 md_drvcleanup(void)
454 {
455
456         int secno;
457         struct md_s *sc, *sc_temp;
458
459         if (atomic_fetchadd_int(&refcnt, 0) != 0)
460                 return EBUSY;
461
462         /*
463          * Go through all the md devices, freeing up all the
464          * memory allocated for sectors, and the md_s struct
465          * itself.
466          */
467         TAILQ_FOREACH_MUTABLE(sc, &mdlist, link, sc_temp) {
468                 for (secno = 0; secno < sc->nsecp; secno++) {
469                         if ((u_int)(uintptr_t)sc->secp[secno] > 255)
470                                 FREE(sc->secp[secno], M_MDSECT);
471                 }
472
473                 if (sc->dev != NULL)
474                         disk_destroy(&sc->disk);
475
476                 devstat_remove_entry(&sc->stats);
477                 TAILQ_REMOVE(&mdlist, sc, link);
478
479                 FREE(sc->secp, M_MD);
480                 FREE(sc, M_MD);
481         }
482
483         return 0;
484
485 }
486
487 static void
488 md_drvinit(void *unused)
489 {
490
491         caddr_t mod;
492         caddr_t c;
493         u_char *ptr, *name, *type;
494         unsigned len;
495
496 #ifdef MD_ROOT_SIZE
497         mdcreate_preload(mfs_root, MD_ROOT_SIZE*1024);
498 #endif
499         mod = NULL;
500         while ((mod = preload_search_next_name(mod)) != NULL) {
501                 name = (char *)preload_search_info(mod, MODINFO_NAME);
502                 type = (char *)preload_search_info(mod, MODINFO_TYPE);
503                 if (name == NULL)
504                         continue;
505                 if (type == NULL)
506                         continue;
507                 if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
508                         continue;
509                 c = preload_search_info(mod, MODINFO_ADDR);
510                 ptr = *(u_char **)c;
511                 c = preload_search_info(mod, MODINFO_SIZE);
512                 len = *(unsigned *)c;
513                 kprintf("md%d: Preloaded image <%s> %d bytes at %p\n",
514                    mdunits, name, len, ptr);
515                 mdcreate_preload(ptr, len);
516         } 
517         mdcreate_malloc();
518 }
519
/* Register the driver as a module with mdinit() as its event handler. */
DEV_MODULE(md, mdinit, NULL);

#ifdef MD_ROOT
/*
 * Make md0 the first root device candidate, provided a preloaded image was
 * attached as unit 0.  junk is ignored.
 */
static void
md_takeroot(void *junk)
{
	if (!mdrootready)
		return;

	rootdevnames[0] = "ufs:/dev/md0s0";
}

SYSINIT(md_root, SI_SUB_MOUNT_ROOT, SI_ORDER_FIRST, md_takeroot, NULL);
#endif