1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */
2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.7 2003/07/19 21:14:19 dillon Exp $ */
4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */
7 * Copyright (c) 1995 Jason R. Thorpe.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project
22 * 4. The name of the author may not be used to endorse or promote products
23 * derived from this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
81 * "Concatenated" disk driver.
83 * Dynamic configuration and disklabel support by:
84 * Jason R. Thorpe <thorpej@nas.nasa.gov>
85 * Numerical Aerodynamic Simulation Facility
87 * NASA Ames Research Center
88 * Moffett Field, CA 94035
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/module.h>
99 #include <sys/malloc.h>
100 #include <sys/namei.h>
101 #include <sys/conf.h>
102 #include <sys/stat.h>
103 #include <sys/sysctl.h>
104 #include <sys/disklabel.h>
105 #include <ufs/ffs/fs.h>
106 #include <sys/devicestat.h>
107 #include <sys/fcntl.h>
108 #include <sys/vnode.h>
109 #include <sys/buf2.h>
111 #include <sys/ccdvar.h>
113 #include <vm/vm_zone.h>
115 #if defined(CCDDEBUG) && !defined(DEBUG)
120 #define CCDB_FOLLOW 0x01
121 #define CCDB_INIT 0x02
123 #define CCDB_LABEL 0x08
124 #define CCDB_VNODE 0x10
125 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
127 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
131 #define ccdunit(x) dkunit(x)
132 #define ccdpart(x) dkpart(x)
135 This is how mirroring works (only writes are special):
137 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
138 linked together by the cb_mirror field. "cb_pflags &
139 CCDPF_MIRROR_DONE" is set to 0 on both of them.
141 When a component returns to ccdiodone(), it checks if "cb_pflags &
142 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
143 flag and returns. If it is, it means its partner has already
144 returned, so it will go to the regular cleanup.
149 struct buf cb_buf; /* new I/O buf */
150 struct buf *cb_obp; /* ptr. to original I/O buf */
151 struct ccdbuf *cb_freenext; /* free list link */
152 int cb_unit; /* target unit */
153 int cb_comp; /* target component */
154 int cb_pflags; /* mirror/parity status flag */
155 struct ccdbuf *cb_mirror; /* mirror counterpart */
158 /* bits in cb_pflags */
159 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
161 #define CCDLABELDEV(dev) \
162 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
164 static d_open_t ccdopen;
165 static d_close_t ccdclose;
166 static d_strategy_t ccdstrategy;
167 static d_ioctl_t ccdioctl;
168 static d_dump_t ccddump;
169 static d_psize_t ccdsize;
171 #define NCCDFREEHIWAT 16
173 #define CDEV_MAJOR 74
174 #define BDEV_MAJOR 21
176 static struct cdevsw ccd_cdevsw = {
178 /* close */ ccdclose,
180 /* write */ physwrite,
181 /* ioctl */ ccdioctl,
184 /* strategy */ ccdstrategy,
186 /* maj */ CDEV_MAJOR,
190 /* bmaj */ BDEV_MAJOR
193 /* called during module initialization */
194 static void ccdattach __P((void));
195 static int ccd_modevent __P((module_t, int, void *));
197 /* called by biodone() at interrupt time */
198 static void ccdiodone __P((struct ccdbuf *cbp));
200 static void ccdstart __P((struct ccd_softc *, struct buf *));
201 static void ccdinterleave __P((struct ccd_softc *, int));
202 static void ccdintr __P((struct ccd_softc *, struct buf *));
203 static int ccdinit __P((struct ccddevice *, char **, struct thread *));
204 static int ccdlookup __P((char *, struct thread *td, struct vnode **));
205 static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *,
206 struct buf *, daddr_t, caddr_t, long));
207 static void ccdgetdisklabel __P((dev_t));
208 static void ccdmakedisklabel __P((struct ccd_softc *));
209 static int ccdlock __P((struct ccd_softc *));
210 static void ccdunlock __P((struct ccd_softc *));
213 static void printiinfo __P((struct ccdiinfo *));
216 /* Non-private for the benefit of libkvm. */
217 struct ccd_softc *ccd_softc;
218 struct ccddevice *ccddevs;
219 struct ccdbuf *ccdfreebufs;
220 static int numccdfreebufs;
221 static int numccd = 0;
224 * getccdbuf() - Allocate and zero a ccd buffer.
226 * This routine is called at splbio().
231 getccdbuf(struct ccdbuf *cpy)
236 * Allocate from freelist or malloc as necessary
238 if ((cbp = ccdfreebufs) != NULL) {
239 ccdfreebufs = cbp->cb_freenext;
242 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
246 * Used by mirroring code
249 bcopy(cpy, cbp, sizeof(struct ccdbuf));
251 bzero(cbp, sizeof(struct ccdbuf));
254 * independent struct buf initialization
256 LIST_INIT(&cbp->cb_buf.b_dep);
257 BUF_LOCKINIT(&cbp->cb_buf);
258 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
259 BUF_KERNPROC(&cbp->cb_buf);
265 * putccdbuf() - Free a ccd buffer.
267 * This routine is called at splbio().
272 putccdbuf(struct ccdbuf *cbp)
274 BUF_UNLOCK(&cbp->cb_buf);
275 BUF_LOCKFREE(&cbp->cb_buf);
277 if (numccdfreebufs < NCCDFREEHIWAT) {
278 cbp->cb_freenext = ccdfreebufs;
282 free((caddr_t)cbp, M_DEVBUF);
288 * Number of blocks to leave untouched in front of a component partition.
289 * This is to avoid violating its disklabel area when it starts at the
290 * beginning of the slice.
292 #if !defined(CCD_OFFSET)
293 #define CCD_OFFSET 16
297 * Called by main() during pseudo-device attachment. All we need
298 * to do is allocate enough space for devices to be configured later, and
308 printf("ccd0-%d: Concatenated disk drivers\n", num-1);
310 printf("ccd0: Concatenated disk driver\n");
312 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
314 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
316 if ((ccd_softc == NULL) || (ccddevs == NULL)) {
317 printf("WARNING: no memory for concatenated disks\n");
318 if (ccd_softc != NULL)
319 free(ccd_softc, M_DEVBUF);
321 free(ccddevs, M_DEVBUF);
325 bzero(ccd_softc, num * sizeof(struct ccd_softc));
326 bzero(ccddevs, num * sizeof(struct ccddevice));
328 cdevsw_add(&ccd_cdevsw);
329 /* XXX: is this necessary? */
330 for (i = 0; i < numccd; ++i)
331 ccddevs[i].ccd_dk = -1;
335 ccd_modevent(mod, type, data)
348 printf("ccd0: Unload not supported!\n");
352 default: /* MOD_SHUTDOWN etc */
358 DEV_MODULE(ccd, ccd_modevent, NULL);
361 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td)
363 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
364 struct ccdcinfo *ci = NULL; /* XXX */
370 struct partinfo dpart;
371 struct ccdgeom *ccg = &cs->sc_geom;
372 char tmppath[MAXPATHLEN];
376 KKASSERT(td->td_proc);
377 cred = td->td_proc->p_ucred;
380 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
381 printf("ccdinit: unit %d\n", ccd->ccd_unit);
385 cs->sc_ileave = ccd->ccd_interleave;
386 cs->sc_nccdisks = ccd->ccd_ndev;
388 /* Allocate space for the component info. */
389 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
393 * Verify that each component piece exists and record
394 * relevant information about it.
398 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
399 vp = ccd->ccd_vpp[ix];
400 ci = &cs->sc_cinfo[ix];
404 * Copy in the pathname of the component.
406 bzero(tmppath, sizeof(tmppath)); /* sanity */
407 if ((error = copyinstr(cpaths[ix], tmppath,
408 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
410 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
411 printf("ccd%d: can't copy path, error = %d\n",
412 ccd->ccd_unit, error);
416 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
417 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
419 ci->ci_dev = vn_todev(vp);
422 * Get partition information for the component.
424 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
425 FREAD, cred, td)) != 0) {
427 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
428 printf("ccd%d: %s: ioctl failed, error = %d\n",
429 ccd->ccd_unit, ci->ci_path, error);
433 if (dpart.part->p_fstype == FS_BSDFFS) {
435 ((dpart.disklab->d_secsize > maxsecsize) ?
436 dpart.disklab->d_secsize : maxsecsize);
437 size = dpart.part->p_size - CCD_OFFSET;
440 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
441 printf("ccd%d: %s: incorrect partition type\n",
442 ccd->ccd_unit, ci->ci_path);
449 * Calculate the size, truncating to an interleave
450 * boundary if necessary.
453 if (cs->sc_ileave > 1)
454 size -= size % cs->sc_ileave;
458 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
459 printf("ccd%d: %s: size == 0\n",
460 ccd->ccd_unit, ci->ci_path);
466 if (minsize == 0 || size < minsize)
473 * Don't allow the interleave to be smaller than
474 * the biggest component sector.
476 if ((cs->sc_ileave > 0) &&
477 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
479 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
480 printf("ccd%d: interleave must be at least %d\n",
481 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
488 * If uniform interleave is desired set all sizes to that of
489 * the smallest component. This will guarantee that a single
490 * interleave table is generated.
492 * Lost space must be taken into account when calculating the
493 * overall size. Half the space is lost when CCDF_MIRROR is
494 * specified. One disk is lost when CCDF_PARITY is specified.
496 if (ccd->ccd_flags & CCDF_UNIFORM) {
497 for (ci = cs->sc_cinfo;
498 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
499 ci->ci_size = minsize;
501 if (ccd->ccd_flags & CCDF_MIRROR) {
503 * Check to see if an even number of components
504 * have been specified. The interleave must also
505 * be non-zero in order for us to be able to
506 * guarantee the topology.
508 if (cs->sc_nccdisks % 2) {
509 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
513 if (cs->sc_ileave == 0) {
514 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
518 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
519 } else if (ccd->ccd_flags & CCDF_PARITY) {
520 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
522 if (cs->sc_ileave == 0) {
523 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
527 cs->sc_size = cs->sc_nccdisks * minsize;
532 * Construct the interleave table.
534 ccdinterleave(cs, ccd->ccd_unit);
537 * Create pseudo-geometry based on 1MB cylinders. It's
540 ccg->ccg_secsize = maxsecsize;
541 ccg->ccg_ntracks = 1;
542 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
543 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
546 * Add a devstat entry for this device.
548 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
549 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
550 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
551 DEVSTAT_PRIORITY_ARRAY);
553 cs->sc_flags |= CCDF_INITED;
554 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
555 cs->sc_unit = ccd->ccd_unit;
558 while (ci > cs->sc_cinfo) {
560 free(ci->ci_path, M_DEVBUF);
562 free(cs->sc_cinfo, M_DEVBUF);
567 ccdinterleave(cs, unit)
568 struct ccd_softc *cs;
571 struct ccdcinfo *ci, *smallci;
578 if (ccddebug & CCDB_INIT)
579 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
583 * Allocate an interleave table. The worst case occurs when each
584 * of N disks is of a different size, resulting in N interleave
587 * Chances are this is too big, but we don't care.
589 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
590 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
591 bzero((caddr_t)cs->sc_itable, size);
594 * Trivial case: no interleave (actually interleave of disk size).
595 * Each table entry represents a single component in its entirety.
597 * An interleave of 0 may not be used with a mirror or parity setup.
599 if (cs->sc_ileave == 0) {
603 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
604 /* Allocate space for ii_index. */
605 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
607 ii->ii_startblk = bn;
609 ii->ii_index[0] = ix;
610 bn += cs->sc_cinfo[ix].ci_size;
615 if (ccddebug & CCDB_INIT)
616 printiinfo(cs->sc_itable);
622 * The following isn't fast or pretty; it doesn't have to be.
626 for (ii = cs->sc_itable; ; ii++) {
628 * Allocate space for ii_index. We might allocate more than
631 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
635 * Locate the smallest of the remaining components
638 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
640 if (ci->ci_size > size &&
642 ci->ci_size < smallci->ci_size)) {
648 * Nobody left, all done
650 if (smallci == NULL) {
656 * Record starting logical block using an sc_ileave blocksize.
658 ii->ii_startblk = bn / cs->sc_ileave;
661 * Record starting component block using an sc_ileave
662 * blocksize. This value is relative to the beginning of
665 ii->ii_startoff = lbn;
668 * Determine how many disks take part in this interleave
669 * and record their indices.
672 for (ci = cs->sc_cinfo;
673 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
674 if (ci->ci_size >= smallci->ci_size) {
675 ii->ii_index[ix++] = ci - cs->sc_cinfo;
679 bn += ix * (smallci->ci_size - size);
680 lbn = smallci->ci_size / cs->sc_ileave;
681 size = smallci->ci_size;
684 if (ccddebug & CCDB_INIT)
685 printiinfo(cs->sc_itable);
691 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td)
693 int unit = ccdunit(dev);
694 struct ccd_softc *cs;
695 struct disklabel *lp;
696 int error = 0, part, pmask;
699 if (ccddebug & CCDB_FOLLOW)
700 printf("ccdopen(%x, %x)\n", dev, flags);
704 cs = &ccd_softc[unit];
706 if ((error = ccdlock(cs)) != 0)
715 * If we're initialized, check to see if there are any other
716 * open partitions. If not, then it's safe to update
717 * the in-core disklabel.
719 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
720 ccdgetdisklabel(dev);
722 /* Check that the partition exists. */
723 if (part != RAW_PART && ((part >= lp->d_npartitions) ||
724 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
729 cs->sc_openmask |= pmask;
737 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td)
739 int unit = ccdunit(dev);
740 struct ccd_softc *cs;
744 if (ccddebug & CCDB_FOLLOW)
745 printf("ccdclose(%x, %x)\n", dev, flags);
750 cs = &ccd_softc[unit];
752 if ((error = ccdlock(cs)) != 0)
757 /* ...that much closer to allowing unconfiguration... */
758 cs->sc_openmask &= ~(1 << part);
767 int unit = ccdunit(bp->b_dev);
768 struct ccd_softc *cs = &ccd_softc[unit];
771 struct disklabel *lp;
774 if (ccddebug & CCDB_FOLLOW)
775 printf("ccdstrategy(%x): unit %d\n", bp, unit);
777 if ((cs->sc_flags & CCDF_INITED) == 0) {
779 bp->b_flags |= B_ERROR;
783 /* If it's a nil transfer, wake up the top half now. */
784 if (bp->b_bcount == 0)
790 * Do bounds checking and adjust transfer. If there's an
791 * error, the bounds check will flag that for us.
793 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
794 if (ccdpart(bp->b_dev) != RAW_PART) {
795 if (bounds_check_with_label(bp, lp, wlabel) <= 0)
798 int pbn; /* in sc_secsize chunks */
799 long sz; /* in sc_secsize chunks */
801 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
802 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
805 * If out of bounds return an error. If at the EOF point,
806 * simply read or write less.
809 if (pbn < 0 || pbn >= cs->sc_size) {
810 bp->b_resid = bp->b_bcount;
811 if (pbn != cs->sc_size) {
812 bp->b_error = EINVAL;
813 bp->b_flags |= B_ERROR | B_INVAL;
819 * If the request crosses EOF, truncate the request.
821 if (pbn + sz > cs->sc_size) {
822 bp->b_bcount = (cs->sc_size - pbn) *
823 cs->sc_geom.ccg_secsize;
827 bp->b_resid = bp->b_bcount;
842 struct ccd_softc *cs;
846 struct ccdbuf *cbp[4];
847 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
850 struct partition *pp;
853 if (ccddebug & CCDB_FOLLOW)
854 printf("ccdstart(%x, %x)\n", cs, bp);
857 /* Record the transaction start */
858 devstat_start_transaction(&cs->device_stats);
861 * Translate the partition-relative block number to an absolute.
864 if (ccdpart(bp->b_dev) != RAW_PART) {
865 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)];
870 * Allocate component buffers and fire off the requests
873 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
874 ccdbuffer(cbp, cs, bp, bn, addr, bcount);
875 rcount = cbp[0]->cb_buf.b_bcount;
877 if (cs->sc_cflags & CCDF_MIRROR) {
879 * Mirroring. Writes go to both disks, reads are
880 * taken from whichever disk seems most appropriate.
882 * We attempt to localize reads to the disk whose arm
883 * is nearest the read request. We ignore seeks due
884 * to writes when making this determination and we
885 * also try to avoid hogging.
887 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
888 cbp[0]->cb_buf.b_vp->v_numoutput++;
889 cbp[1]->cb_buf.b_vp->v_numoutput++;
890 VOP_STRATEGY(cbp[0]->cb_buf.b_vp,
892 VOP_STRATEGY(cbp[1]->cb_buf.b_vp,
895 int pick = cs->sc_pick;
896 daddr_t range = cs->sc_size / 16;
898 if (bn < cs->sc_blk[pick] - range ||
899 bn > cs->sc_blk[pick] + range
901 cs->sc_pick = pick = 1 - pick;
903 cs->sc_blk[pick] = bn + btodb(rcount);
904 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp,
911 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0)
912 cbp[0]->cb_buf.b_vp->v_numoutput++;
913 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf);
921 * Build a component buffer header.
924 ccdbuffer(cb, cs, bp, bn, addr, bcount)
926 struct ccd_softc *cs;
932 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
938 if (ccddebug & CCDB_IO)
939 printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
940 cs, bp, bn, addr, bcount);
943 * Determine which component bn falls in.
948 if (cs->sc_ileave == 0) {
950 * Serially concatenated and neither a mirror nor a parity
951 * config. This is a special case.
956 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
964 * Calculate cbn, the logical superblock (sc_ileave chunks),
965 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
968 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
969 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
972 * Figure out which interleave table to use.
974 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
975 if (ii->ii_startblk > cbn)
981 * off is the logical superblock relative to the beginning
982 * of this interleave block.
984 off = cbn - ii->ii_startblk;
987 * We must calculate which disk component to use (ccdisk),
988 * and recalculate cbn to be the superblock relative to
989 * the beginning of the component. This is typically done by
990 * adding 'off' and ii->ii_startoff together. However, 'off'
991 * must typically be divided by the number of components in
992 * this interleave array to properly convert it from a
993 * CCD-relative logical superblock number to a
994 * component-relative superblock number.
996 if (ii->ii_ndisk == 1) {
998 * When we have just one disk, it can't be a mirror
999 * or a parity config.
1001 ccdisk = ii->ii_index[0];
1002 cbn = ii->ii_startoff + off;
1004 if (cs->sc_cflags & CCDF_MIRROR) {
1006 * We have forced a uniform mapping, resulting
1007 * in a single interleave array. We double
1008 * up on the first half of the available
1009 * components and our mirror is in the second
1010 * half. This only works with a single
1011 * interleave array because doubling up
1012 * doubles the number of sectors, so there
1013 * cannot be another interleave array because
1014 * the next interleave array's calculations
1017 int ndisk2 = ii->ii_ndisk / 2;
1018 ccdisk = ii->ii_index[off % ndisk2];
1019 cbn = ii->ii_startoff + off / ndisk2;
1020 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1021 } else if (cs->sc_cflags & CCDF_PARITY) {
1023 * XXX not implemented yet
1025 int ndisk2 = ii->ii_ndisk - 1;
1026 ccdisk = ii->ii_index[off % ndisk2];
1027 cbn = ii->ii_startoff + off / ndisk2;
1028 if (cbn % ii->ii_ndisk <= ccdisk)
1031 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1032 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1036 ci = &cs->sc_cinfo[ccdisk];
1039 * Convert cbn from a superblock to a normal block so it
1040 * can be used to calculate (along with cboff) the normal
1041 * block index into this particular disk.
1043 cbn *= cs->sc_ileave;
1047 * Fill in the component buf structure.
1049 cbp = getccdbuf(NULL);
1050 cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
1051 cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
1052 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */
1053 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
1054 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
1055 cbp->cb_buf.b_data = addr;
1056 cbp->cb_buf.b_vp = ci->ci_vp;
1057 if (cs->sc_ileave == 0)
1058 cbc = dbtob((off_t)(ci->ci_size - cbn));
1060 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1061 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1062 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1065 * context for ccdiodone
1068 cbp->cb_unit = cs - ccd_softc;
1069 cbp->cb_comp = ci - cs->sc_cinfo;
1072 if (ccddebug & CCDB_IO)
1073 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1074 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
1075 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1080 * Note: both I/Os are set up when reading from a mirror, but only one
1083 if (cs->sc_cflags & CCDF_MIRROR) {
1084 /* mirror, setup second I/O */
1085 cbp = getccdbuf(cb[0]);
1086 cbp->cb_buf.b_dev = ci2->ci_dev;
1087 cbp->cb_buf.b_vp = ci2->ci_vp;
1088 cbp->cb_comp = ci2 - cs->sc_cinfo;
1090 /* link together the ccdbuf's and clear "mirror done" flag */
1091 cb[0]->cb_mirror = cb[1];
1092 cb[1]->cb_mirror = cb[0];
1093 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1094 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1100 struct ccd_softc *cs;
1104 if (ccddebug & CCDB_FOLLOW)
1105 printf("ccdintr(%x, %x)\n", cs, bp);
1108 * Request is done for better or worse, wakeup the top half.
1110 if (bp->b_flags & B_ERROR)
1111 bp->b_resid = bp->b_bcount;
1112 devstat_end_transaction_buf(&cs->device_stats, bp);
1117 * Called at interrupt time.
1118 * Mark the component as done and if all components are done,
1119 * take a ccd interrupt.
1125 struct buf *bp = cbp->cb_obp;
1126 int unit = cbp->cb_unit;
1131 if (ccddebug & CCDB_FOLLOW)
1132 printf("ccdiodone(%x)\n", cbp);
1133 if (ccddebug & CCDB_IO) {
1134 printf("ccdiodone: bp %x bcount %d resid %d\n",
1135 bp, bp->b_bcount, bp->b_resid);
1136 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1137 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1138 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
1139 cbp->cb_buf.b_bcount);
1143 * If an error occurred, report it. If this is a mirrored
1144 * configuration and the first of two possible reads, do not
1145 * set the error in the bp yet because the second read may
1149 if (cbp->cb_buf.b_flags & B_ERROR) {
1150 const char *msg = "";
1152 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1153 (cbp->cb_buf.b_flags & B_READ) &&
1154 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1156 * We will try our read on the other disk down
1157 * below, also reverse the default pick so if we
1158 * are doing a scan we do not keep hitting the
1161 struct ccd_softc *cs = &ccd_softc[unit];
1163 msg = ", trying other disk";
1164 cs->sc_pick = 1 - cs->sc_pick;
1165 cs->sc_blk[cs->sc_pick] = bp->b_blkno;
1167 bp->b_flags |= B_ERROR;
1168 bp->b_error = cbp->cb_buf.b_error ?
1169 cbp->cb_buf.b_error : EIO;
1171 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1172 unit, bp->b_error, cbp->cb_comp,
1173 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
1177 * Process mirror. If we are writing, I/O has been initiated on both
1178 * buffers and we fall through only after both are finished.
1180 * If we are reading only one I/O is initiated at a time. If an
1181 * error occurs we initiate the second I/O and return, otherwise
1182 * we free the second I/O without initiating it.
1185 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1186 if ((cbp->cb_buf.b_flags & B_READ) == 0) {
1188 * When writing, handshake with the second buffer
1189 * to determine when both are done. If both are not
1190 * done, return here.
1192 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1193 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1200 * When reading, either dispose of the second buffer
1201 * or initiate I/O on the second buffer if an error
1202 * occurred with this one.
1204 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1205 if (cbp->cb_buf.b_flags & B_ERROR) {
1206 cbp->cb_mirror->cb_pflags |=
1209 cbp->cb_mirror->cb_buf.b_vp,
1210 &cbp->cb_mirror->cb_buf
1216 putccdbuf(cbp->cb_mirror);
1224 * use b_bufsize to determine how big the original request was rather
1225 * than b_bcount, because b_bcount may have been truncated for EOF.
1227 * XXX We check for an error, but we do not test the resid for an
1228 * aligned EOF condition. This may result in character & block
1229 * device access not recognizing EOF properly when read or written
1230 * sequentially, but will not affect filesystems.
1232 count = cbp->cb_buf.b_bufsize;
1236 * If all done, "interrupt".
1238 bp->b_resid -= count;
1239 if (bp->b_resid < 0)
1240 panic("ccdiodone: count");
1241 if (bp->b_resid == 0)
1242 ccdintr(&ccd_softc[unit], bp);
1247 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td)
1249 int unit = ccdunit(dev);
1250 int i, j, lookedup = 0, error = 0;
1252 struct ccd_softc *cs;
1253 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1254 struct ccddevice ccd;
1259 KKASSERT(td->td_proc != NULL);
1260 cred = td->td_proc->p_ucred;
1264 cs = &ccd_softc[unit];
1266 bzero(&ccd, sizeof(ccd));
1270 if (cs->sc_flags & CCDF_INITED)
1273 if ((flag & FWRITE) == 0)
1276 if ((error = ccdlock(cs)) != 0)
1279 if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1282 /* Fill in some important bits. */
1283 ccd.ccd_unit = unit;
1284 ccd.ccd_interleave = ccio->ccio_ileave;
1285 if (ccd.ccd_interleave == 0 &&
1286 ((ccio->ccio_flags & CCDF_MIRROR) ||
1287 (ccio->ccio_flags & CCDF_PARITY))) {
1288 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1289 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1291 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1292 (ccio->ccio_flags & CCDF_PARITY)) {
1293 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1294 ccio->ccio_flags &= ~CCDF_PARITY;
1296 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1297 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1298 printf("ccd%d: mirror/parity forces uniform flag\n",
1300 ccio->ccio_flags |= CCDF_UNIFORM;
1302 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1305 * Allocate space for and copy in the array of
1306 * component pathnames and device numbers.
1308 cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1309 M_DEVBUF, M_WAITOK);
1310 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1311 M_DEVBUF, M_WAITOK);
1313 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1314 ccio->ccio_ndisks * sizeof(char **));
1316 free(vpp, M_DEVBUF);
1317 free(cpp, M_DEVBUF);
1323 if (ccddebug & CCDB_INIT)
1324 for (i = 0; i < ccio->ccio_ndisks; ++i)
1325 printf("ccdioctl: component %d: 0x%x\n",
1329 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1331 if (ccddebug & CCDB_INIT)
1332 printf("ccdioctl: lookedup = %d\n", lookedup);
1334 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) {
1335 for (j = 0; j < lookedup; ++j)
1336 (void)vn_close(vpp[j], FREAD|FWRITE, td);
1337 free(vpp, M_DEVBUF);
1338 free(cpp, M_DEVBUF);
1346 ccd.ccd_ndev = ccio->ccio_ndisks;
1349 * Initialize the ccd. Fills in the softc for us.
1351 if ((error = ccdinit(&ccd, cpp, td)) != 0) {
1352 for (j = 0; j < lookedup; ++j)
1353 (void)vn_close(vpp[j], FREAD|FWRITE, td);
1354 bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1355 free(vpp, M_DEVBUF);
1356 free(cpp, M_DEVBUF);
1362 * The ccd has been successfully initialized, so
1363 * we can place it into the array and read the disklabel.
1365 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1366 ccio->ccio_unit = unit;
1367 ccio->ccio_size = cs->sc_size;
1368 ccdgetdisklabel(dev);
1375 if ((cs->sc_flags & CCDF_INITED) == 0)
1378 if ((flag & FWRITE) == 0)
1381 if ((error = ccdlock(cs)) != 0)
1384 /* Don't unconfigure if any other partitions are open */
1385 part = ccdpart(dev);
1386 pmask = (1 << part);
1387 if ((cs->sc_openmask & ~pmask)) {
1393 * Free ccd_softc information and clear entry.
1396 /* Close the components and free their pathnames. */
1397 for (i = 0; i < cs->sc_nccdisks; ++i) {
1399 * XXX: this close could potentially fail and
1400 * cause Bad Things. Maybe we need to force
1401 * the close to happen?
1404 if (ccddebug & CCDB_VNODE)
1405 vprint("CCDIOCCLR: vnode info",
1406 cs->sc_cinfo[i].ci_vp);
1408 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td);
1409 free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1412 /* Free interleave index. */
1413 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1414 free(cs->sc_itable[i].ii_index, M_DEVBUF);
1416 /* Free component info and interleave table. */
1417 free(cs->sc_cinfo, M_DEVBUF);
1418 free(cs->sc_itable, M_DEVBUF);
1419 cs->sc_flags &= ~CCDF_INITED;
1422 * Free ccddevice information and clear entry.
1424 free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1425 free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1427 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1430 * And remove the devstat entry.
1432 devstat_remove_entry(&cs->device_stats);
1434 /* This must be atomic. */
1437 bzero(cs, sizeof(struct ccd_softc));
1443 if ((cs->sc_flags & CCDF_INITED) == 0)
1446 *(struct disklabel *)data = cs->sc_label;
1450 if ((cs->sc_flags & CCDF_INITED) == 0)
1453 ((struct partinfo *)data)->disklab = &cs->sc_label;
1454 ((struct partinfo *)data)->part =
1455 &cs->sc_label.d_partitions[ccdpart(dev)];
1460 if ((cs->sc_flags & CCDF_INITED) == 0)
1463 if ((flag & FWRITE) == 0)
1466 if ((error = ccdlock(cs)) != 0)
1469 cs->sc_flags |= CCDF_LABELLING;
1471 error = setdisklabel(&cs->sc_label,
1472 (struct disklabel *)data, 0);
1474 if (cmd == DIOCWDINFO)
1475 error = writedisklabel(CCDLABELDEV(dev),
1479 cs->sc_flags &= ~CCDF_LABELLING;
1488 if ((cs->sc_flags & CCDF_INITED) == 0)
1491 if ((flag & FWRITE) == 0)
1493 if (*(int *)data != 0)
1494 cs->sc_flags |= CCDF_WLABEL;
1496 cs->sc_flags &= ~CCDF_WLABEL;
1509 struct ccd_softc *cs;
1512 if (ccdopen(dev, 0, S_IFCHR, curthread))
1515 cs = &ccd_softc[ccdunit(dev)];
1516 part = ccdpart(dev);
1518 if ((cs->sc_flags & CCDF_INITED) == 0)
1521 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1524 size = cs->sc_label.d_partitions[part].p_size;
1526 if (ccdclose(dev, 0, S_IFCHR, curthread))
1537 /* Not implemented. */
1542 * Lookup the provided name in the filesystem. If the file exists,
1543 * is a valid block device, and isn't being used by anyone else,
1544 * set *vpp to the file's vnode.
1547 ccdlookup(char *path, struct thread *td, struct vnode **vpp)
1549 struct nameidata nd;
1554 KKASSERT(td->td_proc);
1555 cred = td->td_proc->p_ucred;
1557 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td);
1558 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
1560 if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1561 printf("ccdlookup: vn_open error = %d\n", error);
1567 if (vp->v_usecount > 1) {
1572 if (!vn_isdisk(vp, &error))
1576 if (ccddebug & CCDB_VNODE)
1577 vprint("ccdlookup: vnode info", vp);
1580 VOP_UNLOCK(vp, 0, td);
1581 NDFREE(&nd, NDF_ONLY_PNBUF);
1585 VOP_UNLOCK(vp, 0, td);
1586 NDFREE(&nd, NDF_ONLY_PNBUF);
1587 /* vn_close does vrele() for vp */
1588 (void)vn_close(vp, FREAD|FWRITE, td);
1593 * Read the disklabel from the ccd. If one is not present, fake one
1597 ccdgetdisklabel(dev)
1600 int unit = ccdunit(dev);
1601 struct ccd_softc *cs = &ccd_softc[unit];
1603 struct disklabel *lp = &cs->sc_label;
1604 struct ccdgeom *ccg = &cs->sc_geom;
1606 bzero(lp, sizeof(*lp));
1608 lp->d_secperunit = cs->sc_size;
1609 lp->d_secsize = ccg->ccg_secsize;
1610 lp->d_nsectors = ccg->ccg_nsectors;
1611 lp->d_ntracks = ccg->ccg_ntracks;
1612 lp->d_ncylinders = ccg->ccg_ncylinders;
1613 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1615 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1616 lp->d_type = DTYPE_CCD;
1617 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1619 lp->d_interleave = 1;
1622 lp->d_partitions[RAW_PART].p_offset = 0;
1623 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1624 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1625 lp->d_npartitions = RAW_PART + 1;
1627 lp->d_bbsize = BBSIZE; /* XXX */
1628 lp->d_sbsize = SBSIZE; /* XXX */
1630 lp->d_magic = DISKMAGIC;
1631 lp->d_magic2 = DISKMAGIC;
1632 lp->d_checksum = dkcksum(&cs->sc_label);
1635 * Call the generic disklabel extraction routine.
1637 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1638 if (errstring != NULL)
1639 ccdmakedisklabel(cs);
1642 /* It's actually extremely common to have unlabeled ccds. */
1643 if (ccddebug & CCDB_LABEL)
1644 if (errstring != NULL)
1645 printf("ccd%d: %s\n", unit, errstring);
1650 * Take care of things one might want to take care of in the event
1651 * that a disklabel isn't present.
1654 ccdmakedisklabel(cs)
1655 struct ccd_softc *cs;
1657 struct disklabel *lp = &cs->sc_label;
1660 * For historical reasons, if there's no disklabel present
1661 * the raw partition must be marked FS_BSDFFS.
1663 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1665 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1669 * Wait interruptibly for an exclusive lock.
1672 * Several drivers do this; it should be abstracted and made MP-safe.
1676 struct ccd_softc *cs;
1680 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1681 cs->sc_flags |= CCDF_WANTED;
1682 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
1685 cs->sc_flags |= CCDF_LOCKED;
1690 * Unlock and wake up any waiters.
1694 struct ccd_softc *cs;
1697 cs->sc_flags &= ~CCDF_LOCKED;
1698 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1699 cs->sc_flags &= ~CCDF_WANTED;
1707 struct ccdiinfo *ii;
1711 for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1712 printf(" itab[%d]: #dk %d sblk %d soff %d",
1713 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1714 for (i = 0; i < ii->ii_ndisk; i++)
1715 printf(" %d", ii->ii_index[i]);
1722 /* Local Variables: */
1723 /* c-argdecl-indent: 8 */
1724 /* c-continued-statement-offset: 8 */
1725 /* c-indent-level: 8 */