1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */
2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.20 2005/08/03 16:36:33 hmp Exp $ */
4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */
7 * Copyright (c) 1995 Jason R. Thorpe.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project
22 * 4. The name of the author may not be used to endorse or promote products
23 * derived from this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
81 * "Concatenated" disk driver.
83 * Dynamic configuration and disklabel support by:
84 * Jason R. Thorpe <thorpej@nas.nasa.gov>
85 * Numerical Aerodynamic Simulation Facility
87 * NASA Ames Research Center
88 * Moffett Field, CA 94035
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/module.h>
99 #include <sys/malloc.h>
100 #include <sys/nlookup.h>
101 #include <sys/conf.h>
102 #include <sys/stat.h>
103 #include <sys/sysctl.h>
104 #include <sys/disklabel.h>
105 #include <vfs/ufs/fs.h>
106 #include <sys/devicestat.h>
107 #include <sys/fcntl.h>
108 #include <sys/vnode.h>
109 #include <sys/buf2.h>
111 #include <sys/ccdvar.h>
113 #include <sys/thread2.h>
115 #include <vm/vm_zone.h>
117 #if defined(CCDDEBUG) && !defined(DEBUG)
122 #define CCDB_FOLLOW 0x01
123 #define CCDB_INIT 0x02
125 #define CCDB_LABEL 0x08
126 #define CCDB_VNODE 0x10
127 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
129 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
133 #define ccdunit(x) dkunit(x)
134 #define ccdpart(x) dkpart(x)
137 This is how mirroring works (only writes are special):
139 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
140 linked together by the cb_mirror field. "cb_pflags &
141 CCDPF_MIRROR_DONE" is set to 0 on both of them.
143 When a component returns to ccdiodone(), it checks if "cb_pflags &
144 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
145 flag and returns. If it is, it means its partner has already
146 returned, so it will go to the regular cleanup.
151 struct buf cb_buf; /* new I/O buf */
152 struct buf *cb_obp; /* ptr. to original I/O buf */
153 struct ccdbuf *cb_freenext; /* free list link */
154 int cb_unit; /* target unit */
155 int cb_comp; /* target component */
156 int cb_pflags; /* mirror/parity status flag */
157 struct ccdbuf *cb_mirror; /* mirror counterpart */
160 /* bits in cb_pflags */
161 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
163 #define CCDLABELDEV(dev) \
164 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
166 static d_open_t ccdopen;
167 static d_close_t ccdclose;
168 static d_strategy_t ccdstrategy;
169 static d_ioctl_t ccdioctl;
170 static d_dump_t ccddump;
171 static d_psize_t ccdsize;
173 #define NCCDFREEHIWAT 16
175 #define CDEV_MAJOR 74
177 static struct cdevsw ccd_cdevsw = {
179 /* maj */ CDEV_MAJOR,
185 /* close */ ccdclose,
187 /* write */ physwrite,
188 /* ioctl */ ccdioctl,
191 /* strategy */ ccdstrategy,
196 /* called during module initialization */
197 static void ccdattach (void);
198 static int ccd_modevent (module_t, int, void *);
200 /* called by biodone() at interrupt time */
201 static void ccdiodone (struct ccdbuf *cbp);
203 static void ccdstart (struct ccd_softc *, struct buf *);
204 static void ccdinterleave (struct ccd_softc *, int);
205 static void ccdintr (struct ccd_softc *, struct buf *);
206 static int ccdinit (struct ccddevice *, char **, struct thread *);
207 static int ccdlookup (char *, struct thread *td, struct vnode **);
208 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
209 struct buf *, daddr_t, caddr_t, long);
210 static void ccdgetdisklabel (dev_t);
211 static void ccdmakedisklabel (struct ccd_softc *);
212 static int ccdlock (struct ccd_softc *);
213 static void ccdunlock (struct ccd_softc *);
216 static void printiinfo (struct ccdiinfo *);
219 /* Non-private for the benefit of libkvm. */
220 struct ccd_softc *ccd_softc;
221 struct ccddevice *ccddevs;
222 struct ccdbuf *ccdfreebufs;
223 static int numccdfreebufs;
224 static int numccd = 0;
227 * getccdbuf() - Allocate and zero a ccd buffer.
229 * This routine is called at splbio().
234 getccdbuf(struct ccdbuf *cpy)
239 * Allocate from freelist or malloc as necessary
241 if ((cbp = ccdfreebufs) != NULL) {
242 ccdfreebufs = cbp->cb_freenext;
245 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
249 * Used by mirroring code
252 bcopy(cpy, cbp, sizeof(struct ccdbuf));
254 bzero(cbp, sizeof(struct ccdbuf));
257 * independent struct buf initialization
259 LIST_INIT(&cbp->cb_buf.b_dep);
260 BUF_LOCKINIT(&cbp->cb_buf);
261 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
262 BUF_KERNPROC(&cbp->cb_buf);
268 * putccdbuf() - Free a ccd buffer.
270 * This routine is called at splbio().
275 putccdbuf(struct ccdbuf *cbp)
277 BUF_UNLOCK(&cbp->cb_buf);
278 BUF_LOCKFREE(&cbp->cb_buf);
280 if (numccdfreebufs < NCCDFREEHIWAT) {
281 cbp->cb_freenext = ccdfreebufs;
285 free((caddr_t)cbp, M_DEVBUF);
291 * Number of blocks to leave untouched in front of a component partition.
292 * This is to avoid violating its disklabel area when it starts at the
293 * beginning of the slice.
295 #if !defined(CCD_OFFSET)
296 #define CCD_OFFSET 16
300 * Called by main() during pseudo-device attachment. All we need
301 * to do is allocate enough space for devices to be configured later, and
311 printf("ccd0-%d: Concatenated disk drivers\n", num-1);
313 printf("ccd0: Concatenated disk driver\n");
315 ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF,
317 ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF,
321 cdevsw_add(&ccd_cdevsw, 0, 0);
322 /* XXX: is this necessary? */
323 for (i = 0; i < numccd; ++i)
324 ccddevs[i].ccd_dk = -1;
328 ccd_modevent(mod, type, data)
341 printf("ccd0: Unload not supported!\n");
345 default: /* MOD_SHUTDOWN etc */
351 DEV_MODULE(ccd, ccd_modevent, NULL);
354 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td)
356 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
357 struct ccdcinfo *ci = NULL; /* XXX */
363 struct partinfo dpart;
364 struct ccdgeom *ccg = &cs->sc_geom;
365 char tmppath[MAXPATHLEN];
369 KKASSERT(td->td_proc);
370 cred = td->td_proc->p_ucred;
373 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
374 printf("ccdinit: unit %d\n", ccd->ccd_unit);
378 cs->sc_ileave = ccd->ccd_interleave;
379 cs->sc_nccdisks = ccd->ccd_ndev;
381 /* Allocate space for the component info. */
382 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
386 * Verify that each component piece exists and record
387 * relevant information about it.
391 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
392 vp = ccd->ccd_vpp[ix];
393 ci = &cs->sc_cinfo[ix];
397 * Copy in the pathname of the component.
399 bzero(tmppath, sizeof(tmppath)); /* sanity */
400 if ((error = copyinstr(cpaths[ix], tmppath,
401 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
403 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
404 printf("ccd%d: can't copy path, error = %d\n",
405 ccd->ccd_unit, error);
409 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
410 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
412 ci->ci_dev = vn_todev(vp);
415 * Get partition information for the component.
417 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
418 FREAD, cred, td)) != 0) {
420 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
421 printf("ccd%d: %s: ioctl failed, error = %d\n",
422 ccd->ccd_unit, ci->ci_path, error);
426 if (dpart.part->p_fstype == FS_BSDFFS) {
428 ((dpart.disklab->d_secsize > maxsecsize) ?
429 dpart.disklab->d_secsize : maxsecsize);
430 size = dpart.part->p_size - CCD_OFFSET;
433 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
434 printf("ccd%d: %s: incorrect partition type\n",
435 ccd->ccd_unit, ci->ci_path);
442 * Calculate the size, truncating to an interleave
443 * boundary if necessary.
446 if (cs->sc_ileave > 1)
447 size -= size % cs->sc_ileave;
451 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
452 printf("ccd%d: %s: size == 0\n",
453 ccd->ccd_unit, ci->ci_path);
459 if (minsize == 0 || size < minsize)
466 * Don't allow the interleave to be smaller than
467 * the biggest component sector.
469 if ((cs->sc_ileave > 0) &&
470 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
472 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
473 printf("ccd%d: interleave must be at least %d\n",
474 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
481 * If uniform interleave is desired set all sizes to that of
482 * the smallest component. This will guarantee that a single
483 * interleave table is generated.
485 * Lost space must be taken into account when calculating the
486 * overall size. Half the space is lost when CCDF_MIRROR is
487 * specified. One disk is lost when CCDF_PARITY is specified.
489 if (ccd->ccd_flags & CCDF_UNIFORM) {
490 for (ci = cs->sc_cinfo;
491 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
492 ci->ci_size = minsize;
494 if (ccd->ccd_flags & CCDF_MIRROR) {
496 * Check to see if an even number of components
497 * have been specified. The interleave must also
498 * be non-zero in order for us to be able to
499 * guarantee the topology.
501 if (cs->sc_nccdisks % 2) {
502 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
506 if (cs->sc_ileave == 0) {
507 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
511 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
512 } else if (ccd->ccd_flags & CCDF_PARITY) {
513 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
515 if (cs->sc_ileave == 0) {
516 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
520 cs->sc_size = cs->sc_nccdisks * minsize;
525 * Construct the interleave table.
527 ccdinterleave(cs, ccd->ccd_unit);
530 * Create pseudo-geometry based on 1MB cylinders. It's
533 ccg->ccg_secsize = maxsecsize;
534 ccg->ccg_ntracks = 1;
535 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
536 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
539 * Add a devstat entry for this device.
541 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
542 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
543 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
544 DEVSTAT_PRIORITY_ARRAY);
546 cs->sc_flags |= CCDF_INITED;
547 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
548 cs->sc_unit = ccd->ccd_unit;
551 while (ci > cs->sc_cinfo) {
553 free(ci->ci_path, M_DEVBUF);
555 free(cs->sc_cinfo, M_DEVBUF);
560 ccdinterleave(cs, unit)
561 struct ccd_softc *cs;
564 struct ccdcinfo *ci, *smallci;
571 if (ccddebug & CCDB_INIT)
572 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
576 * Allocate an interleave table. The worst case occurs when each
577 * of N disks is of a different size, resulting in N interleave
580 * Chances are this is too big, but we don't care.
582 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
583 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
584 bzero((caddr_t)cs->sc_itable, size);
587 * Trivial case: no interleave (actually interleave of disk size).
588 * Each table entry represents a single component in its entirety.
590 * An interleave of 0 may not be used with a mirror or parity setup.
592 if (cs->sc_ileave == 0) {
596 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
597 /* Allocate space for ii_index. */
598 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
600 ii->ii_startblk = bn;
602 ii->ii_index[0] = ix;
603 bn += cs->sc_cinfo[ix].ci_size;
608 if (ccddebug & CCDB_INIT)
609 printiinfo(cs->sc_itable);
615 * The following isn't fast or pretty; it doesn't have to be.
619 for (ii = cs->sc_itable; ; ii++) {
621 * Allocate space for ii_index. We might allocate more than
624 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
628 * Locate the smallest of the remaining components
631 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
633 if (ci->ci_size > size &&
635 ci->ci_size < smallci->ci_size)) {
641 * Nobody left, all done
643 if (smallci == NULL) {
649 * Record starting logical block using an sc_ileave blocksize.
651 ii->ii_startblk = bn / cs->sc_ileave;
654 * Record starting component block using an sc_ileave
655 * blocksize. This value is relative to the beginning of
658 ii->ii_startoff = lbn;
661 * Determine how many disks take part in this interleave
662 * and record their indices.
665 for (ci = cs->sc_cinfo;
666 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
667 if (ci->ci_size >= smallci->ci_size) {
668 ii->ii_index[ix++] = ci - cs->sc_cinfo;
672 bn += ix * (smallci->ci_size - size);
673 lbn = smallci->ci_size / cs->sc_ileave;
674 size = smallci->ci_size;
677 if (ccddebug & CCDB_INIT)
678 printiinfo(cs->sc_itable);
684 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td)
686 int unit = ccdunit(dev);
687 struct ccd_softc *cs;
688 struct disklabel *lp;
689 int error = 0, part, pmask;
692 if (ccddebug & CCDB_FOLLOW)
693 printf("ccdopen(%x, %x)\n", dev, flags);
697 cs = &ccd_softc[unit];
699 if ((error = ccdlock(cs)) != 0)
708 * If we're initialized, check to see if there are any other
709 * open partitions. If not, then it's safe to update
710 * the in-core disklabel.
712 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
713 ccdgetdisklabel(dev);
715 /* Check that the partition exists. */
716 if (part != RAW_PART && ((part >= lp->d_npartitions) ||
717 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
722 cs->sc_openmask |= pmask;
730 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td)
732 int unit = ccdunit(dev);
733 struct ccd_softc *cs;
737 if (ccddebug & CCDB_FOLLOW)
738 printf("ccdclose(%x, %x)\n", dev, flags);
743 cs = &ccd_softc[unit];
745 if ((error = ccdlock(cs)) != 0)
750 /* ...that much closer to allowing unconfiguration... */
751 cs->sc_openmask &= ~(1 << part);
760 int unit = ccdunit(bp->b_dev);
761 struct ccd_softc *cs = &ccd_softc[unit];
763 struct disklabel *lp;
766 if (ccddebug & CCDB_FOLLOW)
767 printf("ccdstrategy(%x): unit %d\n", bp, unit);
769 if ((cs->sc_flags & CCDF_INITED) == 0) {
771 bp->b_flags |= B_ERROR;
775 /* If it's a nil transfer, wake up the top half now. */
776 if (bp->b_bcount == 0)
782 * Do bounds checking and adjust transfer. If there's an
783 * error, the bounds check will flag that for us.
785 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
786 if (ccdpart(bp->b_dev) != RAW_PART) {
787 if (bounds_check_with_label(bp, lp, wlabel) <= 0)
790 int pbn; /* in sc_secsize chunks */
791 long sz; /* in sc_secsize chunks */
793 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
794 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
797 * If out of bounds return an error. If at the EOF point,
798 * simply read or write less.
801 if (pbn < 0 || pbn >= cs->sc_size) {
802 bp->b_resid = bp->b_bcount;
803 if (pbn != cs->sc_size) {
804 bp->b_error = EINVAL;
805 bp->b_flags |= B_ERROR | B_INVAL;
811 * If the request crosses EOF, truncate the request.
813 if (pbn + sz > cs->sc_size) {
814 bp->b_bcount = (cs->sc_size - pbn) *
815 cs->sc_geom.ccg_secsize;
819 bp->b_resid = bp->b_bcount;
834 struct ccd_softc *cs;
838 struct ccdbuf *cbp[4];
839 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
842 struct partition *pp;
845 if (ccddebug & CCDB_FOLLOW)
846 printf("ccdstart(%x, %x)\n", cs, bp);
849 /* Record the transaction start */
850 devstat_start_transaction(&cs->device_stats);
853 * Translate the partition-relative block number to an absolute.
856 if (ccdpart(bp->b_dev) != RAW_PART) {
857 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)];
862 * Allocate component buffers and fire off the requests
865 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
866 ccdbuffer(cbp, cs, bp, bn, addr, bcount);
867 rcount = cbp[0]->cb_buf.b_bcount;
869 if (cs->sc_cflags & CCDF_MIRROR) {
871 * Mirroring. Writes go to both disks, reads are
872 * taken from whichever disk seems most appropriate.
874 * We attempt to localize reads to the disk whose arm
875 * is nearest the read request. We ignore seeks due
876 * to writes when making this determination and we
877 * also try to avoid hogging.
879 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
880 cbp[0]->cb_buf.b_vp->v_numoutput++;
881 cbp[1]->cb_buf.b_vp->v_numoutput++;
882 VOP_STRATEGY(cbp[0]->cb_buf.b_vp,
884 VOP_STRATEGY(cbp[1]->cb_buf.b_vp,
887 int pick = cs->sc_pick;
888 daddr_t range = cs->sc_size / 16;
890 if (bn < cs->sc_blk[pick] - range ||
891 bn > cs->sc_blk[pick] + range
893 cs->sc_pick = pick = 1 - pick;
895 cs->sc_blk[pick] = bn + btodb(rcount);
896 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp,
903 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0)
904 cbp[0]->cb_buf.b_vp->v_numoutput++;
905 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf);
913 * Build a component buffer header.
916 ccdbuffer(cb, cs, bp, bn, addr, bcount)
918 struct ccd_softc *cs;
924 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
930 if (ccddebug & CCDB_IO)
931 printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
932 cs, bp, bn, addr, bcount);
935 * Determine which component bn falls in.
940 if (cs->sc_ileave == 0) {
942 * Serially concatenated and neither a mirror nor a parity
943 * config. This is a special case.
948 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
956 * Calculate cbn, the logical superblock (sc_ileave chunks),
957 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
960 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
961 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
964 * Figure out which interleave table to use.
966 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
967 if (ii->ii_startblk > cbn)
973 * off is the logical superblock relative to the beginning
974 * of this interleave block.
976 off = cbn - ii->ii_startblk;
979 * We must calculate which disk component to use (ccdisk),
980 * and recalculate cbn to be the superblock relative to
981 * the beginning of the component. This is typically done by
982 * adding 'off' and ii->ii_startoff together. However, 'off'
983 * must typically be divided by the number of components in
984 * this interleave array to properly convert it from a
985 * CCD-relative logical superblock number to a
986 * component-relative superblock number.
988 if (ii->ii_ndisk == 1) {
990 * When we have just one disk, it can't be a mirror
991 * or a parity config.
993 ccdisk = ii->ii_index[0];
994 cbn = ii->ii_startoff + off;
996 if (cs->sc_cflags & CCDF_MIRROR) {
998 * We have forced a uniform mapping, resulting
999 * in a single interleave array. We double
1000 * up on the first half of the available
1001 * components and our mirror is in the second
1002 * half. This only works with a single
1003 * interleave array because doubling up
1004 * doubles the number of sectors, so there
1005 * cannot be another interleave array because
1006 * the next interleave array's calculations
1009 int ndisk2 = ii->ii_ndisk / 2;
1010 ccdisk = ii->ii_index[off % ndisk2];
1011 cbn = ii->ii_startoff + off / ndisk2;
1012 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1013 } else if (cs->sc_cflags & CCDF_PARITY) {
1015 * XXX not implemented yet
1017 int ndisk2 = ii->ii_ndisk - 1;
1018 ccdisk = ii->ii_index[off % ndisk2];
1019 cbn = ii->ii_startoff + off / ndisk2;
1020 if (cbn % ii->ii_ndisk <= ccdisk)
1023 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1024 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1028 ci = &cs->sc_cinfo[ccdisk];
1031 * Convert cbn from a superblock to a normal block so it
1032 * can be used to calculate (along with cboff) the normal
1033 * block index into this particular disk.
1035 cbn *= cs->sc_ileave;
1039 * Fill in the component buf structure.
1041 cbp = getccdbuf(NULL);
1042 cbp->cb_buf.b_flags = bp->b_flags;
1043 cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
1044 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */
1045 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
1046 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
1047 cbp->cb_buf.b_data = addr;
1048 cbp->cb_buf.b_vp = ci->ci_vp;
1049 if (cs->sc_ileave == 0)
1050 cbc = dbtob((off_t)(ci->ci_size - cbn));
1052 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1053 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1054 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1057 * context for ccdiodone
1060 cbp->cb_unit = cs - ccd_softc;
1061 cbp->cb_comp = ci - cs->sc_cinfo;
1064 if (ccddebug & CCDB_IO)
1065 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1066 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
1067 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1072 * Note: both I/O's setup when reading from mirror, but only one
1075 if (cs->sc_cflags & CCDF_MIRROR) {
1076 /* mirror, setup second I/O */
1077 cbp = getccdbuf(cb[0]);
1078 cbp->cb_buf.b_dev = ci2->ci_dev;
1079 cbp->cb_buf.b_vp = ci2->ci_vp;
1080 cbp->cb_comp = ci2 - cs->sc_cinfo;
1082 /* link together the ccdbuf's and clear "mirror done" flag */
1083 cb[0]->cb_mirror = cb[1];
1084 cb[1]->cb_mirror = cb[0];
1085 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1086 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1092 struct ccd_softc *cs;
1096 if (ccddebug & CCDB_FOLLOW)
1097 printf("ccdintr(%x, %x)\n", cs, bp);
1100 * Request is done for better or worse, wakeup the top half.
1102 if (bp->b_flags & B_ERROR)
1103 bp->b_resid = bp->b_bcount;
1104 devstat_end_transaction_buf(&cs->device_stats, bp);
1109 * Called at interrupt time.
1110 * Mark the component as done and if all components are done,
1111 * take a ccd interrupt.
1117 struct buf *bp = cbp->cb_obp;
1118 int unit = cbp->cb_unit;
1123 if (ccddebug & CCDB_FOLLOW)
1124 printf("ccdiodone(%x)\n", cbp);
1125 if (ccddebug & CCDB_IO) {
1126 printf("ccdiodone: bp %x bcount %d resid %d\n",
1127 bp, bp->b_bcount, bp->b_resid);
1128 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1129 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1130 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
1131 cbp->cb_buf.b_bcount);
1135 * If an error occurred, report it. If this is a mirrored
1136 * configuration and the first of two possible reads, do not
1137 * set the error in the bp yet because the second read may
1141 if (cbp->cb_buf.b_flags & B_ERROR) {
1142 const char *msg = "";
1144 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1145 (cbp->cb_buf.b_flags & B_READ) &&
1146 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1148 * We will try our read on the other disk down
1149 * below, also reverse the default pick so if we
1150 * are doing a scan we do not keep hitting the
1153 struct ccd_softc *cs = &ccd_softc[unit];
1155 msg = ", trying other disk";
1156 cs->sc_pick = 1 - cs->sc_pick;
1157 cs->sc_blk[cs->sc_pick] = bp->b_blkno;
1159 bp->b_flags |= B_ERROR;
1160 bp->b_error = cbp->cb_buf.b_error ?
1161 cbp->cb_buf.b_error : EIO;
1163 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1164 unit, bp->b_error, cbp->cb_comp,
1165 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
1169 * Process mirror. If we are writing, I/O has been initiated on both
1170 * buffers and we fall through only after both are finished.
1172 * If we are reading only one I/O is initiated at a time. If an
1173 * error occurs we initiate the second I/O and return, otherwise
1174 * we free the second I/O without initiating it.
1177 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1178 if ((cbp->cb_buf.b_flags & B_READ) == 0) {
1180 * When writing, handshake with the second buffer
1181 * to determine when both are done. If both are not
1182 * done, return here.
1184 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1185 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1192 * When reading, either dispose of the second buffer
1193 * or initiate I/O on the second buffer if an error
1194 * occurred with this one.
1196 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1197 if (cbp->cb_buf.b_flags & B_ERROR) {
1198 cbp->cb_mirror->cb_pflags |=
1201 cbp->cb_mirror->cb_buf.b_vp,
1202 &cbp->cb_mirror->cb_buf
1208 putccdbuf(cbp->cb_mirror);
1216 * use b_bufsize to determine how big the original request was rather
1217 * than b_bcount, because b_bcount may have been truncated for EOF.
1219 * XXX We check for an error, but we do not test the resid for an
1220 * aligned EOF condition. This may result in character & block
1221 * device access not recognizing EOF properly when read or written
1222 * sequentially, but will not affect filesystems.
1224 count = cbp->cb_buf.b_bufsize;
1228 * If all done, "interrupt".
1230 bp->b_resid -= count;
1231 if (bp->b_resid < 0)
1232 panic("ccdiodone: count");
1233 if (bp->b_resid == 0)
1234 ccdintr(&ccd_softc[unit], bp);
1239 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td)
1241 int unit = ccdunit(dev);
1242 int i, j, lookedup = 0, error = 0;
1244 struct ccd_softc *cs;
1245 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1246 struct ccddevice ccd;
1251 KKASSERT(td->td_proc != NULL);
1252 cred = td->td_proc->p_ucred;
1256 cs = &ccd_softc[unit];
1258 bzero(&ccd, sizeof(ccd));
1262 if (cs->sc_flags & CCDF_INITED)
1265 if ((flag & FWRITE) == 0)
1268 if ((error = ccdlock(cs)) != 0)
1271 if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1274 /* Fill in some important bits. */
1275 ccd.ccd_unit = unit;
1276 ccd.ccd_interleave = ccio->ccio_ileave;
1277 if (ccd.ccd_interleave == 0 &&
1278 ((ccio->ccio_flags & CCDF_MIRROR) ||
1279 (ccio->ccio_flags & CCDF_PARITY))) {
1280 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1281 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1283 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1284 (ccio->ccio_flags & CCDF_PARITY)) {
1285 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1286 ccio->ccio_flags &= ~CCDF_PARITY;
1288 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1289 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1290 printf("ccd%d: mirror/parity forces uniform flag\n",
1292 ccio->ccio_flags |= CCDF_UNIFORM;
1294 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1297 * Allocate space for and copy in the array of
1298 * component pathnames and device numbers.
1300 cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1301 M_DEVBUF, M_WAITOK);
1302 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1303 M_DEVBUF, M_WAITOK);
1305 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1306 ccio->ccio_ndisks * sizeof(char *)); /* element size matches the cpp allocation */
1308 free(vpp, M_DEVBUF);
1309 free(cpp, M_DEVBUF);
1315 if (ccddebug & CCDB_INIT)
1316 for (i = 0; i < ccio->ccio_ndisks; ++i)
1317 printf("ccdioctl: component %d: 0x%x\n",
1321 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1323 if (ccddebug & CCDB_INIT)
1324 printf("ccdioctl: lookedup = %d\n", lookedup);
1326 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) {
1327 for (j = 0; j < lookedup; ++j)
1328 (void)vn_close(vpp[j], FREAD|FWRITE, td);
1329 free(vpp, M_DEVBUF);
1330 free(cpp, M_DEVBUF);
1338 ccd.ccd_ndev = ccio->ccio_ndisks;
1341 * Initialize the ccd. Fills in the softc for us.
1343 if ((error = ccdinit(&ccd, cpp, td)) != 0) {
1344 for (j = 0; j < lookedup; ++j)
1345 (void)vn_close(vpp[j], FREAD|FWRITE, td);
1346 bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1347 free(vpp, M_DEVBUF);
1348 free(cpp, M_DEVBUF);
1354 * The ccd has been successfully initialized, so
1355 * we can place it into the array and read the disklabel.
1357 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1358 ccio->ccio_unit = unit;
1359 ccio->ccio_size = cs->sc_size;
1360 ccdgetdisklabel(dev);
1367 if ((cs->sc_flags & CCDF_INITED) == 0)
1370 if ((flag & FWRITE) == 0)
1373 if ((error = ccdlock(cs)) != 0)
1376 /* Don't unconfigure if any other partitions are open */
1377 part = ccdpart(dev);
1378 pmask = (1 << part);
1379 if ((cs->sc_openmask & ~pmask)) {
1385 * Free ccd_softc information and clear entry.
1388 /* Close the components and free their pathnames. */
1389 for (i = 0; i < cs->sc_nccdisks; ++i) {
1391 * XXX: this close could potentially fail and
1392 * cause Bad Things. Maybe we need to force
1393 * the close to happen?
1396 if (ccddebug & CCDB_VNODE)
1397 vprint("CCDIOCCLR: vnode info",
1398 cs->sc_cinfo[i].ci_vp);
1400 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td);
1401 free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1404 /* Free interleave index. */
1405 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1406 free(cs->sc_itable[i].ii_index, M_DEVBUF);
1408 /* Free component info and interleave table. */
1409 free(cs->sc_cinfo, M_DEVBUF);
1410 free(cs->sc_itable, M_DEVBUF);
1411 cs->sc_flags &= ~CCDF_INITED;
1414 * Free ccddevice information and clear entry.
1416 free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1417 free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1419 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1422 * And remove the devstat entry.
1424 devstat_remove_entry(&cs->device_stats);
1426 /* This must be atomic. */
1429 bzero(cs, sizeof(struct ccd_softc));
1435 if ((cs->sc_flags & CCDF_INITED) == 0)
1438 *(struct disklabel *)data = cs->sc_label;
1442 if ((cs->sc_flags & CCDF_INITED) == 0)
1445 ((struct partinfo *)data)->disklab = &cs->sc_label;
1446 ((struct partinfo *)data)->part =
1447 &cs->sc_label.d_partitions[ccdpart(dev)];
1452 if ((cs->sc_flags & CCDF_INITED) == 0)
1455 if ((flag & FWRITE) == 0)
1458 if ((error = ccdlock(cs)) != 0)
1461 cs->sc_flags |= CCDF_LABELLING;
1463 error = setdisklabel(&cs->sc_label,
1464 (struct disklabel *)data, 0);
1466 if (cmd == DIOCWDINFO) {
1467 dev_t cdev = CCDLABELDEV(dev);
1468 error = writedisklabel(cdev, &cs->sc_label);
1472 cs->sc_flags &= ~CCDF_LABELLING;
1481 if ((cs->sc_flags & CCDF_INITED) == 0)
1484 if ((flag & FWRITE) == 0)
1486 if (*(int *)data != 0)
1487 cs->sc_flags |= CCDF_WLABEL;
1489 cs->sc_flags &= ~CCDF_WLABEL;
1502 struct ccd_softc *cs;
1505 if (ccdopen(dev, 0, S_IFCHR, curthread))
1508 cs = &ccd_softc[ccdunit(dev)];
1509 part = ccdpart(dev);
1511 if ((cs->sc_flags & CCDF_INITED) == 0)
1514 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1517 size = cs->sc_label.d_partitions[part].p_size;
1519 if (ccdclose(dev, 0, S_IFCHR, curthread))
/*
 * ccddump: presumably the crash-dump entry point for a ccd device (inferred
 * from the name and the dev/count/blkno/secsize parameters -- TODO confirm).
 * The only body content visible here is the note below: dumping to a ccd is
 * not implemented.
 * NOTE(review): the value returned to the caller is not visible in this
 * listing; confirm it against the full source.
 */
1526 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize)
1528 /* Not implemented. */
1533 * Look up the provided name in the filesystem. If the file exists,
1534 * is a valid block device, and isn't being used by anyone else,
1535 * set *vpp to the file's vnode.
/*
 * ccdlookup(path, td, vpp): open the component named by `path' for
 * read/write (nlookup_init() followed by vn_open()) and, when the checks
 * below pass, hand its vnode back through *vpp.
 *
 * path - pathname; resolved with UIO_USERSPACE and NLC_FOLLOW|NLC_LOCKVP.
 * td   - calling thread; it must have an associated process (KKASSERT).
 * vpp  - out parameter; receives the vnode on the success path, with the
 *        reference obtained by the open left in place.
 *
 * A vnode whose v_usecount is greater than 1, or for which vn_isdisk()
 * fails, is not accepted.
 * NOTE(review): the error returns and the cleanup for the failure paths are
 * not visible in this listing -- confirm them against the full source.
 */
1538 ccdlookup(char *path, struct thread *td, struct vnode **vpp)
1540 struct nlookupdata nd;
/* A thread without a process has no credentials to fetch below. */
1545 KKASSERT(td->td_proc);
1546 cred = td->td_proc->p_ucred;
1549 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1552 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
/*
 * The mask has to be parenthesized: '&' binds tighter than '|', so the
 * unparenthesized form evaluated (ccddebug & CCDB_FOLLOW) | CCDB_INIT,
 * which is nonzero regardless of ccddebug whenever CCDB_INIT is nonzero.
 */
1554 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1555 printf("ccdlookup: vn_open error = %d\n", error);
/* More than one user of the vnode means somebody else has it in use. */
1561 if (vp->v_usecount > 1) {
/* vn_isdisk() stores its reason for rejecting the vnode in `error'. */
1566 if (!vn_isdisk(vp, &error))
1570 if (ccddebug & CCDB_VNODE)
1571 vprint("ccdlookup: vnode info", vp);
/*
 * Success: unlock the vnode and detach it from the lookup data before
 * handing it to the caller (presumably so that tearing down `nd' does
 * not close it -- TODO confirm).
 */
1574 VOP_UNLOCK(vp, 0, td);
1575 nd.nl_open_vp = NULL;
1577 *vpp = vp; /* leave ref intact */
1585 * Read the disklabel from the ccd. If one is not present, fake one up.
/*
 * ccdgetdisklabel(dev): build the in-core disklabel (sc_label) for the ccd
 * unit selected by `dev'.
 *
 * A default label is synthesized first from the softc size and geometry,
 * then readdisklabel() is called on the label device.  If that call returns
 * a non-NULL error string, ccdmakedisklabel() adjusts the default label.
 */
1589 ccdgetdisklabel(dev)
1592 int unit = ccdunit(dev);
1593 struct ccd_softc *cs = &ccd_softc[unit];
1595 struct disklabel *lp = &cs->sc_label;
1596 struct ccdgeom *ccg = &cs->sc_geom;
/* Start from an all-zero label so unset fields have a known value. */
1599 bzero(lp, sizeof(*lp));
/* Total size comes from the softc, geometry from the softc's ccdgeom. */
1601 lp->d_secperunit = cs->sc_size;
1602 lp->d_secsize = ccg->ccg_secsize;
1603 lp->d_nsectors = ccg->ccg_nsectors;
1604 lp->d_ntracks = ccg->ccg_ntracks;
1605 lp->d_ncylinders = ccg->ccg_ncylinders;
1606 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
/* strncpy() bounds each copy by the destination field's size. */
1608 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1609 lp->d_type = DTYPE_CCD;
1610 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1612 lp->d_interleave = 1;
/* Only the raw partition is described: it spans the whole device. */
1615 lp->d_partitions[RAW_PART].p_offset = 0;
1616 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1617 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1618 lp->d_npartitions = RAW_PART + 1;
1620 lp->d_bbsize = BBSIZE; /* XXX */
1621 lp->d_sbsize = SBSIZE; /* XXX */
/* The checksum is computed last, after every other field has been set. */
1623 lp->d_magic = DISKMAGIC;
1624 lp->d_magic2 = DISKMAGIC;
1625 lp->d_checksum = dkcksum(&cs->sc_label);
1628 * Call the generic disklabel extraction routine.
1630 cdev = CCDLABELDEV(dev);
1631 errstring = readdisklabel(cdev, &cs->sc_label);
/* A non-NULL string means no label was read; adjust the default label. */
1632 if (errstring != NULL)
1633 ccdmakedisklabel(cs);
1636 /* It's actually extremely common to have unlabeled ccds. */
/* The message is therefore printed only when CCDB_LABEL debugging is set. */
1637 if (ccddebug & CCDB_LABEL)
1638 if (errstring != NULL)
1639 printf("ccd%d: %s\n", unit, errstring);
1644 * Take care of things one might want to take care of in the event
1645 * that a disklabel isn't present.
/*
 * ccdmakedisklabel(cs): adjust the label stored in cs->sc_label for the case
 * where no disklabel is present.  The raw partition's fstype is set to
 * FS_BSDFFS and the pack name to "default label".
 *
 * cs - softc whose sc_label is modified in place.
 */
1648 ccdmakedisklabel(cs)
1649 struct ccd_softc *cs;
1651 struct disklabel *lp = &cs->sc_label;
1654 * For historical reasons, if there's no disklabel present
1655 * the raw partition must be marked FS_BSDFFS.
1657 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
/* strncpy() bounds the copy by the size of the d_packname field. */
1659 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1663 * Wait interruptibly for an exclusive lock.
1666 * Several drivers do this; it should be abstracted and made MP-safe.
1670 struct ccd_softc *cs;
1674 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1675 cs->sc_flags |= CCDF_WANTED;
1676 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
1679 cs->sc_flags |= CCDF_LOCKED;
1684 * Unlock and wake up any waiters.
1688 struct ccd_softc *cs;
1691 cs->sc_flags &= ~CCDF_LOCKED;
1692 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1693 cs->sc_flags &= ~CCDF_WANTED;
1701 struct ccdiinfo *ii;
1705 for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1706 printf(" itab[%d]: #dk %d sblk %d soff %d",
1707 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1708 for (i = 0; i < ii->ii_ndisk; i++)
1709 printf(" %d", ii->ii_index[i]);
1716 /* Local Variables: */
1717 /* c-argdecl-indent: 8 */
1718 /* c-continued-statement-offset: 8 */
1719 /* c-indent-level: 8 */