Use SYSREF to reference count struct vnode. v_usecount is now
[dragonfly.git] / sys / dev / disk / ccd / ccd.c
CommitLineData
984263bc 1/* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */
3c37c940 2/* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.39 2007/05/06 19:23:21 dillon Exp $ */
984263bc
MD
3
4/* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */
5
6/*
7 * Copyright (c) 1995 Jason R. Thorpe.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project
21 * by Jason R. Thorpe.
22 * 4. The name of the author may not be used to endorse or promote products
23 * derived from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80/*
81 * "Concatenated" disk driver.
82 *
83 * Dynamic configuration and disklabel support by:
84 * Jason R. Thorpe <thorpej@nas.nasa.gov>
85 * Numerical Aerodynamic Simulation Facility
86 * Mail Stop 258-6
87 * NASA Ames Research Center
88 * Moffett Field, CA 94035
89 */
90
1f2de5d4 91#include "use_ccd.h"
984263bc
MD
92
93#include <sys/param.h>
94#include <sys/systm.h>
95#include <sys/kernel.h>
96#include <sys/module.h>
97#include <sys/proc.h>
98#include <sys/buf.h>
99#include <sys/malloc.h>
fad57d0e 100#include <sys/nlookup.h>
984263bc
MD
101#include <sys/conf.h>
102#include <sys/stat.h>
103#include <sys/sysctl.h>
104#include <sys/disklabel.h>
984263bc
MD
105#include <sys/devicestat.h>
106#include <sys/fcntl.h>
107#include <sys/vnode.h>
3020e3be 108#include <sys/buf2.h>
984263bc
MD
109#include <sys/ccdvar.h>
110
111#include <vm/vm_zone.h>
112
50e58362
MD
113#include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */
114#include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */
115
116#include <sys/thread2.h>
117
984263bc
MD
118#if defined(CCDDEBUG) && !defined(DEBUG)
119#define DEBUG
120#endif
121
122#ifdef DEBUG
123#define CCDB_FOLLOW 0x01
124#define CCDB_INIT 0x02
125#define CCDB_IO 0x04
126#define CCDB_LABEL 0x08
127#define CCDB_VNODE 0x10
128static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
129 CCDB_VNODE;
130SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
131#undef DEBUG
132#endif
133
134#define ccdunit(x) dkunit(x)
135#define ccdpart(x) dkpart(x)
136
137/*
138 This is how mirroring works (only writes are special):
139
140 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
141 linked together by the cb_mirror field. "cb_pflags &
142 CCDPF_MIRROR_DONE" is set to 0 on both of them.
143
144 When a component returns to ccdiodone(), it checks if "cb_pflags &
145 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
146 flag and returns. If it is, it means its partner has already
147 returned, so it will go to the regular cleanup.
148
149 */
150
151struct ccdbuf {
152 struct buf cb_buf; /* new I/O buf */
a8f169e2 153 struct vnode *cb_vp; /* related vnode */
81b5c339 154 struct bio *cb_obio; /* ptr. to original I/O buf */
984263bc
MD
155 struct ccdbuf *cb_freenext; /* free list link */
156 int cb_unit; /* target unit */
157 int cb_comp; /* target component */
158 int cb_pflags; /* mirror/parity status flag */
159 struct ccdbuf *cb_mirror; /* mirror counterpart */
160};
161
162/* bits in cb_pflags */
163#define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
164
165#define CCDLABELDEV(dev) \
e4c9c0c8 166 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
984263bc
MD
167
168static d_open_t ccdopen;
169static d_close_t ccdclose;
170static d_strategy_t ccdstrategy;
171static d_ioctl_t ccdioctl;
172static d_dump_t ccddump;
173static d_psize_t ccdsize;
174
175#define NCCDFREEHIWAT 16
176
177#define CDEV_MAJOR 74
984263bc 178
fef8985e
MD
179static struct dev_ops ccd_ops = {
180 { "ccd", CDEV_MAJOR, D_DISK },
181 .d_open = ccdopen,
182 .d_close = ccdclose,
183 .d_read = physread,
184 .d_write = physwrite,
185 .d_ioctl = ccdioctl,
186 .d_strategy = ccdstrategy,
187 .d_dump = ccddump,
188 .d_psize = ccdsize
984263bc
MD
189};
190
191/* called during module initialization */
38e94a25
RG
192static void ccdattach (void);
193static int ccd_modevent (module_t, int, void *);
984263bc
MD
194
195/* called by biodone() at interrupt time */
81b5c339 196static void ccdiodone (struct bio *bio);
38e94a25 197
81b5c339 198static void ccdstart (struct ccd_softc *, struct bio *);
38e94a25 199static void ccdinterleave (struct ccd_softc *, int);
81b5c339 200static void ccdintr (struct ccd_softc *, struct bio *);
fef8985e
MD
201static int ccdinit (struct ccddevice *, char **, struct ucred *);
202static int ccdlookup (char *, struct vnode **);
38e94a25 203static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
54078292 204 struct bio *, off_t, caddr_t, long);
b13267a5 205static void ccdgetdisklabel (cdev_t);
38e94a25
RG
206static void ccdmakedisklabel (struct ccd_softc *);
207static int ccdlock (struct ccd_softc *);
208static void ccdunlock (struct ccd_softc *);
984263bc
MD
209
210#ifdef DEBUG
38e94a25 211static void printiinfo (struct ccdiinfo *);
984263bc
MD
212#endif
213
214/* Non-private for the benefit of libkvm. */
215struct ccd_softc *ccd_softc;
216struct ccddevice *ccddevs;
217struct ccdbuf *ccdfreebufs;
218static int numccdfreebufs;
219static int numccd = 0;
220
221/*
222 * getccdbuf() - Allocate and zero a ccd buffer.
223 *
224 * This routine is called at splbio().
225 */
226
227static __inline
228struct ccdbuf *
4483eb0d 229getccdbuf(void)
984263bc
MD
230{
231 struct ccdbuf *cbp;
232
233 /*
234 * Allocate from freelist or malloc as necessary
235 */
236 if ((cbp = ccdfreebufs) != NULL) {
237 ccdfreebufs = cbp->cb_freenext;
238 --numccdfreebufs;
81b5c339 239 reinitbufbio(&cbp->cb_buf);
984263bc 240 } else {
efda3bd0 241 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO);
81b5c339 242 initbufbio(&cbp->cb_buf);
984263bc
MD
243 }
244
984263bc
MD
245 /*
246 * independant struct buf initialization
247 */
248 LIST_INIT(&cbp->cb_buf.b_dep);
249 BUF_LOCKINIT(&cbp->cb_buf);
250 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
251 BUF_KERNPROC(&cbp->cb_buf);
4414f2c9 252 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
984263bc
MD
253
254 return(cbp);
255}
256
257/*
258 * putccdbuf() - Free a ccd buffer.
259 *
260 * This routine is called at splbio().
261 */
262
263static __inline
264void
265putccdbuf(struct ccdbuf *cbp)
266{
267 BUF_UNLOCK(&cbp->cb_buf);
268 BUF_LOCKFREE(&cbp->cb_buf);
269
270 if (numccdfreebufs < NCCDFREEHIWAT) {
271 cbp->cb_freenext = ccdfreebufs;
272 ccdfreebufs = cbp;
273 ++numccdfreebufs;
274 } else {
efda3bd0 275 kfree((caddr_t)cbp, M_DEVBUF);
984263bc
MD
276 }
277}
278
279
280/*
281 * Number of blocks to leave untouched in front of a component partition.
282 * This is to avoid violating its disklabel area when it starts at the
283 * beginning of the slice.
284 */
285#if !defined(CCD_OFFSET)
286#define CCD_OFFSET 16
287#endif
288
289/*
290 * Called by main() during pseudo-device attachment. All we need
291 * to do is allocate enough space for devices to be configured later, and
292 * add devsw entries.
293 */
294static void
c436375a 295ccdattach(void)
984263bc
MD
296{
297 int i;
298 int num = NCCD;
299
300 if (num > 1)
e3869ec7 301 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1);
984263bc 302 else
e3869ec7 303 kprintf("ccd0: Concatenated disk driver\n");
984263bc 304
efda3bd0 305 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF,
3aed1355 306 M_WAITOK | M_ZERO);
efda3bd0 307 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF,
3aed1355 308 M_WAITOK | M_ZERO);
984263bc 309 numccd = num;
984263bc 310
fef8985e 311 dev_ops_add(&ccd_ops, 0, 0);
984263bc
MD
312 /* XXX: is this necessary? */
313 for (i = 0; i < numccd; ++i)
314 ccddevs[i].ccd_dk = -1;
315}
316
317static int
c436375a 318ccd_modevent(module_t mod, int type, void *data)
984263bc
MD
319{
320 int error = 0;
321
322 switch (type) {
323 case MOD_LOAD:
324 ccdattach();
325 break;
326
327 case MOD_UNLOAD:
e3869ec7 328 kprintf("ccd0: Unload not supported!\n");
984263bc
MD
329 error = EOPNOTSUPP;
330 break;
331
332 default: /* MOD_SHUTDOWN etc */
333 break;
334 }
335 return (error);
336}
337
338DEV_MODULE(ccd, ccd_modevent, NULL);
339
340static int
fef8985e 341ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred)
984263bc
MD
342{
343 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
344 struct ccdcinfo *ci = NULL; /* XXX */
345 size_t size;
346 int ix;
347 struct vnode *vp;
348 size_t minsize;
349 int maxsecsize;
350 struct partinfo dpart;
351 struct ccdgeom *ccg = &cs->sc_geom;
352 char tmppath[MAXPATHLEN];
353 int error = 0;
354
355#ifdef DEBUG
356 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 357 kprintf("ccdinit: unit %d\n", ccd->ccd_unit);
984263bc
MD
358#endif
359
360 cs->sc_size = 0;
361 cs->sc_ileave = ccd->ccd_interleave;
362 cs->sc_nccdisks = ccd->ccd_ndev;
363
364 /* Allocate space for the component info. */
77652cad 365 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
984263bc
MD
366 M_DEVBUF, M_WAITOK);
367
368 /*
369 * Verify that each component piece exists and record
370 * relevant information about it.
371 */
372 maxsecsize = 0;
373 minsize = 0;
374 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
375 vp = ccd->ccd_vpp[ix];
376 ci = &cs->sc_cinfo[ix];
377 ci->ci_vp = vp;
378
379 /*
380 * Copy in the pathname of the component.
381 */
382 bzero(tmppath, sizeof(tmppath)); /* sanity */
383 if ((error = copyinstr(cpaths[ix], tmppath,
384 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
385#ifdef DEBUG
386 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 387 kprintf("ccd%d: can't copy path, error = %d\n",
984263bc
MD
388 ccd->ccd_unit, error);
389#endif
390 goto fail;
391 }
efda3bd0 392 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
984263bc
MD
393 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
394
395 ci->ci_dev = vn_todev(vp);
396
397 /*
398 * Get partition information for the component.
399 */
400 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
87de5057 401 FREAD, cred)) != 0) {
984263bc
MD
402#ifdef DEBUG
403 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 404 kprintf("ccd%d: %s: ioctl failed, error = %d\n",
984263bc
MD
405 ccd->ccd_unit, ci->ci_path, error);
406#endif
407 goto fail;
408 }
409 if (dpart.part->p_fstype == FS_BSDFFS) {
410 maxsecsize =
411 ((dpart.disklab->d_secsize > maxsecsize) ?
412 dpart.disklab->d_secsize : maxsecsize);
413 size = dpart.part->p_size - CCD_OFFSET;
414 } else {
415#ifdef DEBUG
416 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 417 kprintf("ccd%d: %s: incorrect partition type\n",
984263bc
MD
418 ccd->ccd_unit, ci->ci_path);
419#endif
420 error = EFTYPE;
421 goto fail;
422 }
423
424 /*
425 * Calculate the size, truncating to an interleave
426 * boundary if necessary.
427 */
428
429 if (cs->sc_ileave > 1)
430 size -= size % cs->sc_ileave;
431
432 if (size == 0) {
433#ifdef DEBUG
434 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 435 kprintf("ccd%d: %s: size == 0\n",
984263bc
MD
436 ccd->ccd_unit, ci->ci_path);
437#endif
438 error = ENODEV;
439 goto fail;
440 }
441
442 if (minsize == 0 || size < minsize)
443 minsize = size;
444 ci->ci_size = size;
445 cs->sc_size += size;
446 }
447
448 /*
449 * Don't allow the interleave to be smaller than
450 * the biggest component sector.
451 */
452 if ((cs->sc_ileave > 0) &&
453 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
454#ifdef DEBUG
455 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 456 kprintf("ccd%d: interleave must be at least %d\n",
984263bc
MD
457 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
458#endif
459 error = EINVAL;
460 goto fail;
461 }
462
463 /*
464 * If uniform interleave is desired set all sizes to that of
465 * the smallest component. This will guarentee that a single
466 * interleave table is generated.
467 *
468 * Lost space must be taken into account when calculating the
469 * overall size. Half the space is lost when CCDF_MIRROR is
470 * specified. One disk is lost when CCDF_PARITY is specified.
471 */
472 if (ccd->ccd_flags & CCDF_UNIFORM) {
473 for (ci = cs->sc_cinfo;
474 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
475 ci->ci_size = minsize;
476 }
477 if (ccd->ccd_flags & CCDF_MIRROR) {
478 /*
479 * Check to see if an even number of components
480 * have been specified. The interleave must also
481 * be non-zero in order for us to be able to
482 * guarentee the topology.
483 */
484 if (cs->sc_nccdisks % 2) {
e3869ec7 485 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
984263bc
MD
486 error = EINVAL;
487 goto fail;
488 }
489 if (cs->sc_ileave == 0) {
e3869ec7 490 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
984263bc
MD
491 error = EINVAL;
492 goto fail;
493 }
494 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
495 } else if (ccd->ccd_flags & CCDF_PARITY) {
496 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
497 } else {
498 if (cs->sc_ileave == 0) {
e3869ec7 499 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
984263bc
MD
500 error = EINVAL;
501 goto fail;
502 }
503 cs->sc_size = cs->sc_nccdisks * minsize;
504 }
505 }
506
507 /*
508 * Construct the interleave table.
509 */
510 ccdinterleave(cs, ccd->ccd_unit);
511
512 /*
513 * Create pseudo-geometry based on 1MB cylinders. It's
514 * pretty close.
515 */
516 ccg->ccg_secsize = maxsecsize;
517 ccg->ccg_ntracks = 1;
518 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
519 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
520
521 /*
522 * Add an devstat entry for this device.
523 */
524 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
525 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
526 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
527 DEVSTAT_PRIORITY_ARRAY);
528
529 cs->sc_flags |= CCDF_INITED;
530 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
531 cs->sc_unit = ccd->ccd_unit;
532 return (0);
533fail:
534 while (ci > cs->sc_cinfo) {
535 ci--;
efda3bd0 536 kfree(ci->ci_path, M_DEVBUF);
984263bc 537 }
efda3bd0 538 kfree(cs->sc_cinfo, M_DEVBUF);
984263bc
MD
539 return (error);
540}
541
542static void
c436375a 543ccdinterleave(struct ccd_softc *cs, int unit)
984263bc
MD
544{
545 struct ccdcinfo *ci, *smallci;
546 struct ccdiinfo *ii;
547 daddr_t bn, lbn;
548 int ix;
549 u_long size;
550
551#ifdef DEBUG
552 if (ccddebug & CCDB_INIT)
e3869ec7 553 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
984263bc
MD
554#endif
555
556 /*
557 * Allocate an interleave table. The worst case occurs when each
558 * of N disks is of a different size, resulting in N interleave
559 * tables.
560 *
561 * Chances are this is too big, but we don't care.
562 */
563 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
efda3bd0 564 cs->sc_itable = (struct ccdiinfo *)kmalloc(size, M_DEVBUF, M_WAITOK);
984263bc
MD
565 bzero((caddr_t)cs->sc_itable, size);
566
567 /*
568 * Trivial case: no interleave (actually interleave of disk size).
569 * Each table entry represents a single component in its entirety.
570 *
571 * An interleave of 0 may not be used with a mirror or parity setup.
572 */
573 if (cs->sc_ileave == 0) {
574 bn = 0;
575 ii = cs->sc_itable;
576
577 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
578 /* Allocate space for ii_index. */
efda3bd0 579 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK);
984263bc
MD
580 ii->ii_ndisk = 1;
581 ii->ii_startblk = bn;
582 ii->ii_startoff = 0;
583 ii->ii_index[0] = ix;
584 bn += cs->sc_cinfo[ix].ci_size;
585 ii++;
586 }
587 ii->ii_ndisk = 0;
588#ifdef DEBUG
589 if (ccddebug & CCDB_INIT)
590 printiinfo(cs->sc_itable);
591#endif
592 return;
593 }
594
595 /*
596 * The following isn't fast or pretty; it doesn't have to be.
597 */
598 size = 0;
599 bn = lbn = 0;
600 for (ii = cs->sc_itable; ; ii++) {
601 /*
602 * Allocate space for ii_index. We might allocate more then
603 * we use.
604 */
77652cad 605 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks),
984263bc
MD
606 M_DEVBUF, M_WAITOK);
607
608 /*
609 * Locate the smallest of the remaining components
610 */
611 smallci = NULL;
612 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
613 ci++) {
614 if (ci->ci_size > size &&
615 (smallci == NULL ||
616 ci->ci_size < smallci->ci_size)) {
617 smallci = ci;
618 }
619 }
620
621 /*
622 * Nobody left, all done
623 */
624 if (smallci == NULL) {
625 ii->ii_ndisk = 0;
626 break;
627 }
628
629 /*
630 * Record starting logical block using an sc_ileave blocksize.
631 */
632 ii->ii_startblk = bn / cs->sc_ileave;
633
634 /*
635 * Record starting comopnent block using an sc_ileave
636 * blocksize. This value is relative to the beginning of
637 * a component disk.
638 */
639 ii->ii_startoff = lbn;
640
641 /*
642 * Determine how many disks take part in this interleave
643 * and record their indices.
644 */
645 ix = 0;
646 for (ci = cs->sc_cinfo;
647 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
648 if (ci->ci_size >= smallci->ci_size) {
649 ii->ii_index[ix++] = ci - cs->sc_cinfo;
650 }
651 }
652 ii->ii_ndisk = ix;
653 bn += ix * (smallci->ci_size - size);
654 lbn = smallci->ci_size / cs->sc_ileave;
655 size = smallci->ci_size;
656 }
657#ifdef DEBUG
658 if (ccddebug & CCDB_INIT)
659 printiinfo(cs->sc_itable);
660#endif
661}
662
663/* ARGSUSED */
664static int
fef8985e 665ccdopen(struct dev_open_args *ap)
984263bc 666{
b13267a5 667 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
668 int unit = ccdunit(dev);
669 struct ccd_softc *cs;
670 struct disklabel *lp;
671 int error = 0, part, pmask;
672
673#ifdef DEBUG
674 if (ccddebug & CCDB_FOLLOW)
e3869ec7 675 kprintf("ccdopen(%x, %x)\n", dev, flags);
984263bc
MD
676#endif
677 if (unit >= numccd)
678 return (ENXIO);
679 cs = &ccd_softc[unit];
680
681 if ((error = ccdlock(cs)) != 0)
682 return (error);
683
684 lp = &cs->sc_label;
685
686 part = ccdpart(dev);
687 pmask = (1 << part);
688
689 /*
690 * If we're initialized, check to see if there are any other
691 * open partitions. If not, then it's safe to update
692 * the in-core disklabel.
693 */
694 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
695 ccdgetdisklabel(dev);
696
697 /* Check that the partition exists. */
698 if (part != RAW_PART && ((part >= lp->d_npartitions) ||
699 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
700 error = ENXIO;
701 goto done;
702 }
703
704 cs->sc_openmask |= pmask;
705 done:
706 ccdunlock(cs);
707 return (0);
708}
709
710/* ARGSUSED */
711static int
fef8985e 712ccdclose(struct dev_close_args *ap)
984263bc 713{
b13267a5 714 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
715 int unit = ccdunit(dev);
716 struct ccd_softc *cs;
717 int error = 0, part;
718
719#ifdef DEBUG
720 if (ccddebug & CCDB_FOLLOW)
e3869ec7 721 kprintf("ccdclose(%x, %x)\n", dev, flags);
984263bc
MD
722#endif
723
724 if (unit >= numccd)
725 return (ENXIO);
726 cs = &ccd_softc[unit];
727
728 if ((error = ccdlock(cs)) != 0)
729 return (error);
730
731 part = ccdpart(dev);
732
733 /* ...that much closer to allowing unconfiguration... */
734 cs->sc_openmask &= ~(1 << part);
735 ccdunlock(cs);
736 return (0);
737}
738
fef8985e
MD
739static int
740ccdstrategy(struct dev_strategy_args *ap)
984263bc 741{
b13267a5 742 cdev_t dev = ap->a_head.a_dev;
fef8985e 743 struct bio *bio = ap->a_bio;
81b5c339
MD
744 int unit = ccdunit(dev);
745 struct bio *nbio;
746 struct buf *bp = bio->bio_buf;
984263bc 747 struct ccd_softc *cs = &ccd_softc[unit];
984263bc
MD
748 int wlabel;
749 struct disklabel *lp;
750
751#ifdef DEBUG
752 if (ccddebug & CCDB_FOLLOW)
e3869ec7 753 kprintf("ccdstrategy(%x): unit %d\n", bp, unit);
984263bc
MD
754#endif
755 if ((cs->sc_flags & CCDF_INITED) == 0) {
756 bp->b_error = ENXIO;
4414f2c9 757 goto error;
984263bc
MD
758 }
759
760 /* If it's a nil transfer, wake up the top half now. */
4414f2c9
MD
761 if (bp->b_bcount == 0) {
762 bp->b_resid = 0;
984263bc 763 goto done;
4414f2c9 764 }
984263bc
MD
765
766 lp = &cs->sc_label;
767
768 /*
769 * Do bounds checking and adjust transfer. If there's an
770 * error, the bounds check will flag that for us.
771 */
772 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
81b5c339
MD
773 if (ccdpart(dev) != RAW_PART) {
774 nbio = bounds_check_with_label(dev, bio, lp, wlabel);
775 if (nbio == NULL)
984263bc
MD
776 goto done;
777 } else {
778 int pbn; /* in sc_secsize chunks */
779 long sz; /* in sc_secsize chunks */
780
54078292 781 pbn = (int)(bio->bio_offset / cs->sc_geom.ccg_secsize);
984263bc
MD
782 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
783
784 /*
4414f2c9
MD
785 * If out of bounds return an error. If the request goes
786 * past EOF, clip the request as appropriate. If exactly
787 * at EOF, return success (don't clip), but with 0 bytes
788 * of I/O.
789 *
790 * Mark EOF B_INVAL (just like bad), indicating that the
791 * contents of the buffer, if any, is invalid.
984263bc 792 */
4414f2c9
MD
793 if (pbn < 0)
794 goto bad;
984263bc 795 if (pbn + sz > cs->sc_size) {
4414f2c9
MD
796 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
797 goto bad;
798 if (pbn == cs->sc_size) {
799 bp->b_resid = bp->b_bcount;
800 bp->b_flags |= B_INVAL;
801 goto done;
802 }
803 sz = cs->sc_size - pbn;
804 bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
984263bc 805 }
81b5c339 806 nbio = bio;
984263bc
MD
807 }
808
809 bp->b_resid = bp->b_bcount;
81b5c339 810 nbio->bio_driver_info = dev;
984263bc
MD
811
812 /*
813 * "Start" the unit.
814 */
abe2ad7c 815 crit_enter();
81b5c339 816 ccdstart(cs, nbio);
abe2ad7c 817 crit_exit();
fef8985e 818 return(0);
81b5c339
MD
819
820 /*
821 * note: bio, not nbio, is valid at the done label.
822 */
4414f2c9
MD
823bad:
824 bp->b_error = EINVAL;
825error:
826 bp->b_resid = bp->b_bcount;
827 bp->b_flags |= B_ERROR | B_INVAL;
984263bc 828done:
81b5c339 829 biodone(bio);
fef8985e 830 return(0);
984263bc
MD
831}
832
833static void
81b5c339 834ccdstart(struct ccd_softc *cs, struct bio *bio)
984263bc
MD
835{
836 long bcount, rcount;
837 struct ccdbuf *cbp[4];
81b5c339 838 struct buf *bp = bio->bio_buf;
b13267a5 839 cdev_t dev = bio->bio_driver_info;
984263bc
MD
840 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
841 caddr_t addr;
54078292 842 off_t doffset;
984263bc
MD
843 struct partition *pp;
844
845#ifdef DEBUG
846 if (ccddebug & CCDB_FOLLOW)
e3869ec7 847 kprintf("ccdstart(%x, %x)\n", cs, bp);
984263bc
MD
848#endif
849
850 /* Record the transaction start */
851 devstat_start_transaction(&cs->device_stats);
852
853 /*
854 * Translate the partition-relative block number to an absolute.
855 */
54078292 856 doffset = bio->bio_offset;
81b5c339
MD
857 if (ccdpart(dev) != RAW_PART) {
858 pp = &cs->sc_label.d_partitions[ccdpart(dev)];
54078292 859 doffset += pp->p_offset * cs->sc_label.d_secsize;
984263bc
MD
860 }
861
862 /*
863 * Allocate component buffers and fire off the requests
864 */
865 addr = bp->b_data;
866 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
54078292 867 ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
984263bc
MD
868 rcount = cbp[0]->cb_buf.b_bcount;
869
870 if (cs->sc_cflags & CCDF_MIRROR) {
871 /*
872 * Mirroring. Writes go to both disks, reads are
873 * taken from whichever disk seems most appropriate.
874 *
875 * We attempt to localize reads to the disk whos arm
876 * is nearest the read request. We ignore seeks due
877 * to writes when making this determination and we
878 * also try to avoid hogging.
879 */
10f3fee5 880 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
a8f169e2
MD
881 vn_strategy(cbp[0]->cb_vp,
882 &cbp[0]->cb_buf.b_bio1);
883 vn_strategy(cbp[1]->cb_vp,
884 &cbp[1]->cb_buf.b_bio1);
984263bc
MD
885 } else {
886 int pick = cs->sc_pick;
54078292 887 daddr_t range = cs->sc_size / 16 * cs->sc_label.d_secsize;
984263bc 888
54078292
MD
889 if (doffset < cs->sc_blk[pick] - range ||
890 doffset > cs->sc_blk[pick] + range
984263bc
MD
891 ) {
892 cs->sc_pick = pick = 1 - pick;
893 }
54078292 894 cs->sc_blk[pick] = doffset + rcount;
a8f169e2
MD
895 vn_strategy(cbp[pick]->cb_vp,
896 &cbp[pick]->cb_buf.b_bio1);
984263bc
MD
897 }
898 } else {
899 /*
900 * Not mirroring
901 */
a8f169e2 902 vn_strategy(cbp[0]->cb_vp,
81b5c339 903 &cbp[0]->cb_buf.b_bio1);
984263bc 904 }
54078292 905 doffset += rcount;
984263bc
MD
906 addr += rcount;
907 }
908}
909
910/*
911 * Build a component buffer header.
912 */
913static void
54078292
MD
914ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
915 off_t doffset, caddr_t addr, long bcount)
984263bc
MD
916{
917 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
918 struct ccdbuf *cbp;
54078292 919 daddr_t bn, cbn, cboff;
984263bc
MD
920 off_t cbc;
921
922#ifdef DEBUG
923 if (ccddebug & CCDB_IO)
e3869ec7 924 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n",
984263bc
MD
925 cs, bp, bn, addr, bcount);
926#endif
927 /*
928 * Determine which component bn falls in.
929 */
54078292 930 bn = (daddr_t)(doffset / cs->sc_geom.ccg_secsize);
984263bc
MD
931 cbn = bn;
932 cboff = 0;
933
934 if (cs->sc_ileave == 0) {
935 /*
936 * Serially concatenated and neither a mirror nor a parity
937 * config. This is a special case.
938 */
939 daddr_t sblk;
940
941 sblk = 0;
942 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
943 sblk += ci->ci_size;
944 cbn -= sblk;
945 } else {
946 struct ccdiinfo *ii;
947 int ccdisk, off;
948
949 /*
950 * Calculate cbn, the logical superblock (sc_ileave chunks),
951 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
952 * to cbn.
953 */
954 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
955 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
956
957 /*
958 * Figure out which interleave table to use.
959 */
960 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
961 if (ii->ii_startblk > cbn)
962 break;
963 }
964 ii--;
965
966 /*
967 * off is the logical superblock relative to the beginning
968 * of this interleave block.
969 */
970 off = cbn - ii->ii_startblk;
971
972 /*
973 * We must calculate which disk component to use (ccdisk),
974 * and recalculate cbn to be the superblock relative to
975 * the beginning of the component. This is typically done by
976 * adding 'off' and ii->ii_startoff together. However, 'off'
977 * must typically be divided by the number of components in
978 * this interleave array to be properly convert it from a
979 * CCD-relative logical superblock number to a
980 * component-relative superblock number.
981 */
982 if (ii->ii_ndisk == 1) {
983 /*
984 * When we have just one disk, it can't be a mirror
985 * or a parity config.
986 */
987 ccdisk = ii->ii_index[0];
988 cbn = ii->ii_startoff + off;
989 } else {
990 if (cs->sc_cflags & CCDF_MIRROR) {
991 /*
992 * We have forced a uniform mapping, resulting
993 * in a single interleave array. We double
994 * up on the first half of the available
995 * components and our mirror is in the second
996 * half. This only works with a single
997 * interleave array because doubling up
998 * doubles the number of sectors, so there
999 * cannot be another interleave array because
1000 * the next interleave array's calculations
1001 * would be off.
1002 */
1003 int ndisk2 = ii->ii_ndisk / 2;
1004 ccdisk = ii->ii_index[off % ndisk2];
1005 cbn = ii->ii_startoff + off / ndisk2;
1006 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1007 } else if (cs->sc_cflags & CCDF_PARITY) {
1008 /*
1009 * XXX not implemented yet
1010 */
1011 int ndisk2 = ii->ii_ndisk - 1;
1012 ccdisk = ii->ii_index[off % ndisk2];
1013 cbn = ii->ii_startoff + off / ndisk2;
1014 if (cbn % ii->ii_ndisk <= ccdisk)
1015 ccdisk++;
1016 } else {
1017 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1018 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1019 }
1020 }
1021
1022 ci = &cs->sc_cinfo[ccdisk];
1023
1024 /*
1025 * Convert cbn from a superblock to a normal block so it
1026 * can be used to calculate (along with cboff) the normal
1027 * block index into this particular disk.
1028 */
1029 cbn *= cs->sc_ileave;
1030 }
1031
1032 /*
1033 * Fill in the component buf structure.
9a71d53f
MD
1034 *
1035 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
1036 * will be truncated on device EOF so we use b_bufsize to detect
1037 * the case.
984263bc 1038 */
4483eb0d 1039 cbp = getccdbuf();
c8bcf978 1040 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
4414f2c9 1041 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
984263bc 1042 cbp->cb_buf.b_data = addr;
a8f169e2 1043 cbp->cb_vp = ci->ci_vp;
984263bc
MD
1044 if (cs->sc_ileave == 0)
1045 cbc = dbtob((off_t)(ci->ci_size - cbn));
1046 else
1047 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1048 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1049 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1050
81b5c339
MD
1051 cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1052 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
81b5c339
MD
1053 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
1054
984263bc
MD
1055 /*
1056 * context for ccdiodone
1057 */
81b5c339 1058 cbp->cb_obio = bio;
984263bc
MD
1059 cbp->cb_unit = cs - ccd_softc;
1060 cbp->cb_comp = ci - cs->sc_cinfo;
1061
1062#ifdef DEBUG
1063 if (ccddebug & CCDB_IO)
e3869ec7 1064 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
81b5c339 1065 ci->ci_dev, ci-cs->sc_cinfo, cbp,
54078292 1066 cbp->cb_buf.b_bio1.bio_offset,
984263bc
MD
1067 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1068#endif
1069 cb[0] = cbp;
1070
1071 /*
1072 * Note: both I/O's setup when reading from mirror, but only one
1073 * will be executed.
1074 */
1075 if (cs->sc_cflags & CCDF_MIRROR) {
1076 /* mirror, setup second I/O */
4483eb0d
MD
1077 cbp = getccdbuf();
1078
c8bcf978 1079 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
4414f2c9 1080 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
4483eb0d 1081 cbp->cb_buf.b_data = addr;
a8f169e2 1082 cbp->cb_vp = ci2->ci_vp;
4483eb0d
MD
1083 if (cs->sc_ileave == 0)
1084 cbc = dbtob((off_t)(ci->ci_size - cbn));
1085 else
1086 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1087 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1088 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1089
1090 cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1091 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
4483eb0d
MD
1092 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
1093
1094 /*
1095 * context for ccdiodone
1096 */
1097 cbp->cb_obio = bio;
1098 cbp->cb_unit = cs - ccd_softc;
984263bc
MD
1099 cbp->cb_comp = ci2 - cs->sc_cinfo;
1100 cb[1] = cbp;
1101 /* link together the ccdbuf's and clear "mirror done" flag */
1102 cb[0]->cb_mirror = cb[1];
1103 cb[1]->cb_mirror = cb[0];
1104 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1105 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1106 }
1107}
1108
1109static void
81b5c339 1110ccdintr(struct ccd_softc *cs, struct bio *bio)
984263bc 1111{
81b5c339
MD
1112 struct buf *bp = bio->bio_buf;
1113
984263bc
MD
1114#ifdef DEBUG
1115 if (ccddebug & CCDB_FOLLOW)
e3869ec7 1116 kprintf("ccdintr(%x, %x)\n", cs, bp);
984263bc
MD
1117#endif
1118 /*
1119 * Request is done for better or worse, wakeup the top half.
1120 */
1121 if (bp->b_flags & B_ERROR)
1122 bp->b_resid = bp->b_bcount;
1123 devstat_end_transaction_buf(&cs->device_stats, bp);
81b5c339 1124 biodone(bio);
984263bc
MD
1125}
1126
1127/*
1128 * Called at interrupt time.
1129 * Mark the component as done and if all components are done,
1130 * take a ccd interrupt.
1131 */
1132static void
81b5c339 1133ccdiodone(struct bio *bio)
984263bc 1134{
81b5c339
MD
1135 struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1136 struct bio *obio = cbp->cb_obio;
1137 struct buf *obp = obio->bio_buf;
984263bc 1138 int unit = cbp->cb_unit;
abe2ad7c 1139 int count;
984263bc 1140
81b5c339
MD
1141 /*
1142 * Since we do not have exclusive access to underlying devices,
1143 * we can't keep cache translations around.
1144 */
1145 clearbiocache(bio->bio_next);
1146
abe2ad7c 1147 crit_enter();
984263bc
MD
1148#ifdef DEBUG
1149 if (ccddebug & CCDB_FOLLOW)
e3869ec7 1150 kprintf("ccdiodone(%x)\n", cbp);
984263bc 1151 if (ccddebug & CCDB_IO) {
e3869ec7 1152 kprintf("ccdiodone: bp %x bcount %d resid %d\n",
81b5c339 1153 obp, obp->b_bcount, obp->b_resid);
e3869ec7 1154 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
984263bc 1155 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
54078292 1156 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
984263bc
MD
1157 cbp->cb_buf.b_bcount);
1158 }
1159#endif
9a71d53f 1160
984263bc
MD
1161 /*
1162 * If an error occured, report it. If this is a mirrored
1163 * configuration and the first of two possible reads, do not
1164 * set the error in the bp yet because the second read may
1165 * succeed.
1166 */
984263bc
MD
1167 if (cbp->cb_buf.b_flags & B_ERROR) {
1168 const char *msg = "";
1169
1170 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
10f3fee5 1171 (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
984263bc
MD
1172 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1173 /*
1174 * We will try our read on the other disk down
1175 * below, also reverse the default pick so if we
1176 * are doing a scan we do not keep hitting the
1177 * bad disk first.
1178 */
1179 struct ccd_softc *cs = &ccd_softc[unit];
1180
1181 msg = ", trying other disk";
1182 cs->sc_pick = 1 - cs->sc_pick;
54078292 1183 cs->sc_blk[cs->sc_pick] = obio->bio_offset;
984263bc 1184 } else {
81b5c339
MD
1185 obp->b_flags |= B_ERROR;
1186 obp->b_error = cbp->cb_buf.b_error ?
984263bc
MD
1187 cbp->cb_buf.b_error : EIO;
1188 }
e3869ec7 1189 kprintf("ccd%d: error %d on component %d offset %lld (ccd offset %lld)%s\n",
81b5c339 1190 unit, obp->b_error, cbp->cb_comp,
54078292
MD
1191 cbp->cb_buf.b_bio2.bio_offset,
1192 obio->bio_offset, msg);
984263bc
MD
1193 }
1194
1195 /*
1196 * Process mirror. If we are writing, I/O has been initiated on both
1197 * buffers and we fall through only after both are finished.
1198 *
1199 * If we are reading only one I/O is initiated at a time. If an
1200 * error occurs we initiate the second I/O and return, otherwise
1201 * we free the second I/O without initiating it.
1202 */
1203
1204 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
10f3fee5 1205 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
984263bc
MD
1206 /*
1207 * When writing, handshake with the second buffer
1208 * to determine when both are done. If both are not
1209 * done, return here.
1210 */
1211 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1212 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1213 putccdbuf(cbp);
abe2ad7c 1214 crit_exit();
984263bc
MD
1215 return;
1216 }
1217 } else {
1218 /*
1219 * When reading, either dispose of the second buffer
1220 * or initiate I/O on the second buffer if an error
1221 * occured with this one.
1222 */
1223 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1224 if (cbp->cb_buf.b_flags & B_ERROR) {
1225 cbp->cb_mirror->cb_pflags |=
1226 CCDPF_MIRROR_DONE;
81b5c339 1227 vn_strategy(
a8f169e2 1228 cbp->cb_mirror->cb_vp,
81b5c339 1229 &cbp->cb_mirror->cb_buf.b_bio1
984263bc
MD
1230 );
1231 putccdbuf(cbp);
abe2ad7c 1232 crit_exit();
984263bc
MD
1233 return;
1234 } else {
1235 putccdbuf(cbp->cb_mirror);
1236 /* fall through */
1237 }
1238 }
1239 }
1240 }
1241
1242 /*
9a71d53f 1243 * Use our saved b_bufsize to determine if an unexpected EOF occured.
984263bc
MD
1244 */
1245 count = cbp->cb_buf.b_bufsize;
1246 putccdbuf(cbp);
1247
1248 /*
1249 * If all done, "interrupt".
1250 */
81b5c339
MD
1251 obp->b_resid -= count;
1252 if (obp->b_resid < 0)
984263bc 1253 panic("ccdiodone: count");
81b5c339
MD
1254 if (obp->b_resid == 0)
1255 ccdintr(&ccd_softc[unit], obio);
abe2ad7c 1256 crit_exit();
984263bc
MD
1257}
1258
1259static int
fef8985e 1260ccdioctl(struct dev_ioctl_args *ap)
984263bc 1261{
b13267a5 1262 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
1263 int unit = ccdunit(dev);
1264 int i, j, lookedup = 0, error = 0;
abe2ad7c 1265 int part, pmask;
984263bc 1266 struct ccd_softc *cs;
fef8985e 1267 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data;
984263bc
MD
1268 struct ccddevice ccd;
1269 char **cpp;
1270 struct vnode **vpp;
1271
1272 if (unit >= numccd)
1273 return (ENXIO);
1274 cs = &ccd_softc[unit];
1275
1276 bzero(&ccd, sizeof(ccd));
1277
fef8985e 1278 switch (ap->a_cmd) {
984263bc
MD
1279 case CCDIOCSET:
1280 if (cs->sc_flags & CCDF_INITED)
1281 return (EBUSY);
1282
fef8985e 1283 if ((ap->a_fflag & FWRITE) == 0)
984263bc
MD
1284 return (EBADF);
1285
1286 if ((error = ccdlock(cs)) != 0)
1287 return (error);
1288
1289 if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1290 return (EINVAL);
1291
1292 /* Fill in some important bits. */
1293 ccd.ccd_unit = unit;
1294 ccd.ccd_interleave = ccio->ccio_ileave;
1295 if (ccd.ccd_interleave == 0 &&
1296 ((ccio->ccio_flags & CCDF_MIRROR) ||
1297 (ccio->ccio_flags & CCDF_PARITY))) {
e3869ec7 1298 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
984263bc
MD
1299 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1300 }
1301 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1302 (ccio->ccio_flags & CCDF_PARITY)) {
e3869ec7 1303 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
984263bc
MD
1304 ccio->ccio_flags &= ~CCDF_PARITY;
1305 }
1306 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1307 !(ccio->ccio_flags & CCDF_UNIFORM)) {
e3869ec7 1308 kprintf("ccd%d: mirror/parity forces uniform flag\n",
984263bc
MD
1309 unit);
1310 ccio->ccio_flags |= CCDF_UNIFORM;
1311 }
1312 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1313
1314 /*
1315 * Allocate space for and copy in the array of
1316 * componet pathnames and device numbers.
1317 */
77652cad 1318 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *),
984263bc 1319 M_DEVBUF, M_WAITOK);
77652cad 1320 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *),
984263bc
MD
1321 M_DEVBUF, M_WAITOK);
1322
1323 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1324 ccio->ccio_ndisks * sizeof(char **));
1325 if (error) {
efda3bd0
MD
1326 kfree(vpp, M_DEVBUF);
1327 kfree(cpp, M_DEVBUF);
984263bc
MD
1328 ccdunlock(cs);
1329 return (error);
1330 }
1331
1332#ifdef DEBUG
1333 if (ccddebug & CCDB_INIT)
1334 for (i = 0; i < ccio->ccio_ndisks; ++i)
e3869ec7 1335 kprintf("ccdioctl: component %d: 0x%x\n",
984263bc
MD
1336 i, cpp[i]);
1337#endif
1338
1339 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1340#ifdef DEBUG
1341 if (ccddebug & CCDB_INIT)
e3869ec7 1342 kprintf("ccdioctl: lookedup = %d\n", lookedup);
984263bc 1343#endif
fef8985e 1344 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) {
984263bc 1345 for (j = 0; j < lookedup; ++j)
87de5057 1346 (void)vn_close(vpp[j], FREAD|FWRITE);
efda3bd0
MD
1347 kfree(vpp, M_DEVBUF);
1348 kfree(cpp, M_DEVBUF);
984263bc
MD
1349 ccdunlock(cs);
1350 return (error);
1351 }
1352 ++lookedup;
1353 }
1354 ccd.ccd_cpp = cpp;
1355 ccd.ccd_vpp = vpp;
1356 ccd.ccd_ndev = ccio->ccio_ndisks;
1357
1358 /*
1359 * Initialize the ccd. Fills in the softc for us.
1360 */
fef8985e 1361 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) {
984263bc 1362 for (j = 0; j < lookedup; ++j)
87de5057 1363 (void)vn_close(vpp[j], FREAD|FWRITE);
984263bc 1364 bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
efda3bd0
MD
1365 kfree(vpp, M_DEVBUF);
1366 kfree(cpp, M_DEVBUF);
984263bc
MD
1367 ccdunlock(cs);
1368 return (error);
1369 }
1370
1371 /*
1372 * The ccd has been successfully initialized, so
1373 * we can place it into the array and read the disklabel.
1374 */
1375 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1376 ccio->ccio_unit = unit;
1377 ccio->ccio_size = cs->sc_size;
1378 ccdgetdisklabel(dev);
1379
1380 ccdunlock(cs);
1381
1382 break;
1383
1384 case CCDIOCCLR:
1385 if ((cs->sc_flags & CCDF_INITED) == 0)
1386 return (ENXIO);
1387
fef8985e 1388 if ((ap->a_fflag & FWRITE) == 0)
984263bc
MD
1389 return (EBADF);
1390
1391 if ((error = ccdlock(cs)) != 0)
1392 return (error);
1393
1394 /* Don't unconfigure if any other partitions are open */
1395 part = ccdpart(dev);
1396 pmask = (1 << part);
1397 if ((cs->sc_openmask & ~pmask)) {
1398 ccdunlock(cs);
1399 return (EBUSY);
1400 }
1401
1402 /*
1403 * Free ccd_softc information and clear entry.
1404 */
1405
1406 /* Close the components and free their pathnames. */
1407 for (i = 0; i < cs->sc_nccdisks; ++i) {
1408 /*
1409 * XXX: this close could potentially fail and
1410 * cause Bad Things. Maybe we need to force
1411 * the close to happen?
1412 */
1413#ifdef DEBUG
1414 if (ccddebug & CCDB_VNODE)
1415 vprint("CCDIOCCLR: vnode info",
1416 cs->sc_cinfo[i].ci_vp);
1417#endif
87de5057 1418 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE);
efda3bd0 1419 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF);
984263bc
MD
1420 }
1421
1422 /* Free interleave index. */
1423 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
efda3bd0 1424 kfree(cs->sc_itable[i].ii_index, M_DEVBUF);
984263bc
MD
1425
1426 /* Free component info and interleave table. */
efda3bd0
MD
1427 kfree(cs->sc_cinfo, M_DEVBUF);
1428 kfree(cs->sc_itable, M_DEVBUF);
984263bc
MD
1429 cs->sc_flags &= ~CCDF_INITED;
1430
1431 /*
1432 * Free ccddevice information and clear entry.
1433 */
efda3bd0
MD
1434 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF);
1435 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF);
984263bc
MD
1436 ccd.ccd_dk = -1;
1437 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1438
1439 /*
1440 * And remove the devstat entry.
1441 */
1442 devstat_remove_entry(&cs->device_stats);
1443
1444 /* This must be atomic. */
abe2ad7c 1445 crit_enter();
984263bc
MD
1446 ccdunlock(cs);
1447 bzero(cs, sizeof(struct ccd_softc));
abe2ad7c 1448 crit_exit();
984263bc
MD
1449
1450 break;
1451
1452 case DIOCGDINFO:
1453 if ((cs->sc_flags & CCDF_INITED) == 0)
1454 return (ENXIO);
1455
fef8985e 1456 *(struct disklabel *)ap->a_data = cs->sc_label;
984263bc
MD
1457 break;
1458
1459 case DIOCGPART:
1460 if ((cs->sc_flags & CCDF_INITED) == 0)
1461 return (ENXIO);
1462
fef8985e
MD
1463 ((struct partinfo *)ap->a_data)->disklab = &cs->sc_label;
1464 ((struct partinfo *)ap->a_data)->part =
984263bc
MD
1465 &cs->sc_label.d_partitions[ccdpart(dev)];
1466 break;
1467
1468 case DIOCWDINFO:
1469 case DIOCSDINFO:
1470 if ((cs->sc_flags & CCDF_INITED) == 0)
1471 return (ENXIO);
1472
fef8985e 1473 if ((ap->a_fflag & FWRITE) == 0)
984263bc
MD
1474 return (EBADF);
1475
1476 if ((error = ccdlock(cs)) != 0)
1477 return (error);
1478
1479 cs->sc_flags |= CCDF_LABELLING;
1480
1481 error = setdisklabel(&cs->sc_label,
fef8985e 1482 (struct disklabel *)ap->a_data, 0);
984263bc 1483 if (error == 0) {
fef8985e 1484 if (ap->a_cmd == DIOCWDINFO) {
b13267a5 1485 cdev_t cdev = CCDLABELDEV(dev);
e4c9c0c8
MD
1486 error = writedisklabel(cdev, &cs->sc_label);
1487 }
984263bc
MD
1488 }
1489
1490 cs->sc_flags &= ~CCDF_LABELLING;
1491
1492 ccdunlock(cs);
1493
1494 if (error)
1495 return (error);
1496 break;
1497
1498 case DIOCWLABEL:
1499 if ((cs->sc_flags & CCDF_INITED) == 0)
1500 return (ENXIO);
1501
fef8985e 1502 if ((ap->a_fflag & FWRITE) == 0)
984263bc 1503 return (EBADF);
fef8985e 1504 if (*(int *)ap->a_data != 0)
984263bc
MD
1505 cs->sc_flags |= CCDF_WLABEL;
1506 else
1507 cs->sc_flags &= ~CCDF_WLABEL;
1508 break;
1509
1510 default:
1511 return (ENOTTY);
1512 }
1513
1514 return (0);
1515}
1516
1517static int
fef8985e 1518ccdsize(struct dev_psize_args *ap)
984263bc 1519{
b13267a5 1520 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
1521 struct ccd_softc *cs;
1522 int part, size;
1523
fef8985e 1524 if (dev_dopen(dev, 0, S_IFCHR, proc0.p_ucred))
984263bc
MD
1525 return (-1);
1526
1527 cs = &ccd_softc[ccdunit(dev)];
1528 part = ccdpart(dev);
1529
1530 if ((cs->sc_flags & CCDF_INITED) == 0)
1531 return (-1);
1532
1533 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1534 size = -1;
1535 else
1536 size = cs->sc_label.d_partitions[part].p_size;
1537
fef8985e 1538 if (dev_dclose(dev, 0, S_IFCHR))
984263bc
MD
1539 return (-1);
1540
fef8985e
MD
1541 ap->a_result = size;
1542 return(0);
984263bc
MD
1543}
1544
/*
 * Crash dump entry point.  Dumping to a ccd is not implemented, so the
 * request is always rejected with ENXIO.
 */
static int
ccddump(struct dev_dump_args *ap)
{
	return (ENXIO);
}
1551
1552/*
1553 * Lookup the provided name in the filesystem. If the file exists,
1554 * is a valid block device, and isn't being used by anyone else,
1555 * set *vpp to the file's vnode.
1556 */
1557static int
fef8985e 1558ccdlookup(char *path, struct vnode **vpp)
984263bc 1559{
fad57d0e 1560 struct nlookupdata nd;
984263bc
MD
1561 struct vnode *vp;
1562 int error;
dadab5e9 1563
fad57d0e 1564 *vpp = NULL;
984263bc 1565
fad57d0e
MD
1566 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1567 if (error)
1568 return (error);
1569 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
984263bc
MD
1570#ifdef DEBUG
1571 if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
e3869ec7 1572 kprintf("ccdlookup: vn_open error = %d\n", error);
984263bc 1573#endif
fad57d0e 1574 goto done;
984263bc 1575 }
fad57d0e 1576 vp = nd.nl_open_vp;
984263bc 1577
3c37c940 1578 if (vp->v_opencount > 1) {
984263bc 1579 error = EBUSY;
fad57d0e 1580 goto done;
984263bc
MD
1581 }
1582
1583 if (!vn_isdisk(vp, &error))
fad57d0e 1584 goto done;
984263bc
MD
1585
1586#ifdef DEBUG
1587 if (ccddebug & CCDB_VNODE)
1588 vprint("ccdlookup: vnode info", vp);
1589#endif
1590
a11aaa81 1591 vn_unlock(vp);
fad57d0e
MD
1592 nd.nl_open_vp = NULL;
1593 nlookup_done(&nd);
1594 *vpp = vp; /* leave ref intact */
984263bc 1595 return (0);
fad57d0e
MD
1596done:
1597 nlookup_done(&nd);
984263bc
MD
1598 return (error);
1599}
1600
1601/*
1602 * Read the disklabel from the ccd. If one is not present, fake one
1603 * up.
1604 */
1605static void
b13267a5 1606ccdgetdisklabel(cdev_t dev)
984263bc
MD
1607{
1608 int unit = ccdunit(dev);
1609 struct ccd_softc *cs = &ccd_softc[unit];
1610 char *errstring;
1611 struct disklabel *lp = &cs->sc_label;
1612 struct ccdgeom *ccg = &cs->sc_geom;
b13267a5 1613 cdev_t cdev;
984263bc
MD
1614
1615 bzero(lp, sizeof(*lp));
1616
1617 lp->d_secperunit = cs->sc_size;
1618 lp->d_secsize = ccg->ccg_secsize;
1619 lp->d_nsectors = ccg->ccg_nsectors;
1620 lp->d_ntracks = ccg->ccg_ntracks;
1621 lp->d_ncylinders = ccg->ccg_ncylinders;
1622 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1623
1624 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1625 lp->d_type = DTYPE_CCD;
1626 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1627 lp->d_rpm = 3600;
1628 lp->d_interleave = 1;
1629 lp->d_flags = 0;
1630
1631 lp->d_partitions[RAW_PART].p_offset = 0;
1632 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1633 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1634 lp->d_npartitions = RAW_PART + 1;
1635
1636 lp->d_bbsize = BBSIZE; /* XXX */
1637 lp->d_sbsize = SBSIZE; /* XXX */
1638
1639 lp->d_magic = DISKMAGIC;
1640 lp->d_magic2 = DISKMAGIC;
1641 lp->d_checksum = dkcksum(&cs->sc_label);
1642
1643 /*
1644 * Call the generic disklabel extraction routine.
1645 */
e4c9c0c8
MD
1646 cdev = CCDLABELDEV(dev);
1647 errstring = readdisklabel(cdev, &cs->sc_label);
984263bc
MD
1648 if (errstring != NULL)
1649 ccdmakedisklabel(cs);
1650
1651#ifdef DEBUG
1652 /* It's actually extremely common to have unlabeled ccds. */
1653 if (ccddebug & CCDB_LABEL)
1654 if (errstring != NULL)
e3869ec7 1655 kprintf("ccd%d: %s\n", unit, errstring);
984263bc
MD
1656#endif
1657}
1658
1659/*
1660 * Take care of things one might want to take care of in the event
1661 * that a disklabel isn't present.
1662 */
1663static void
c436375a 1664ccdmakedisklabel(struct ccd_softc *cs)
984263bc
MD
1665{
1666 struct disklabel *lp = &cs->sc_label;
1667
1668 /*
1669 * For historical reasons, if there's no disklabel present
1670 * the raw partition must be marked FS_BSDFFS.
1671 */
1672 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1673
1674 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1675}
1676
1677/*
1678 * Wait interruptibly for an exclusive lock.
1679 *
1680 * XXX
1681 * Several drivers do this; it should be abstracted and made MP-safe.
1682 */
1683static int
c436375a 1684ccdlock(struct ccd_softc *cs)
984263bc
MD
1685{
1686 int error;
1687
1688 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1689 cs->sc_flags |= CCDF_WANTED;
377d4740 1690 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
984263bc
MD
1691 return (error);
1692 }
1693 cs->sc_flags |= CCDF_LOCKED;
1694 return (0);
1695}
1696
1697/*
1698 * Unlock and wake up any waiters.
1699 */
1700static void
c436375a 1701ccdunlock(struct ccd_softc *cs)
984263bc
MD
1702{
1703
1704 cs->sc_flags &= ~CCDF_LOCKED;
1705 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1706 cs->sc_flags &= ~CCDF_WANTED;
1707 wakeup(cs);
1708 }
1709}
1710
1711#ifdef DEBUG
1712static void
c436375a 1713printiinfo(struct ccdiinfo *ii)
984263bc
MD
1714{
1715 int ix, i;
1716
1717 for (ix = 0; ii->ii_ndisk; ix++, ii++) {
e3869ec7 1718 kprintf(" itab[%d]: #dk %d sblk %d soff %d",
984263bc
MD
1719 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1720 for (i = 0; i < ii->ii_ndisk; i++)
e3869ec7
SW
1721 kprintf(" %d", ii->ii_index[i]);
1722 kprintf("\n");
984263bc
MD
1723 }
1724}
1725#endif
1726
1727\f
1728/* Local Variables: */
1729/* c-argdecl-indent: 8 */
1730/* c-continued-statement-offset: 8 */
1731/* c-indent-level: 8 */
1732/* End: */