ccd(4): Fix operator precedence.
[dragonfly.git] / sys / dev / disk / ccd / ccd.c
CommitLineData
7dc62e37
MD
1/*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
7dc62e37 34 */
984263bc
MD
35/*
36 * Copyright (c) 1995 Jason R. Thorpe.
37 * All rights reserved.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. All advertising materials mentioning features or use of this software
48 * must display the following acknowledgement:
49 * This product includes software developed for the NetBSD Project
50 * by Jason R. Thorpe.
51 * 4. The name of the author may not be used to endorse or promote products
52 * derived from this software without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
58 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
59 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
60 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
61 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 */
66
67/*
68 * Copyright (c) 1988 University of Utah.
69 * Copyright (c) 1990, 1993
70 * The Regents of the University of California. All rights reserved.
71 *
72 * This code is derived from software contributed to Berkeley by
73 * the Systems Programming Group of the University of Utah Computer
74 * Science Department.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * from: Utah $Hdr: cd.c 1.6 90/11/28$
7dc62e37
MD
105 */
106/*
107 * @(#)cd.c 8.2 (Berkeley) 11/16/93
108 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $
109 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
984263bc
MD
110 */
111
112/*
113 * "Concatenated" disk driver.
114 *
7dc62e37 115 * Original dynamic configuration support by:
984263bc
MD
116 * Jason R. Thorpe <thorpej@nas.nasa.gov>
117 * Numerical Aerodynamic Simulation Facility
118 * Mail Stop 258-6
119 * NASA Ames Research Center
120 * Moffett Field, CA 94035
121 */
122
1f2de5d4 123#include "use_ccd.h"
984263bc
MD
124
125#include <sys/param.h>
126#include <sys/systm.h>
127#include <sys/kernel.h>
128#include <sys/module.h>
129#include <sys/proc.h>
130#include <sys/buf.h>
131#include <sys/malloc.h>
fad57d0e 132#include <sys/nlookup.h>
984263bc
MD
133#include <sys/conf.h>
134#include <sys/stat.h>
135#include <sys/sysctl.h>
7dc62e37 136#include <sys/disk.h>
ba0cc1ab 137#include <sys/dtype.h>
154b688d 138#include <sys/diskslice.h>
984263bc
MD
139#include <sys/devicestat.h>
140#include <sys/fcntl.h>
141#include <sys/vnode.h>
984263bc
MD
142#include <sys/ccdvar.h>
143
144#include <vm/vm_zone.h>
145
50e58362
MD
146#include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */
147#include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */
148
149#include <sys/thread2.h>
77912481
MD
150#include <sys/buf2.h>
151#include <sys/mplock2.h>
50e58362 152
984263bc
MD
153#if defined(CCDDEBUG) && !defined(DEBUG)
154#define DEBUG
155#endif
156
157#ifdef DEBUG
158#define CCDB_FOLLOW 0x01
159#define CCDB_INIT 0x02
160#define CCDB_IO 0x04
161#define CCDB_LABEL 0x08
162#define CCDB_VNODE 0x10
163static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
164 CCDB_VNODE;
165SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
166#undef DEBUG
167#endif
168
169#define ccdunit(x) dkunit(x)
170#define ccdpart(x) dkpart(x)
171
172/*
173 This is how mirroring works (only writes are special):
174
175 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
176 linked together by the cb_mirror field. "cb_pflags &
177 CCDPF_MIRROR_DONE" is set to 0 on both of them.
178
179 When a component returns to ccdiodone(), it checks if "cb_pflags &
180 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
181 flag and returns. If it is, it means its partner has already
182 returned, so it will go to the regular cleanup.
183
184 */
185
186struct ccdbuf {
187 struct buf cb_buf; /* new I/O buf */
a8f169e2 188 struct vnode *cb_vp; /* related vnode */
81b5c339 189 struct bio *cb_obio; /* ptr. to original I/O buf */
984263bc
MD
190 struct ccdbuf *cb_freenext; /* free list link */
191 int cb_unit; /* target unit */
192 int cb_comp; /* target component */
193 int cb_pflags; /* mirror/parity status flag */
194 struct ccdbuf *cb_mirror; /* mirror counterpart */
195};
196
197/* bits in cb_pflags */
198#define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
199
984263bc
MD
200static d_open_t ccdopen;
201static d_close_t ccdclose;
202static d_strategy_t ccdstrategy;
203static d_ioctl_t ccdioctl;
204static d_dump_t ccddump;
984263bc
MD
205
206#define NCCDFREEHIWAT 16
207
fef8985e 208static struct dev_ops ccd_ops = {
88abd8b5 209 { "ccd", 0, D_DISK },
fef8985e
MD
210 .d_open = ccdopen,
211 .d_close = ccdclose,
212 .d_read = physread,
213 .d_write = physwrite,
214 .d_ioctl = ccdioctl,
215 .d_strategy = ccdstrategy,
7dc62e37 216 .d_dump = ccddump
984263bc
MD
217};
218
219/* called during module initialization */
38e94a25 220static void ccdattach (void);
50ac0fb0 221static int ccddetach (void);
38e94a25 222static int ccd_modevent (module_t, int, void *);
984263bc
MD
223
224/* called by biodone() at interrupt time */
81b5c339 225static void ccdiodone (struct bio *bio);
38e94a25 226
81b5c339 227static void ccdstart (struct ccd_softc *, struct bio *);
38e94a25 228static void ccdinterleave (struct ccd_softc *, int);
81b5c339 229static void ccdintr (struct ccd_softc *, struct bio *);
fef8985e
MD
230static int ccdinit (struct ccddevice *, char **, struct ucred *);
231static int ccdlookup (char *, struct vnode **);
38e94a25 232static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
54078292 233 struct bio *, off_t, caddr_t, long);
38e94a25
RG
234static int ccdlock (struct ccd_softc *);
235static void ccdunlock (struct ccd_softc *);
984263bc
MD
236
237#ifdef DEBUG
38e94a25 238static void printiinfo (struct ccdiinfo *);
984263bc
MD
239#endif
240
241/* Non-private for the benefit of libkvm. */
242struct ccd_softc *ccd_softc;
243struct ccddevice *ccddevs;
244struct ccdbuf *ccdfreebufs;
245static int numccdfreebufs;
246static int numccd = 0;
247
248/*
249 * getccdbuf() - Allocate and zero a ccd buffer.
250 *
251 * This routine is called at splbio().
252 */
253
254static __inline
255struct ccdbuf *
4483eb0d 256getccdbuf(void)
984263bc
MD
257{
258 struct ccdbuf *cbp;
259
260 /*
261 * Allocate from freelist or malloc as necessary
262 */
263 if ((cbp = ccdfreebufs) != NULL) {
264 ccdfreebufs = cbp->cb_freenext;
265 --numccdfreebufs;
81b5c339 266 reinitbufbio(&cbp->cb_buf);
984263bc 267 } else {
efda3bd0 268 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO);
81b5c339 269 initbufbio(&cbp->cb_buf);
984263bc
MD
270 }
271
984263bc
MD
272 /*
273 * independant struct buf initialization
274 */
408357d8 275 buf_dep_init(&cbp->cb_buf);
984263bc
MD
276 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
277 BUF_KERNPROC(&cbp->cb_buf);
4414f2c9 278 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
984263bc
MD
279
280 return(cbp);
281}
282
283/*
284 * putccdbuf() - Free a ccd buffer.
285 *
286 * This routine is called at splbio().
287 */
288
289static __inline
290void
291putccdbuf(struct ccdbuf *cbp)
292{
293 BUF_UNLOCK(&cbp->cb_buf);
984263bc
MD
294
295 if (numccdfreebufs < NCCDFREEHIWAT) {
296 cbp->cb_freenext = ccdfreebufs;
297 ccdfreebufs = cbp;
298 ++numccdfreebufs;
299 } else {
b5d7061d 300 uninitbufbio(&cbp->cb_buf);
efda3bd0 301 kfree((caddr_t)cbp, M_DEVBUF);
984263bc
MD
302 }
303}
304
984263bc
MD
305/*
306 * Called by main() during pseudo-device attachment. All we need
307 * to do is allocate enough space for devices to be configured later, and
308 * add devsw entries.
309 */
310static void
c436375a 311ccdattach(void)
984263bc 312{
7dc62e37
MD
313 struct disk_info info;
314 struct ccd_softc *cs;
984263bc
MD
315 int i;
316 int num = NCCD;
317
318 if (num > 1)
e3869ec7 319 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1);
984263bc 320 else
e3869ec7 321 kprintf("ccd0: Concatenated disk driver\n");
984263bc 322
efda3bd0 323 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF,
3aed1355 324 M_WAITOK | M_ZERO);
efda3bd0 325 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF,
3aed1355 326 M_WAITOK | M_ZERO);
984263bc 327 numccd = num;
984263bc 328
7dc62e37
MD
329 /*
330 * With normal disk devices the open simply fails if the media
331 * is not present. With CCD we have to be able to open the
332 * raw disk to use the ioctl's to set it up, so create a dummy
333 * disk info structure so dscheck() doesn't blow up.
334 */
335 bzero(&info, sizeof(info));
336 info.d_media_blksize = DEV_BSIZE;
337
338 for (i = 0; i < numccd; ++i) {
339 cs = &ccd_softc[i];
340 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops);
341 cs->sc_dev->si_drv1 = cs;
342 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */
343 disk_setdiskinfo(&cs->sc_disk, &info);
344 }
984263bc
MD
345}
346
50ac0fb0
MD
347static int
348ccddetach(void)
349{
350 struct ccd_softc *cs;
351 struct dev_ioctl_args ioctl_args;
352 int i;
353 int error = 0;
354 int eval;
355
356 bzero(&ioctl_args, sizeof(ioctl_args));
357
358 for (i = 0; i < numccd; ++i) {
359 cs = &ccd_softc[i];
360 if (cs->sc_dev == NULL)
361 continue;
362 ioctl_args.a_head.a_dev = cs->sc_dev;
363 ioctl_args.a_cmd = CCDIOCCLR;
364 ioctl_args.a_fflag = FWRITE;
365 eval = ccdioctl(&ioctl_args);
366 if (eval && eval != ENXIO) {
367 kprintf("ccd%d: In use, cannot detach\n", i);
368 error = EBUSY;
369 }
370 }
371 if (error == 0) {
372 for (i = 0; i < numccd; ++i) {
373 cs = &ccd_softc[i];
374 if (cs->sc_dev == NULL)
375 continue;
376 disk_destroy(&cs->sc_disk);
377 cs->sc_dev = NULL;
378 }
379 if (ccd_softc)
380 kfree(ccd_softc, M_DEVBUF);
381 if (ccddevs)
382 kfree(ccddevs, M_DEVBUF);
383 }
384 return (error);
385}
386
984263bc 387static int
c436375a 388ccd_modevent(module_t mod, int type, void *data)
984263bc
MD
389{
390 int error = 0;
391
392 switch (type) {
393 case MOD_LOAD:
394 ccdattach();
395 break;
396
397 case MOD_UNLOAD:
50ac0fb0 398 error = ccddetach();
984263bc
MD
399 break;
400
401 default: /* MOD_SHUTDOWN etc */
402 break;
403 }
404 return (error);
405}
406
407DEV_MODULE(ccd, ccd_modevent, NULL);
408
409static int
fef8985e 410ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred)
984263bc
MD
411{
412 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
413 struct ccdcinfo *ci = NULL; /* XXX */
984263bc
MD
414 int ix;
415 struct vnode *vp;
7dc62e37
MD
416 u_int64_t skip;
417 u_int64_t size;
418 u_int64_t minsize;
984263bc
MD
419 int maxsecsize;
420 struct partinfo dpart;
421 struct ccdgeom *ccg = &cs->sc_geom;
422 char tmppath[MAXPATHLEN];
423 int error = 0;
424
425#ifdef DEBUG
426 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 427 kprintf("ccdinit: unit %d\n", ccd->ccd_unit);
984263bc
MD
428#endif
429
430 cs->sc_size = 0;
431 cs->sc_ileave = ccd->ccd_interleave;
432 cs->sc_nccdisks = ccd->ccd_ndev;
433
434 /* Allocate space for the component info. */
77652cad 435 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
7dc62e37 436 M_DEVBUF, M_WAITOK);
d785777f 437 cs->sc_maxiosize = MAXPHYS;
984263bc
MD
438
439 /*
440 * Verify that each component piece exists and record
441 * relevant information about it.
442 */
443 maxsecsize = 0;
444 minsize = 0;
445 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
446 vp = ccd->ccd_vpp[ix];
447 ci = &cs->sc_cinfo[ix];
448 ci->ci_vp = vp;
449
450 /*
451 * Copy in the pathname of the component.
452 */
453 bzero(tmppath, sizeof(tmppath)); /* sanity */
454 if ((error = copyinstr(cpaths[ix], tmppath,
455 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
456#ifdef DEBUG
457 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 458 kprintf("ccd%d: can't copy path, error = %d\n",
984263bc
MD
459 ccd->ccd_unit, error);
460#endif
461 goto fail;
462 }
efda3bd0 463 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
984263bc
MD
464 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
465
466 ci->ci_dev = vn_todev(vp);
d785777f
MD
467 if (ci->ci_dev->si_iosize_max &&
468 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) {
469 cs->sc_maxiosize = ci->ci_dev->si_iosize_max;
470 }
984263bc
MD
471
472 /*
473 * Get partition information for the component.
474 */
87baaf0c
MD
475 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD,
476 cred, NULL);
2ec8fb79 477 if (error) {
984263bc
MD
478#ifdef DEBUG
479 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 480 kprintf("ccd%d: %s: ioctl failed, error = %d\n",
984263bc
MD
481 ccd->ccd_unit, ci->ci_path, error);
482#endif
483 goto fail;
484 }
af255b66 485 if (dpart.fstype != FS_CCD &&
18cb7add 486 !kuuid_is_ccd(&dpart.fstype_uuid)) {
7dc62e37
MD
487 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n",
488 ccd->ccd_unit, ci->ci_path);
984263bc
MD
489 error = EFTYPE;
490 goto fail;
491 }
7dc62e37
MD
492 if (maxsecsize < dpart.media_blksize)
493 maxsecsize = dpart.media_blksize;
494
495 /*
496 * Skip a certain amount of storage at the beginning of
497 * the component to make sure we don't infringe on any
498 * reserved sectors. This is handled entirely by
1c3c151b 499 * dpart.reserved_blocks but we also impose a minimum
7dc62e37
MD
500 * of 16 sectors for backwards compatibility.
501 */
502 skip = 16;
1c3c151b
MD
503 if (skip < dpart.reserved_blocks)
504 skip = dpart.reserved_blocks;
7dc62e37 505 size = dpart.media_blocks - skip;
984263bc
MD
506
507 /*
508 * Calculate the size, truncating to an interleave
509 * boundary if necessary.
510 */
984263bc
MD
511 if (cs->sc_ileave > 1)
512 size -= size % cs->sc_ileave;
513
7dc62e37 514 if ((int64_t)size <= 0) {
984263bc
MD
515#ifdef DEBUG
516 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 517 kprintf("ccd%d: %s: size == 0\n",
984263bc
MD
518 ccd->ccd_unit, ci->ci_path);
519#endif
520 error = ENODEV;
521 goto fail;
522 }
523
7dc62e37
MD
524 /*
525 * Calculate the smallest uniform component, used
526 * elsewhere.
527 */
528 if (minsize == 0 || minsize > size)
984263bc 529 minsize = size;
7dc62e37 530 ci->ci_skip = skip;
984263bc
MD
531 ci->ci_size = size;
532 cs->sc_size += size;
533 }
50ac0fb0
MD
534 kprintf("ccd%d: max component iosize is %d total blocks %lld\n",
535 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size);
984263bc
MD
536
537 /*
538 * Don't allow the interleave to be smaller than
539 * the biggest component sector.
540 */
541 if ((cs->sc_ileave > 0) &&
7dc62e37 542 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) {
984263bc
MD
543#ifdef DEBUG
544 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 545 kprintf("ccd%d: interleave must be at least %d\n",
984263bc
MD
546 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
547#endif
548 error = EINVAL;
549 goto fail;
550 }
551
552 /*
553 * If uniform interleave is desired set all sizes to that of
554 * the smallest component. This will guarentee that a single
555 * interleave table is generated.
556 *
557 * Lost space must be taken into account when calculating the
558 * overall size. Half the space is lost when CCDF_MIRROR is
559 * specified. One disk is lost when CCDF_PARITY is specified.
560 */
561 if (ccd->ccd_flags & CCDF_UNIFORM) {
562 for (ci = cs->sc_cinfo;
563 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
564 ci->ci_size = minsize;
565 }
566 if (ccd->ccd_flags & CCDF_MIRROR) {
567 /*
568 * Check to see if an even number of components
569 * have been specified. The interleave must also
570 * be non-zero in order for us to be able to
571 * guarentee the topology.
572 */
573 if (cs->sc_nccdisks % 2) {
e3869ec7 574 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
984263bc
MD
575 error = EINVAL;
576 goto fail;
577 }
578 if (cs->sc_ileave == 0) {
e3869ec7 579 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
984263bc
MD
580 error = EINVAL;
581 goto fail;
582 }
583 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
584 } else if (ccd->ccd_flags & CCDF_PARITY) {
585 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
586 } else {
587 if (cs->sc_ileave == 0) {
e3869ec7 588 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
984263bc
MD
589 error = EINVAL;
590 goto fail;
591 }
592 cs->sc_size = cs->sc_nccdisks * minsize;
593 }
594 }
595
596 /*
597 * Construct the interleave table.
598 */
599 ccdinterleave(cs, ccd->ccd_unit);
600
601 /*
602 * Create pseudo-geometry based on 1MB cylinders. It's
603 * pretty close.
604 */
605 ccg->ccg_secsize = maxsecsize;
606 ccg->ccg_ntracks = 1;
607 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
608 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
609
610 /*
611 * Add an devstat entry for this device.
612 */
613 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
614 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
615 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
616 DEVSTAT_PRIORITY_ARRAY);
617
618 cs->sc_flags |= CCDF_INITED;
619 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
620 cs->sc_unit = ccd->ccd_unit;
621 return (0);
622fail:
623 while (ci > cs->sc_cinfo) {
624 ci--;
efda3bd0 625 kfree(ci->ci_path, M_DEVBUF);
984263bc 626 }
efda3bd0 627 kfree(cs->sc_cinfo, M_DEVBUF);
7dc62e37 628 cs->sc_cinfo = NULL;
984263bc
MD
629 return (error);
630}
631
632static void
c436375a 633ccdinterleave(struct ccd_softc *cs, int unit)
984263bc
MD
634{
635 struct ccdcinfo *ci, *smallci;
636 struct ccdiinfo *ii;
7dc62e37
MD
637 u_int64_t bn;
638 u_int64_t lbn;
af255b66
MD
639 u_int64_t size;
640 int icount;
984263bc 641 int ix;
984263bc
MD
642
643#ifdef DEBUG
644 if (ccddebug & CCDB_INIT)
e3869ec7 645 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
984263bc
MD
646#endif
647
648 /*
649 * Allocate an interleave table. The worst case occurs when each
650 * of N disks is of a different size, resulting in N interleave
651 * tables.
652 *
653 * Chances are this is too big, but we don't care.
654 */
af255b66
MD
655 icount = cs->sc_nccdisks + 1;
656 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo),
657 M_DEVBUF, M_WAITOK|M_ZERO);
984263bc
MD
658
659 /*
660 * Trivial case: no interleave (actually interleave of disk size).
661 * Each table entry represents a single component in its entirety.
662 *
663 * An interleave of 0 may not be used with a mirror or parity setup.
664 */
665 if (cs->sc_ileave == 0) {
666 bn = 0;
667 ii = cs->sc_itable;
668
669 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
670 /* Allocate space for ii_index. */
efda3bd0 671 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK);
984263bc
MD
672 ii->ii_ndisk = 1;
673 ii->ii_startblk = bn;
674 ii->ii_startoff = 0;
675 ii->ii_index[0] = ix;
676 bn += cs->sc_cinfo[ix].ci_size;
677 ii++;
678 }
679 ii->ii_ndisk = 0;
680#ifdef DEBUG
681 if (ccddebug & CCDB_INIT)
682 printiinfo(cs->sc_itable);
683#endif
684 return;
685 }
686
687 /*
688 * The following isn't fast or pretty; it doesn't have to be.
689 */
690 size = 0;
691 bn = lbn = 0;
af255b66 692 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) {
984263bc
MD
693 /*
694 * Allocate space for ii_index. We might allocate more then
695 * we use.
696 */
77652cad 697 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks),
7dc62e37 698 M_DEVBUF, M_WAITOK);
984263bc
MD
699
700 /*
701 * Locate the smallest of the remaining components
702 */
703 smallci = NULL;
7dc62e37
MD
704 ci = cs->sc_cinfo;
705 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) {
984263bc
MD
706 if (ci->ci_size > size &&
707 (smallci == NULL ||
708 ci->ci_size < smallci->ci_size)) {
709 smallci = ci;
710 }
7dc62e37 711 ++ci;
984263bc
MD
712 }
713
714 /*
715 * Nobody left, all done
716 */
717 if (smallci == NULL) {
718 ii->ii_ndisk = 0;
719 break;
720 }
721
722 /*
723 * Record starting logical block using an sc_ileave blocksize.
724 */
725 ii->ii_startblk = bn / cs->sc_ileave;
726
727 /*
af255b66 728 * Record starting component block using an sc_ileave
984263bc
MD
729 * blocksize. This value is relative to the beginning of
730 * a component disk.
731 */
732 ii->ii_startoff = lbn;
733
734 /*
735 * Determine how many disks take part in this interleave
736 * and record their indices.
737 */
738 ix = 0;
739 for (ci = cs->sc_cinfo;
740 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
741 if (ci->ci_size >= smallci->ci_size) {
742 ii->ii_index[ix++] = ci - cs->sc_cinfo;
743 }
744 }
745 ii->ii_ndisk = ix;
af255b66
MD
746
747 /*
748 * Adjust for loop
749 */
984263bc
MD
750 bn += ix * (smallci->ci_size - size);
751 lbn = smallci->ci_size / cs->sc_ileave;
752 size = smallci->ci_size;
753 }
af255b66
MD
754 if (ii == &cs->sc_itable[icount])
755 panic("ccdinterlave software bug! table exhausted");
984263bc
MD
756#ifdef DEBUG
757 if (ccddebug & CCDB_INIT)
758 printiinfo(cs->sc_itable);
759#endif
760}
761
762/* ARGSUSED */
763static int
fef8985e 764ccdopen(struct dev_open_args *ap)
984263bc 765{
b13267a5 766 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
767 int unit = ccdunit(dev);
768 struct ccd_softc *cs;
7dc62e37 769 int error = 0;
984263bc
MD
770
771#ifdef DEBUG
772 if (ccddebug & CCDB_FOLLOW)
e3869ec7 773 kprintf("ccdopen(%x, %x)\n", dev, flags);
984263bc
MD
774#endif
775 if (unit >= numccd)
776 return (ENXIO);
777 cs = &ccd_softc[unit];
778
7dc62e37
MD
779 if ((error = ccdlock(cs)) == 0) {
780 ccdunlock(cs);
984263bc 781 }
7dc62e37 782 return (error);
984263bc
MD
783}
784
785/* ARGSUSED */
786static int
fef8985e 787ccdclose(struct dev_close_args *ap)
984263bc 788{
b13267a5 789 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
790 int unit = ccdunit(dev);
791 struct ccd_softc *cs;
7dc62e37 792 int error = 0;
984263bc
MD
793
794#ifdef DEBUG
795 if (ccddebug & CCDB_FOLLOW)
e3869ec7 796 kprintf("ccdclose(%x, %x)\n", dev, flags);
984263bc
MD
797#endif
798
799 if (unit >= numccd)
800 return (ENXIO);
801 cs = &ccd_softc[unit];
7dc62e37
MD
802 if ((error = ccdlock(cs)) == 0) {
803 ccdunlock(cs);
804 }
805 return (error);
984263bc
MD
806}
807
fef8985e
MD
808static int
809ccdstrategy(struct dev_strategy_args *ap)
984263bc 810{
b13267a5 811 cdev_t dev = ap->a_head.a_dev;
fef8985e 812 struct bio *bio = ap->a_bio;
81b5c339
MD
813 int unit = ccdunit(dev);
814 struct bio *nbio;
815 struct buf *bp = bio->bio_buf;
984263bc 816 struct ccd_softc *cs = &ccd_softc[unit];
7dc62e37
MD
817 u_int64_t pbn; /* in sc_secsize chunks */
818 u_int32_t sz; /* in sc_secsize chunks */
984263bc
MD
819
820#ifdef DEBUG
821 if (ccddebug & CCDB_FOLLOW)
e3869ec7 822 kprintf("ccdstrategy(%x): unit %d\n", bp, unit);
984263bc
MD
823#endif
824 if ((cs->sc_flags & CCDF_INITED) == 0) {
825 bp->b_error = ENXIO;
4414f2c9 826 goto error;
984263bc
MD
827 }
828
829 /* If it's a nil transfer, wake up the top half now. */
4414f2c9
MD
830 if (bp->b_bcount == 0) {
831 bp->b_resid = 0;
984263bc 832 goto done;
4414f2c9 833 }
984263bc 834
984263bc
MD
835 /*
836 * Do bounds checking and adjust transfer. If there's an
837 * error, the bounds check will flag that for us.
838 */
984263bc 839
7dc62e37
MD
840 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize;
841 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
984263bc 842
7dc62e37
MD
843 /*
844 * If out of bounds return an error. If the request goes
845 * past EOF, clip the request as appropriate. If exactly
846 * at EOF, return success (don't clip), but with 0 bytes
847 * of I/O.
848 *
849 * Mark EOF B_INVAL (just like bad), indicating that the
850 * contents of the buffer, if any, is invalid.
851 */
852 if ((int64_t)pbn < 0)
853 goto bad;
854 if (pbn + sz > cs->sc_size) {
855 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
4414f2c9 856 goto bad;
7dc62e37
MD
857 if (pbn == cs->sc_size) {
858 bp->b_resid = bp->b_bcount;
859 bp->b_flags |= B_INVAL;
860 goto done;
984263bc 861 }
7dc62e37
MD
862 sz = (long)(cs->sc_size - pbn);
863 bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
984263bc 864 }
7dc62e37 865 nbio = bio;
984263bc
MD
866
867 bp->b_resid = bp->b_bcount;
81b5c339 868 nbio->bio_driver_info = dev;
984263bc
MD
869
870 /*
871 * "Start" the unit.
872 */
abe2ad7c 873 crit_enter();
81b5c339 874 ccdstart(cs, nbio);
abe2ad7c 875 crit_exit();
fef8985e 876 return(0);
81b5c339
MD
877
878 /*
879 * note: bio, not nbio, is valid at the done label.
880 */
4414f2c9
MD
881bad:
882 bp->b_error = EINVAL;
883error:
884 bp->b_resid = bp->b_bcount;
885 bp->b_flags |= B_ERROR | B_INVAL;
984263bc 886done:
81b5c339 887 biodone(bio);
fef8985e 888 return(0);
984263bc
MD
889}
890
891static void
81b5c339 892ccdstart(struct ccd_softc *cs, struct bio *bio)
984263bc
MD
893{
894 long bcount, rcount;
895 struct ccdbuf *cbp[4];
81b5c339 896 struct buf *bp = bio->bio_buf;
984263bc
MD
897 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
898 caddr_t addr;
54078292 899 off_t doffset;
984263bc
MD
900
901#ifdef DEBUG
902 if (ccddebug & CCDB_FOLLOW)
e3869ec7 903 kprintf("ccdstart(%x, %x)\n", cs, bp);
984263bc
MD
904#endif
905
906 /* Record the transaction start */
907 devstat_start_transaction(&cs->device_stats);
908
984263bc
MD
909 /*
910 * Allocate component buffers and fire off the requests
911 */
7dc62e37 912 doffset = bio->bio_offset;
984263bc 913 addr = bp->b_data;
7dc62e37 914
984263bc 915 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
54078292 916 ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
984263bc
MD
917 rcount = cbp[0]->cb_buf.b_bcount;
918
919 if (cs->sc_cflags & CCDF_MIRROR) {
920 /*
921 * Mirroring. Writes go to both disks, reads are
922 * taken from whichever disk seems most appropriate.
923 *
924 * We attempt to localize reads to the disk whos arm
925 * is nearest the read request. We ignore seeks due
926 * to writes when making this determination and we
927 * also try to avoid hogging.
928 */
10f3fee5 929 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
a8f169e2
MD
930 vn_strategy(cbp[0]->cb_vp,
931 &cbp[0]->cb_buf.b_bio1);
932 vn_strategy(cbp[1]->cb_vp,
933 &cbp[1]->cb_buf.b_bio1);
984263bc
MD
934 } else {
935 int pick = cs->sc_pick;
7dc62e37 936 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize;
54078292
MD
937 if (doffset < cs->sc_blk[pick] - range ||
938 doffset > cs->sc_blk[pick] + range
984263bc
MD
939 ) {
940 cs->sc_pick = pick = 1 - pick;
941 }
54078292 942 cs->sc_blk[pick] = doffset + rcount;
a8f169e2
MD
943 vn_strategy(cbp[pick]->cb_vp,
944 &cbp[pick]->cb_buf.b_bio1);
984263bc
MD
945 }
946 } else {
947 /*
948 * Not mirroring
949 */
a8f169e2 950 vn_strategy(cbp[0]->cb_vp,
81b5c339 951 &cbp[0]->cb_buf.b_bio1);
984263bc 952 }
54078292 953 doffset += rcount;
984263bc
MD
954 addr += rcount;
955 }
956}
957
958/*
959 * Build a component buffer header.
960 */
961static void
54078292
MD
962ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
963 off_t doffset, caddr_t addr, long bcount)
984263bc
MD
964{
965 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
966 struct ccdbuf *cbp;
7dc62e37
MD
967 u_int64_t bn;
968 u_int64_t cbn;
969 u_int64_t cboff;
984263bc
MD
970 off_t cbc;
971
972#ifdef DEBUG
973 if (ccddebug & CCDB_IO)
e3869ec7 974 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n",
984263bc
MD
975 cs, bp, bn, addr, bcount);
976#endif
977 /*
978 * Determine which component bn falls in.
979 */
7dc62e37 980 bn = doffset / cs->sc_geom.ccg_secsize;
984263bc
MD
981 cbn = bn;
982 cboff = 0;
983
984 if (cs->sc_ileave == 0) {
985 /*
986 * Serially concatenated and neither a mirror nor a parity
987 * config. This is a special case.
988 */
989 daddr_t sblk;
990
991 sblk = 0;
992 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
993 sblk += ci->ci_size;
994 cbn -= sblk;
995 } else {
996 struct ccdiinfo *ii;
997 int ccdisk, off;
998
999 /*
1000 * Calculate cbn, the logical superblock (sc_ileave chunks),
1001 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
1002 * to cbn.
1003 */
1004 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
1005 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
1006
1007 /*
1008 * Figure out which interleave table to use.
1009 */
1010 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
1011 if (ii->ii_startblk > cbn)
1012 break;
1013 }
1014 ii--;
1015
1016 /*
1017 * off is the logical superblock relative to the beginning
1018 * of this interleave block.
1019 */
1020 off = cbn - ii->ii_startblk;
1021
1022 /*
1023 * We must calculate which disk component to use (ccdisk),
1024 * and recalculate cbn to be the superblock relative to
1025 * the beginning of the component. This is typically done by
1026 * adding 'off' and ii->ii_startoff together. However, 'off'
1027 * must typically be divided by the number of components in
1028 * this interleave array to be properly convert it from a
1029 * CCD-relative logical superblock number to a
1030 * component-relative superblock number.
1031 */
1032 if (ii->ii_ndisk == 1) {
1033 /*
1034 * When we have just one disk, it can't be a mirror
1035 * or a parity config.
1036 */
1037 ccdisk = ii->ii_index[0];
1038 cbn = ii->ii_startoff + off;
1039 } else {
1040 if (cs->sc_cflags & CCDF_MIRROR) {
1041 /*
1042 * We have forced a uniform mapping, resulting
1043 * in a single interleave array. We double
1044 * up on the first half of the available
1045 * components and our mirror is in the second
1046 * half. This only works with a single
1047 * interleave array because doubling up
1048 * doubles the number of sectors, so there
1049 * cannot be another interleave array because
1050 * the next interleave array's calculations
1051 * would be off.
1052 */
1053 int ndisk2 = ii->ii_ndisk / 2;
1054 ccdisk = ii->ii_index[off % ndisk2];
1055 cbn = ii->ii_startoff + off / ndisk2;
1056 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1057 } else if (cs->sc_cflags & CCDF_PARITY) {
1058 /*
1059 * XXX not implemented yet
1060 */
1061 int ndisk2 = ii->ii_ndisk - 1;
1062 ccdisk = ii->ii_index[off % ndisk2];
1063 cbn = ii->ii_startoff + off / ndisk2;
1064 if (cbn % ii->ii_ndisk <= ccdisk)
1065 ccdisk++;
1066 } else {
1067 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1068 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1069 }
1070 }
1071
1072 ci = &cs->sc_cinfo[ccdisk];
1073
1074 /*
1075 * Convert cbn from a superblock to a normal block so it
1076 * can be used to calculate (along with cboff) the normal
1077 * block index into this particular disk.
1078 */
1079 cbn *= cs->sc_ileave;
1080 }
1081
1082 /*
1083 * Fill in the component buf structure.
9a71d53f
MD
1084 *
1085 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
1086 * will be truncated on device EOF so we use b_bufsize to detect
1087 * the case.
984263bc 1088 */
4483eb0d 1089 cbp = getccdbuf();
c8bcf978 1090 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
4414f2c9 1091 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
984263bc 1092 cbp->cb_buf.b_data = addr;
a8f169e2 1093 cbp->cb_vp = ci->ci_vp;
984263bc 1094 if (cs->sc_ileave == 0)
d785777f 1095 cbc = dbtob((off_t)(ci->ci_size - cbn));
984263bc 1096 else
d785777f
MD
1097 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1098 if (cbc > cs->sc_maxiosize)
1099 cbc = cs->sc_maxiosize;
984263bc
MD
1100 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1101 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1102
81b5c339
MD
1103 cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1104 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
7dc62e37 1105 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip);
81b5c339 1106
984263bc
MD
1107 /*
1108 * context for ccdiodone
1109 */
81b5c339 1110 cbp->cb_obio = bio;
984263bc
MD
1111 cbp->cb_unit = cs - ccd_softc;
1112 cbp->cb_comp = ci - cs->sc_cinfo;
1113
1114#ifdef DEBUG
1115 if (ccddebug & CCDB_IO)
e3869ec7 1116 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
81b5c339 1117 ci->ci_dev, ci-cs->sc_cinfo, cbp,
54078292 1118 cbp->cb_buf.b_bio1.bio_offset,
984263bc
MD
1119 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1120#endif
1121 cb[0] = cbp;
1122
1123 /*
1124 * Note: both I/O's setup when reading from mirror, but only one
1125 * will be executed.
1126 */
1127 if (cs->sc_cflags & CCDF_MIRROR) {
1128 /* mirror, setup second I/O */
4483eb0d
MD
1129 cbp = getccdbuf();
1130
c8bcf978 1131 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
4414f2c9 1132 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
4483eb0d 1133 cbp->cb_buf.b_data = addr;
a8f169e2 1134 cbp->cb_vp = ci2->ci_vp;
4483eb0d
MD
1135 if (cs->sc_ileave == 0)
1136 cbc = dbtob((off_t)(ci->ci_size - cbn));
1137 else
1138 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
d785777f
MD
1139 if (cbc > cs->sc_maxiosize)
1140 cbc = cs->sc_maxiosize;
4483eb0d
MD
1141 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1142 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1143
1144 cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1145 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
7dc62e37 1146 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip);
4483eb0d
MD
1147
1148 /*
1149 * context for ccdiodone
1150 */
1151 cbp->cb_obio = bio;
1152 cbp->cb_unit = cs - ccd_softc;
984263bc
MD
1153 cbp->cb_comp = ci2 - cs->sc_cinfo;
1154 cb[1] = cbp;
1155 /* link together the ccdbuf's and clear "mirror done" flag */
1156 cb[0]->cb_mirror = cb[1];
1157 cb[1]->cb_mirror = cb[0];
1158 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1159 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1160 }
1161}
1162
1163static void
81b5c339 1164ccdintr(struct ccd_softc *cs, struct bio *bio)
984263bc 1165{
81b5c339
MD
1166 struct buf *bp = bio->bio_buf;
1167
984263bc
MD
1168#ifdef DEBUG
1169 if (ccddebug & CCDB_FOLLOW)
e3869ec7 1170 kprintf("ccdintr(%x, %x)\n", cs, bp);
984263bc
MD
1171#endif
1172 /*
1173 * Request is done for better or worse, wakeup the top half.
1174 */
1175 if (bp->b_flags & B_ERROR)
1176 bp->b_resid = bp->b_bcount;
1177 devstat_end_transaction_buf(&cs->device_stats, bp);
81b5c339 1178 biodone(bio);
984263bc
MD
1179}
1180
1181/*
1182 * Called at interrupt time.
77912481 1183 *
984263bc
MD
1184 * Mark the component as done and if all components are done,
1185 * take a ccd interrupt.
1186 */
1187static void
81b5c339 1188ccdiodone(struct bio *bio)
984263bc 1189{
81b5c339
MD
1190 struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1191 struct bio *obio = cbp->cb_obio;
1192 struct buf *obp = obio->bio_buf;
984263bc 1193 int unit = cbp->cb_unit;
abe2ad7c 1194 int count;
984263bc 1195
81b5c339
MD
1196 /*
1197 * Since we do not have exclusive access to underlying devices,
1198 * we can't keep cache translations around.
1199 */
1200 clearbiocache(bio->bio_next);
1201
77912481 1202 get_mplock();
abe2ad7c 1203 crit_enter();
984263bc
MD
1204#ifdef DEBUG
1205 if (ccddebug & CCDB_FOLLOW)
e3869ec7 1206 kprintf("ccdiodone(%x)\n", cbp);
984263bc 1207 if (ccddebug & CCDB_IO) {
e3869ec7 1208 kprintf("ccdiodone: bp %x bcount %d resid %d\n",
81b5c339 1209 obp, obp->b_bcount, obp->b_resid);
e3869ec7 1210 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
984263bc 1211 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
54078292 1212 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
984263bc
MD
1213 cbp->cb_buf.b_bcount);
1214 }
1215#endif
9a71d53f 1216
984263bc
MD
1217 /*
1218 * If an error occured, report it. If this is a mirrored
1219 * configuration and the first of two possible reads, do not
1220 * set the error in the bp yet because the second read may
1221 * succeed.
1222 */
984263bc
MD
1223 if (cbp->cb_buf.b_flags & B_ERROR) {
1224 const char *msg = "";
1225
1226 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
10f3fee5 1227 (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
984263bc
MD
1228 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1229 /*
1230 * We will try our read on the other disk down
1231 * below, also reverse the default pick so if we
1232 * are doing a scan we do not keep hitting the
1233 * bad disk first.
1234 */
1235 struct ccd_softc *cs = &ccd_softc[unit];
1236
1237 msg = ", trying other disk";
1238 cs->sc_pick = 1 - cs->sc_pick;
54078292 1239 cs->sc_blk[cs->sc_pick] = obio->bio_offset;
984263bc 1240 } else {
81b5c339
MD
1241 obp->b_flags |= B_ERROR;
1242 obp->b_error = cbp->cb_buf.b_error ?
984263bc
MD
1243 cbp->cb_buf.b_error : EIO;
1244 }
bfc09ba0
MD
1245 kprintf("ccd%d: error %d on component %d "
1246 "offset %jd (ccd offset %jd)%s\n",
1247 unit, obp->b_error, cbp->cb_comp,
1248 (intmax_t)cbp->cb_buf.b_bio2.bio_offset,
1249 (intmax_t)obio->bio_offset,
1250 msg);
984263bc
MD
1251 }
1252
1253 /*
1254 * Process mirror. If we are writing, I/O has been initiated on both
1255 * buffers and we fall through only after both are finished.
1256 *
1257 * If we are reading only one I/O is initiated at a time. If an
1258 * error occurs we initiate the second I/O and return, otherwise
1259 * we free the second I/O without initiating it.
1260 */
1261
1262 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
10f3fee5 1263 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
984263bc
MD
1264 /*
1265 * When writing, handshake with the second buffer
1266 * to determine when both are done. If both are not
1267 * done, return here.
1268 */
1269 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1270 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1271 putccdbuf(cbp);
abe2ad7c 1272 crit_exit();
77912481 1273 rel_mplock();
984263bc
MD
1274 return;
1275 }
1276 } else {
1277 /*
1278 * When reading, either dispose of the second buffer
1279 * or initiate I/O on the second buffer if an error
1280 * occured with this one.
1281 */
1282 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1283 if (cbp->cb_buf.b_flags & B_ERROR) {
1284 cbp->cb_mirror->cb_pflags |=
1285 CCDPF_MIRROR_DONE;
81b5c339 1286 vn_strategy(
a8f169e2 1287 cbp->cb_mirror->cb_vp,
81b5c339 1288 &cbp->cb_mirror->cb_buf.b_bio1
984263bc
MD
1289 );
1290 putccdbuf(cbp);
abe2ad7c 1291 crit_exit();
77912481 1292 rel_mplock();
984263bc
MD
1293 return;
1294 } else {
1295 putccdbuf(cbp->cb_mirror);
1296 /* fall through */
1297 }
1298 }
1299 }
1300 }
1301
1302 /*
9a71d53f 1303 * Use our saved b_bufsize to determine if an unexpected EOF occured.
984263bc
MD
1304 */
1305 count = cbp->cb_buf.b_bufsize;
1306 putccdbuf(cbp);
1307
1308 /*
1309 * If all done, "interrupt".
1310 */
81b5c339
MD
1311 obp->b_resid -= count;
1312 if (obp->b_resid < 0)
984263bc 1313 panic("ccdiodone: count");
81b5c339
MD
1314 if (obp->b_resid == 0)
1315 ccdintr(&ccd_softc[unit], obio);
abe2ad7c 1316 crit_exit();
77912481 1317 rel_mplock();
984263bc
MD
1318}
1319
1320static int
fef8985e 1321ccdioctl(struct dev_ioctl_args *ap)
984263bc 1322{
b13267a5 1323 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
1324 int unit = ccdunit(dev);
1325 int i, j, lookedup = 0, error = 0;
984263bc 1326 struct ccd_softc *cs;
fef8985e 1327 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data;
984263bc 1328 struct ccddevice ccd;
7dc62e37 1329 struct disk_info info;
984263bc
MD
1330 char **cpp;
1331 struct vnode **vpp;
1332
1333 if (unit >= numccd)
1334 return (ENXIO);
1335 cs = &ccd_softc[unit];
1336
1337 bzero(&ccd, sizeof(ccd));
1338
fef8985e 1339 switch (ap->a_cmd) {
984263bc
MD
1340 case CCDIOCSET:
1341 if (cs->sc_flags & CCDF_INITED)
1342 return (EBUSY);
1343
fef8985e 1344 if ((ap->a_fflag & FWRITE) == 0)
984263bc
MD
1345 return (EBADF);
1346
1347 if ((error = ccdlock(cs)) != 0)
1348 return (error);
1349
7dc62e37
MD
1350 if (ccio->ccio_ndisks > CCD_MAXNDISKS) {
1351 ccdunlock(cs);
984263bc 1352 return (EINVAL);
7dc62e37 1353 }
984263bc
MD
1354
1355 /* Fill in some important bits. */
1356 ccd.ccd_unit = unit;
1357 ccd.ccd_interleave = ccio->ccio_ileave;
1358 if (ccd.ccd_interleave == 0 &&
1359 ((ccio->ccio_flags & CCDF_MIRROR) ||
1360 (ccio->ccio_flags & CCDF_PARITY))) {
e3869ec7 1361 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
984263bc
MD
1362 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1363 }
1364 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1365 (ccio->ccio_flags & CCDF_PARITY)) {
e3869ec7 1366 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
984263bc
MD
1367 ccio->ccio_flags &= ~CCDF_PARITY;
1368 }
1369 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1370 !(ccio->ccio_flags & CCDF_UNIFORM)) {
e3869ec7 1371 kprintf("ccd%d: mirror/parity forces uniform flag\n",
984263bc
MD
1372 unit);
1373 ccio->ccio_flags |= CCDF_UNIFORM;
1374 }
1375 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1376
1377 /*
1378 * Allocate space for and copy in the array of
1379 * componet pathnames and device numbers.
1380 */
77652cad 1381 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *),
984263bc 1382 M_DEVBUF, M_WAITOK);
77652cad 1383 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *),
984263bc
MD
1384 M_DEVBUF, M_WAITOK);
1385
1386 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
7dc62e37 1387 ccio->ccio_ndisks * sizeof(char **));
984263bc 1388 if (error) {
efda3bd0
MD
1389 kfree(vpp, M_DEVBUF);
1390 kfree(cpp, M_DEVBUF);
984263bc
MD
1391 ccdunlock(cs);
1392 return (error);
1393 }
1394
1395#ifdef DEBUG
7dc62e37 1396 if (ccddebug & CCDB_INIT) {
984263bc 1397 for (i = 0; i < ccio->ccio_ndisks; ++i)
e3869ec7 1398 kprintf("ccdioctl: component %d: 0x%x\n",
984263bc 1399 i, cpp[i]);
7dc62e37 1400 }
984263bc
MD
1401#endif
1402
1403 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1404#ifdef DEBUG
1405 if (ccddebug & CCDB_INIT)
e3869ec7 1406 kprintf("ccdioctl: lookedup = %d\n", lookedup);
984263bc 1407#endif
fef8985e 1408 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) {
984263bc 1409 for (j = 0; j < lookedup; ++j)
87de5057 1410 (void)vn_close(vpp[j], FREAD|FWRITE);
efda3bd0
MD
1411 kfree(vpp, M_DEVBUF);
1412 kfree(cpp, M_DEVBUF);
984263bc
MD
1413 ccdunlock(cs);
1414 return (error);
1415 }
1416 ++lookedup;
1417 }
1418 ccd.ccd_cpp = cpp;
1419 ccd.ccd_vpp = vpp;
1420 ccd.ccd_ndev = ccio->ccio_ndisks;
1421
1422 /*
1423 * Initialize the ccd. Fills in the softc for us.
1424 */
fef8985e 1425 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) {
984263bc 1426 for (j = 0; j < lookedup; ++j)
87de5057 1427 (void)vn_close(vpp[j], FREAD|FWRITE);
efda3bd0
MD
1428 kfree(vpp, M_DEVBUF);
1429 kfree(cpp, M_DEVBUF);
984263bc
MD
1430 ccdunlock(cs);
1431 return (error);
1432 }
1433
1434 /*
1435 * The ccd has been successfully initialized, so
1436 * we can place it into the array and read the disklabel.
1437 */
1438 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1439 ccio->ccio_unit = unit;
1440 ccio->ccio_size = cs->sc_size;
7dc62e37
MD
1441
1442 bzero(&info, sizeof(info));
1443 info.d_media_blksize = cs->sc_geom.ccg_secsize;
1444 info.d_media_blocks = cs->sc_size;
1445 info.d_nheads = cs->sc_geom.ccg_ntracks;
1446 info.d_secpertrack = cs->sc_geom.ccg_nsectors;
1447 info.d_ncylinders = cs->sc_geom.ccg_ncylinders;
1448 info.d_secpercyl = info.d_nheads * info.d_secpertrack;
1449
1450 /*
1451 * For cases where a label is directly applied to the ccd,
1452 * without slices, DSO_COMPATMBR forces one sector be
1453 * reserved for backwards compatibility.
1454 */
1455 info.d_dsflags = DSO_COMPATMBR;
1456 disk_setdiskinfo(&cs->sc_disk, &info);
984263bc
MD
1457
1458 ccdunlock(cs);
1459
1460 break;
1461
1462 case CCDIOCCLR:
1463 if ((cs->sc_flags & CCDF_INITED) == 0)
1464 return (ENXIO);
1465
fef8985e 1466 if ((ap->a_fflag & FWRITE) == 0)
984263bc
MD
1467 return (EBADF);
1468
1469 if ((error = ccdlock(cs)) != 0)
1470 return (error);
1471
7dc62e37 1472 if (dev_drefs(cs->sc_dev) > 1) {
984263bc
MD
1473 ccdunlock(cs);
1474 return (EBUSY);
1475 }
1476
1477 /*
1478 * Free ccd_softc information and clear entry.
1479 */
1480
1481 /* Close the components and free their pathnames. */
1482 for (i = 0; i < cs->sc_nccdisks; ++i) {
1483 /*
1484 * XXX: this close could potentially fail and
1485 * cause Bad Things. Maybe we need to force
1486 * the close to happen?
1487 */
1488#ifdef DEBUG
1489 if (ccddebug & CCDB_VNODE)
1490 vprint("CCDIOCCLR: vnode info",
1491 cs->sc_cinfo[i].ci_vp);
1492#endif
87de5057 1493 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE);
efda3bd0 1494 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF);
984263bc
MD
1495 }
1496
1497 /* Free interleave index. */
1498 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
efda3bd0 1499 kfree(cs->sc_itable[i].ii_index, M_DEVBUF);
984263bc
MD
1500
1501 /* Free component info and interleave table. */
efda3bd0
MD
1502 kfree(cs->sc_cinfo, M_DEVBUF);
1503 kfree(cs->sc_itable, M_DEVBUF);
7dc62e37
MD
1504 cs->sc_cinfo = NULL;
1505 cs->sc_itable = NULL;
984263bc
MD
1506 cs->sc_flags &= ~CCDF_INITED;
1507
1508 /*
1509 * Free ccddevice information and clear entry.
1510 */
efda3bd0
MD
1511 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF);
1512 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF);
984263bc
MD
1513 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1514
1515 /*
1516 * And remove the devstat entry.
1517 */
1518 devstat_remove_entry(&cs->device_stats);
1519
1520 /* This must be atomic. */
abe2ad7c 1521 crit_enter();
984263bc 1522 ccdunlock(cs);
abe2ad7c 1523 crit_exit();
984263bc
MD
1524
1525 break;
1526
984263bc
MD
1527 default:
1528 return (ENOTTY);
1529 }
1530
1531 return (0);
1532}
1533
/*
 * Dump handler.  Not implemented: every call fails with ENXIO and the
 * argument is ignored.
 */
static int
ccddump(struct dev_dump_args *ap)
{
	(void)ap;

	return (ENXIO);
}
1540
1541/*
1542 * Lookup the provided name in the filesystem. If the file exists,
1543 * is a valid block device, and isn't being used by anyone else,
1544 * set *vpp to the file's vnode.
1545 */
1546static int
fef8985e 1547ccdlookup(char *path, struct vnode **vpp)
984263bc 1548{
fad57d0e 1549 struct nlookupdata nd;
984263bc
MD
1550 struct vnode *vp;
1551 int error;
dadab5e9 1552
fad57d0e 1553 *vpp = NULL;
984263bc 1554
fad57d0e
MD
1555 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1556 if (error)
1557 return (error);
1558 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
984263bc 1559#ifdef DEBUG
d68d113c 1560 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
e3869ec7 1561 kprintf("ccdlookup: vn_open error = %d\n", error);
984263bc 1562#endif
fad57d0e 1563 goto done;
984263bc 1564 }
fad57d0e 1565 vp = nd.nl_open_vp;
984263bc 1566
3c37c940 1567 if (vp->v_opencount > 1) {
984263bc 1568 error = EBUSY;
fad57d0e 1569 goto done;
984263bc
MD
1570 }
1571
1572 if (!vn_isdisk(vp, &error))
fad57d0e 1573 goto done;
984263bc
MD
1574
1575#ifdef DEBUG
1576 if (ccddebug & CCDB_VNODE)
1577 vprint("ccdlookup: vnode info", vp);
1578#endif
1579
a11aaa81 1580 vn_unlock(vp);
fad57d0e
MD
1581 nd.nl_open_vp = NULL;
1582 nlookup_done(&nd);
1583 *vpp = vp; /* leave ref intact */
984263bc 1584 return (0);
fad57d0e
MD
1585done:
1586 nlookup_done(&nd);
984263bc
MD
1587 return (error);
1588}
1589
984263bc
MD
1590/*
1591 * Wait interruptibly for an exclusive lock.
1592 *
1593 * XXX
1594 * Several drivers do this; it should be abstracted and made MP-safe.
1595 */
1596static int
c436375a 1597ccdlock(struct ccd_softc *cs)
984263bc
MD
1598{
1599 int error;
1600
1601 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1602 cs->sc_flags |= CCDF_WANTED;
377d4740 1603 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
984263bc
MD
1604 return (error);
1605 }
1606 cs->sc_flags |= CCDF_LOCKED;
1607 return (0);
1608}
1609
1610/*
1611 * Unlock and wake up any waiters.
1612 */
1613static void
c436375a 1614ccdunlock(struct ccd_softc *cs)
984263bc
MD
1615{
1616
1617 cs->sc_flags &= ~CCDF_LOCKED;
1618 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1619 cs->sc_flags &= ~CCDF_WANTED;
1620 wakeup(cs);
1621 }
1622}
1623
#ifdef DEBUG
/*
 * Debug helper: print every entry of an interleave table, one line
 * per entry, stopping at the entry whose ii_ndisk is zero.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;
	int d;

	while (ii->ii_ndisk) {
		kprintf(" itab[%d]: #dk %d sblk %d soff %d",
			entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		d = 0;
		while (d < ii->ii_ndisk) {
			kprintf(" %d", ii->ii_index[d]);
			d++;
		}
		kprintf("\n");
		entry++;
		ii++;
	}
}
#endif
1639
1640\f
1641/* Local Variables: */
1642/* c-argdecl-indent: 8 */
1643/* c-continued-statement-offset: 8 */
1644/* c-indent-level: 8 */
1645/* End: */