The thread/proc pointer argument in the VFS subsystem originally existed
[dragonfly.git] / sys / dev / disk / ccd / ccd.c
CommitLineData
984263bc 1/* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */
87de5057 2/* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.32 2006/05/06 02:43:02 dillon Exp $ */
984263bc
MD
3
4/* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */
5
6/*
7 * Copyright (c) 1995 Jason R. Thorpe.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project
21 * by Jason R. Thorpe.
22 * 4. The name of the author may not be used to endorse or promote products
23 * derived from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38/*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80/*
81 * "Concatenated" disk driver.
82 *
83 * Dynamic configuration and disklabel support by:
84 * Jason R. Thorpe <thorpej@nas.nasa.gov>
85 * Numerical Aerodynamic Simulation Facility
86 * Mail Stop 258-6
87 * NASA Ames Research Center
88 * Moffett Field, CA 94035
89 */
90
1f2de5d4 91#include "use_ccd.h"
984263bc
MD
92
93#include <sys/param.h>
94#include <sys/systm.h>
95#include <sys/kernel.h>
96#include <sys/module.h>
97#include <sys/proc.h>
98#include <sys/buf.h>
99#include <sys/malloc.h>
fad57d0e 100#include <sys/nlookup.h>
984263bc
MD
101#include <sys/conf.h>
102#include <sys/stat.h>
103#include <sys/sysctl.h>
104#include <sys/disklabel.h>
984263bc
MD
105#include <sys/devicestat.h>
106#include <sys/fcntl.h>
107#include <sys/vnode.h>
3020e3be 108#include <sys/buf2.h>
984263bc
MD
109#include <sys/ccdvar.h>
110
111#include <vm/vm_zone.h>
112
50e58362
MD
113#include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */
114#include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */
115
116#include <sys/thread2.h>
117
984263bc
MD
118#if defined(CCDDEBUG) && !defined(DEBUG)
119#define DEBUG
120#endif
121
122#ifdef DEBUG
123#define CCDB_FOLLOW 0x01
124#define CCDB_INIT 0x02
125#define CCDB_IO 0x04
126#define CCDB_LABEL 0x08
127#define CCDB_VNODE 0x10
128static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
129 CCDB_VNODE;
130SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
131#undef DEBUG
132#endif
133
134#define ccdunit(x) dkunit(x)
135#define ccdpart(x) dkpart(x)
136
137/*
138 This is how mirroring works (only writes are special):
139
140 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
141 linked together by the cb_mirror field. "cb_pflags &
142 CCDPF_MIRROR_DONE" is set to 0 on both of them.
143
144 When a component returns to ccdiodone(), it checks if "cb_pflags &
145 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
146 flag and returns. If it is, it means its partner has already
147 returned, so it will go to the regular cleanup.
148
149 */
150
/*
 * Per-component I/O request.  Wraps the struct buf that is issued to a
 * single component and carries the context ccdbuffer() records for the
 * completion handler: the originating bio, the ccd unit and component
 * index, and (for mirrored configurations) a link to the partner
 * request.  cb_freenext chains idle entries on the ccdfreebufs list.
 */
151struct ccdbuf {
152 struct buf cb_buf; /* new I/O buf */
a8f169e2 153 struct vnode *cb_vp; /* related vnode */
81b5c339 154 struct bio *cb_obio; /* ptr. to original I/O buf */
984263bc
MD
155 struct ccdbuf *cb_freenext; /* free list link */
156 int cb_unit; /* target unit */
157 int cb_comp; /* target component */
158 int cb_pflags; /* mirror/parity status flag */
159 struct ccdbuf *cb_mirror; /* mirror counterpart */
160};
161
162/* bits in cb_pflags */
163#define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
164
165#define CCDLABELDEV(dev) \
e4c9c0c8 166 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
984263bc
MD
167
168static d_open_t ccdopen;
169static d_close_t ccdclose;
170static d_strategy_t ccdstrategy;
171static d_ioctl_t ccdioctl;
172static d_dump_t ccddump;
173static d_psize_t ccdsize;
174
175#define NCCDFREEHIWAT 16
176
177#define CDEV_MAJOR 74
984263bc
MD
178
/*
 * Device switch for the ccd driver.  The entries are positional; the
 * per-slot comments name the field each initializer fills.  Open, close,
 * ioctl, strategy, dump and psize are handled by this file, read and
 * write go through physread/physwrite, and poll/mmap are not supported.
 */
179static struct cdevsw ccd_cdevsw = {
fabb8ceb
MD
180 /* name */ "ccd",
181 /* maj */ CDEV_MAJOR,
182 /* flags */ D_DISK,
183 /* port */ NULL,
455fcd7e 184 /* clone */ NULL,
fabb8ceb 185
984263bc
MD
186 /* open */ ccdopen,
187 /* close */ ccdclose,
188 /* read */ physread,
189 /* write */ physwrite,
190 /* ioctl */ ccdioctl,
191 /* poll */ nopoll,
192 /* mmap */ nommap,
193 /* strategy */ ccdstrategy,
984263bc 194 /* dump */ ccddump,
fabb8ceb 195 /* psize */ ccdsize
984263bc
MD
196};
197
198/* called during module initialization */
38e94a25
RG
199static void ccdattach (void);
200static int ccd_modevent (module_t, int, void *);
984263bc
MD
201
202/* called by biodone() at interrupt time */
81b5c339 203static void ccdiodone (struct bio *bio);
38e94a25 204
81b5c339 205static void ccdstart (struct ccd_softc *, struct bio *);
38e94a25 206static void ccdinterleave (struct ccd_softc *, int);
81b5c339 207static void ccdintr (struct ccd_softc *, struct bio *);
38e94a25
RG
208static int ccdinit (struct ccddevice *, char **, struct thread *);
209static int ccdlookup (char *, struct thread *td, struct vnode **);
210static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
54078292 211 struct bio *, off_t, caddr_t, long);
38e94a25
RG
212static void ccdgetdisklabel (dev_t);
213static void ccdmakedisklabel (struct ccd_softc *);
214static int ccdlock (struct ccd_softc *);
215static void ccdunlock (struct ccd_softc *);
984263bc
MD
216
217#ifdef DEBUG
38e94a25 218static void printiinfo (struct ccdiinfo *);
984263bc
MD
219#endif
220
221/* Non-private for the benefit of libkvm. */
222struct ccd_softc *ccd_softc;
223struct ccddevice *ccddevs;
224struct ccdbuf *ccdfreebufs;
225static int numccdfreebufs;
226static int numccd = 0;
227
228/*
229 * getccdbuf() - Allocate and zero a ccd buffer.
230 *
231 * This routine is called at splbio().
232 */
233
234static __inline
235struct ccdbuf *
4483eb0d 236getccdbuf(void)
984263bc
MD
237{
238 struct ccdbuf *cbp;
239
240 /*
241 * Allocate from freelist or malloc as necessary
242 */
243 if ((cbp = ccdfreebufs) != NULL) {
244 ccdfreebufs = cbp->cb_freenext;
245 --numccdfreebufs;
81b5c339 246 reinitbufbio(&cbp->cb_buf);
984263bc 247 } else {
4483eb0d 248 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO);
81b5c339 249 initbufbio(&cbp->cb_buf);
984263bc
MD
250 }
251
984263bc
MD
252 /*
253 * independant struct buf initialization
254 */
255 LIST_INIT(&cbp->cb_buf.b_dep);
256 BUF_LOCKINIT(&cbp->cb_buf);
257 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
258 BUF_KERNPROC(&cbp->cb_buf);
4414f2c9 259 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
984263bc
MD
260
261 return(cbp);
262}
263
264/*
265 * putccdbuf() - Free a ccd buffer.
266 *
267 * This routine is called at splbio().
268 */
269
270static __inline
271void
272putccdbuf(struct ccdbuf *cbp)
273{
274 BUF_UNLOCK(&cbp->cb_buf);
275 BUF_LOCKFREE(&cbp->cb_buf);
276
277 if (numccdfreebufs < NCCDFREEHIWAT) {
278 cbp->cb_freenext = ccdfreebufs;
279 ccdfreebufs = cbp;
280 ++numccdfreebufs;
281 } else {
282 free((caddr_t)cbp, M_DEVBUF);
283 }
284}
285
286
287/*
288 * Number of blocks to leave untouched in front of a component partition.
289 * This is to avoid violating its disklabel area when it starts at the
290 * beginning of the slice.
291 */
292#if !defined(CCD_OFFSET)
293#define CCD_OFFSET 16
294#endif
295
296/*
297 * Called by main() during pseudo-device attachment. All we need
298 * to do is allocate enough space for devices to be configured later, and
299 * add devsw entries.
300 */
301static void
c436375a 302ccdattach(void)
984263bc
MD
303{
304 int i;
305 int num = NCCD;
306
307 if (num > 1)
308 printf("ccd0-%d: Concatenated disk drivers\n", num-1);
309 else
310 printf("ccd0: Concatenated disk driver\n");
311
3aed1355
MD
312 ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF,
313 M_WAITOK | M_ZERO);
314 ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF,
315 M_WAITOK | M_ZERO);
984263bc 316 numccd = num;
984263bc 317
e4c9c0c8 318 cdevsw_add(&ccd_cdevsw, 0, 0);
984263bc
MD
319 /* XXX: is this necessary? */
320 for (i = 0; i < numccd; ++i)
321 ccddevs[i].ccd_dk = -1;
322}
323
324static int
c436375a 325ccd_modevent(module_t mod, int type, void *data)
984263bc
MD
326{
327 int error = 0;
328
329 switch (type) {
330 case MOD_LOAD:
331 ccdattach();
332 break;
333
334 case MOD_UNLOAD:
335 printf("ccd0: Unload not supported!\n");
336 error = EOPNOTSUPP;
337 break;
338
339 default: /* MOD_SHUTDOWN etc */
340 break;
341 }
342 return (error);
343}
344
345DEV_MODULE(ccd, ccd_modevent, NULL);
346
347static int
dadab5e9 348ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td)
984263bc
MD
349{
350 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
351 struct ccdcinfo *ci = NULL; /* XXX */
352 size_t size;
353 int ix;
354 struct vnode *vp;
355 size_t minsize;
356 int maxsecsize;
357 struct partinfo dpart;
358 struct ccdgeom *ccg = &cs->sc_geom;
359 char tmppath[MAXPATHLEN];
360 int error = 0;
dadab5e9
MD
361 struct ucred *cred;
362
363 KKASSERT(td->td_proc);
364 cred = td->td_proc->p_ucred;
984263bc
MD
365
366#ifdef DEBUG
367 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
368 printf("ccdinit: unit %d\n", ccd->ccd_unit);
369#endif
370
371 cs->sc_size = 0;
372 cs->sc_ileave = ccd->ccd_interleave;
373 cs->sc_nccdisks = ccd->ccd_ndev;
374
375 /* Allocate space for the component info. */
376 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
377 M_DEVBUF, M_WAITOK);
378
379 /*
380 * Verify that each component piece exists and record
381 * relevant information about it.
382 */
383 maxsecsize = 0;
384 minsize = 0;
385 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
386 vp = ccd->ccd_vpp[ix];
387 ci = &cs->sc_cinfo[ix];
388 ci->ci_vp = vp;
389
390 /*
391 * Copy in the pathname of the component.
392 */
393 bzero(tmppath, sizeof(tmppath)); /* sanity */
394 if ((error = copyinstr(cpaths[ix], tmppath,
395 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
396#ifdef DEBUG
397 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
398 printf("ccd%d: can't copy path, error = %d\n",
399 ccd->ccd_unit, error);
400#endif
401 goto fail;
402 }
403 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
404 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
405
406 ci->ci_dev = vn_todev(vp);
407
408 /*
409 * Get partition information for the component.
410 */
411 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
87de5057 412 FREAD, cred)) != 0) {
984263bc
MD
413#ifdef DEBUG
414 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
415 printf("ccd%d: %s: ioctl failed, error = %d\n",
416 ccd->ccd_unit, ci->ci_path, error);
417#endif
418 goto fail;
419 }
420 if (dpart.part->p_fstype == FS_BSDFFS) {
421 maxsecsize =
422 ((dpart.disklab->d_secsize > maxsecsize) ?
423 dpart.disklab->d_secsize : maxsecsize);
424 size = dpart.part->p_size - CCD_OFFSET;
425 } else {
426#ifdef DEBUG
427 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
428 printf("ccd%d: %s: incorrect partition type\n",
429 ccd->ccd_unit, ci->ci_path);
430#endif
431 error = EFTYPE;
432 goto fail;
433 }
434
435 /*
436 * Calculate the size, truncating to an interleave
437 * boundary if necessary.
438 */
439
440 if (cs->sc_ileave > 1)
441 size -= size % cs->sc_ileave;
442
443 if (size == 0) {
444#ifdef DEBUG
445 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
446 printf("ccd%d: %s: size == 0\n",
447 ccd->ccd_unit, ci->ci_path);
448#endif
449 error = ENODEV;
450 goto fail;
451 }
452
453 if (minsize == 0 || size < minsize)
454 minsize = size;
455 ci->ci_size = size;
456 cs->sc_size += size;
457 }
458
459 /*
460 * Don't allow the interleave to be smaller than
461 * the biggest component sector.
462 */
463 if ((cs->sc_ileave > 0) &&
464 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
465#ifdef DEBUG
466 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
467 printf("ccd%d: interleave must be at least %d\n",
468 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
469#endif
470 error = EINVAL;
471 goto fail;
472 }
473
474 /*
475 * If uniform interleave is desired set all sizes to that of
476 * the smallest component. This will guarentee that a single
477 * interleave table is generated.
478 *
479 * Lost space must be taken into account when calculating the
480 * overall size. Half the space is lost when CCDF_MIRROR is
481 * specified. One disk is lost when CCDF_PARITY is specified.
482 */
483 if (ccd->ccd_flags & CCDF_UNIFORM) {
484 for (ci = cs->sc_cinfo;
485 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
486 ci->ci_size = minsize;
487 }
488 if (ccd->ccd_flags & CCDF_MIRROR) {
489 /*
490 * Check to see if an even number of components
491 * have been specified. The interleave must also
492 * be non-zero in order for us to be able to
493 * guarentee the topology.
494 */
495 if (cs->sc_nccdisks % 2) {
496 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
497 error = EINVAL;
498 goto fail;
499 }
500 if (cs->sc_ileave == 0) {
501 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
502 error = EINVAL;
503 goto fail;
504 }
505 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
506 } else if (ccd->ccd_flags & CCDF_PARITY) {
507 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
508 } else {
509 if (cs->sc_ileave == 0) {
510 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
511 error = EINVAL;
512 goto fail;
513 }
514 cs->sc_size = cs->sc_nccdisks * minsize;
515 }
516 }
517
518 /*
519 * Construct the interleave table.
520 */
521 ccdinterleave(cs, ccd->ccd_unit);
522
523 /*
524 * Create pseudo-geometry based on 1MB cylinders. It's
525 * pretty close.
526 */
527 ccg->ccg_secsize = maxsecsize;
528 ccg->ccg_ntracks = 1;
529 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
530 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
531
532 /*
533 * Add an devstat entry for this device.
534 */
535 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
536 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
537 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
538 DEVSTAT_PRIORITY_ARRAY);
539
540 cs->sc_flags |= CCDF_INITED;
541 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
542 cs->sc_unit = ccd->ccd_unit;
543 return (0);
544fail:
545 while (ci > cs->sc_cinfo) {
546 ci--;
547 free(ci->ci_path, M_DEVBUF);
548 }
549 free(cs->sc_cinfo, M_DEVBUF);
550 return (error);
551}
552
553static void
c436375a 554ccdinterleave(struct ccd_softc *cs, int unit)
984263bc
MD
555{
556 struct ccdcinfo *ci, *smallci;
557 struct ccdiinfo *ii;
558 daddr_t bn, lbn;
559 int ix;
560 u_long size;
561
562#ifdef DEBUG
563 if (ccddebug & CCDB_INIT)
564 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
565#endif
566
567 /*
568 * Allocate an interleave table. The worst case occurs when each
569 * of N disks is of a different size, resulting in N interleave
570 * tables.
571 *
572 * Chances are this is too big, but we don't care.
573 */
574 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
575 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
576 bzero((caddr_t)cs->sc_itable, size);
577
578 /*
579 * Trivial case: no interleave (actually interleave of disk size).
580 * Each table entry represents a single component in its entirety.
581 *
582 * An interleave of 0 may not be used with a mirror or parity setup.
583 */
584 if (cs->sc_ileave == 0) {
585 bn = 0;
586 ii = cs->sc_itable;
587
588 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
589 /* Allocate space for ii_index. */
590 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
591 ii->ii_ndisk = 1;
592 ii->ii_startblk = bn;
593 ii->ii_startoff = 0;
594 ii->ii_index[0] = ix;
595 bn += cs->sc_cinfo[ix].ci_size;
596 ii++;
597 }
598 ii->ii_ndisk = 0;
599#ifdef DEBUG
600 if (ccddebug & CCDB_INIT)
601 printiinfo(cs->sc_itable);
602#endif
603 return;
604 }
605
606 /*
607 * The following isn't fast or pretty; it doesn't have to be.
608 */
609 size = 0;
610 bn = lbn = 0;
611 for (ii = cs->sc_itable; ; ii++) {
612 /*
613 * Allocate space for ii_index. We might allocate more then
614 * we use.
615 */
616 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
617 M_DEVBUF, M_WAITOK);
618
619 /*
620 * Locate the smallest of the remaining components
621 */
622 smallci = NULL;
623 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
624 ci++) {
625 if (ci->ci_size > size &&
626 (smallci == NULL ||
627 ci->ci_size < smallci->ci_size)) {
628 smallci = ci;
629 }
630 }
631
632 /*
633 * Nobody left, all done
634 */
635 if (smallci == NULL) {
636 ii->ii_ndisk = 0;
637 break;
638 }
639
640 /*
641 * Record starting logical block using an sc_ileave blocksize.
642 */
643 ii->ii_startblk = bn / cs->sc_ileave;
644
645 /*
646 * Record starting comopnent block using an sc_ileave
647 * blocksize. This value is relative to the beginning of
648 * a component disk.
649 */
650 ii->ii_startoff = lbn;
651
652 /*
653 * Determine how many disks take part in this interleave
654 * and record their indices.
655 */
656 ix = 0;
657 for (ci = cs->sc_cinfo;
658 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
659 if (ci->ci_size >= smallci->ci_size) {
660 ii->ii_index[ix++] = ci - cs->sc_cinfo;
661 }
662 }
663 ii->ii_ndisk = ix;
664 bn += ix * (smallci->ci_size - size);
665 lbn = smallci->ci_size / cs->sc_ileave;
666 size = smallci->ci_size;
667 }
668#ifdef DEBUG
669 if (ccddebug & CCDB_INIT)
670 printiinfo(cs->sc_itable);
671#endif
672}
673
674/* ARGSUSED */
675static int
41c20dac 676ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td)
984263bc
MD
677{
678 int unit = ccdunit(dev);
679 struct ccd_softc *cs;
680 struct disklabel *lp;
681 int error = 0, part, pmask;
682
683#ifdef DEBUG
684 if (ccddebug & CCDB_FOLLOW)
685 printf("ccdopen(%x, %x)\n", dev, flags);
686#endif
687 if (unit >= numccd)
688 return (ENXIO);
689 cs = &ccd_softc[unit];
690
691 if ((error = ccdlock(cs)) != 0)
692 return (error);
693
694 lp = &cs->sc_label;
695
696 part = ccdpart(dev);
697 pmask = (1 << part);
698
699 /*
700 * If we're initialized, check to see if there are any other
701 * open partitions. If not, then it's safe to update
702 * the in-core disklabel.
703 */
704 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
705 ccdgetdisklabel(dev);
706
707 /* Check that the partition exists. */
708 if (part != RAW_PART && ((part >= lp->d_npartitions) ||
709 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
710 error = ENXIO;
711 goto done;
712 }
713
714 cs->sc_openmask |= pmask;
715 done:
716 ccdunlock(cs);
717 return (0);
718}
719
720/* ARGSUSED */
721static int
41c20dac 722ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td)
984263bc
MD
723{
724 int unit = ccdunit(dev);
725 struct ccd_softc *cs;
726 int error = 0, part;
727
728#ifdef DEBUG
729 if (ccddebug & CCDB_FOLLOW)
730 printf("ccdclose(%x, %x)\n", dev, flags);
731#endif
732
733 if (unit >= numccd)
734 return (ENXIO);
735 cs = &ccd_softc[unit];
736
737 if ((error = ccdlock(cs)) != 0)
738 return (error);
739
740 part = ccdpart(dev);
741
742 /* ...that much closer to allowing unconfiguration... */
743 cs->sc_openmask &= ~(1 << part);
744 ccdunlock(cs);
745 return (0);
746}
747
748static void
81b5c339 749ccdstrategy(dev_t dev, struct bio *bio)
984263bc 750{
81b5c339
MD
751 int unit = ccdunit(dev);
752 struct bio *nbio;
753 struct buf *bp = bio->bio_buf;
984263bc 754 struct ccd_softc *cs = &ccd_softc[unit];
984263bc
MD
755 int wlabel;
756 struct disklabel *lp;
757
758#ifdef DEBUG
759 if (ccddebug & CCDB_FOLLOW)
760 printf("ccdstrategy(%x): unit %d\n", bp, unit);
761#endif
762 if ((cs->sc_flags & CCDF_INITED) == 0) {
763 bp->b_error = ENXIO;
4414f2c9 764 goto error;
984263bc
MD
765 }
766
767 /* If it's a nil transfer, wake up the top half now. */
4414f2c9
MD
768 if (bp->b_bcount == 0) {
769 bp->b_resid = 0;
984263bc 770 goto done;
4414f2c9 771 }
984263bc
MD
772
773 lp = &cs->sc_label;
774
775 /*
776 * Do bounds checking and adjust transfer. If there's an
777 * error, the bounds check will flag that for us.
778 */
779 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
81b5c339
MD
780 if (ccdpart(dev) != RAW_PART) {
781 nbio = bounds_check_with_label(dev, bio, lp, wlabel);
782 if (nbio == NULL)
984263bc
MD
783 goto done;
784 } else {
785 int pbn; /* in sc_secsize chunks */
786 long sz; /* in sc_secsize chunks */
787
54078292 788 pbn = (int)(bio->bio_offset / cs->sc_geom.ccg_secsize);
984263bc
MD
789 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
790
791 /*
4414f2c9
MD
792 * If out of bounds return an error. If the request goes
793 * past EOF, clip the request as appropriate. If exactly
794 * at EOF, return success (don't clip), but with 0 bytes
795 * of I/O.
796 *
797 * Mark EOF B_INVAL (just like bad), indicating that the
798 * contents of the buffer, if any, is invalid.
984263bc 799 */
4414f2c9
MD
800 if (pbn < 0)
801 goto bad;
984263bc 802 if (pbn + sz > cs->sc_size) {
4414f2c9
MD
803 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
804 goto bad;
805 if (pbn == cs->sc_size) {
806 bp->b_resid = bp->b_bcount;
807 bp->b_flags |= B_INVAL;
808 goto done;
809 }
810 sz = cs->sc_size - pbn;
811 bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
984263bc 812 }
81b5c339 813 nbio = bio;
984263bc
MD
814 }
815
816 bp->b_resid = bp->b_bcount;
81b5c339 817 nbio->bio_driver_info = dev;
984263bc
MD
818
819 /*
820 * "Start" the unit.
821 */
abe2ad7c 822 crit_enter();
81b5c339 823 ccdstart(cs, nbio);
abe2ad7c 824 crit_exit();
984263bc 825 return;
81b5c339
MD
826
827 /*
828 * note: bio, not nbio, is valid at the done label.
829 */
4414f2c9
MD
830bad:
831 bp->b_error = EINVAL;
832error:
833 bp->b_resid = bp->b_bcount;
834 bp->b_flags |= B_ERROR | B_INVAL;
984263bc 835done:
81b5c339 836 biodone(bio);
984263bc
MD
837}
838
839static void
81b5c339 840ccdstart(struct ccd_softc *cs, struct bio *bio)
984263bc
MD
841{
842 long bcount, rcount;
843 struct ccdbuf *cbp[4];
81b5c339
MD
844 struct buf *bp = bio->bio_buf;
845 dev_t dev = bio->bio_driver_info;
984263bc
MD
846 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
847 caddr_t addr;
54078292 848 off_t doffset;
984263bc
MD
849 struct partition *pp;
850
851#ifdef DEBUG
852 if (ccddebug & CCDB_FOLLOW)
853 printf("ccdstart(%x, %x)\n", cs, bp);
854#endif
855
856 /* Record the transaction start */
857 devstat_start_transaction(&cs->device_stats);
858
859 /*
860 * Translate the partition-relative block number to an absolute.
861 */
54078292 862 doffset = bio->bio_offset;
81b5c339
MD
863 if (ccdpart(dev) != RAW_PART) {
864 pp = &cs->sc_label.d_partitions[ccdpart(dev)];
54078292 865 doffset += pp->p_offset * cs->sc_label.d_secsize;
984263bc
MD
866 }
867
868 /*
869 * Allocate component buffers and fire off the requests
870 */
871 addr = bp->b_data;
872 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
54078292 873 ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
984263bc
MD
874 rcount = cbp[0]->cb_buf.b_bcount;
875
876 if (cs->sc_cflags & CCDF_MIRROR) {
877 /*
878 * Mirroring. Writes go to both disks, reads are
879 * taken from whichever disk seems most appropriate.
880 *
881 * We attempt to localize reads to the disk whos arm
882 * is nearest the read request. We ignore seeks due
883 * to writes when making this determination and we
884 * also try to avoid hogging.
885 */
10f3fee5 886 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
a8f169e2
MD
887 vn_strategy(cbp[0]->cb_vp,
888 &cbp[0]->cb_buf.b_bio1);
889 vn_strategy(cbp[1]->cb_vp,
890 &cbp[1]->cb_buf.b_bio1);
984263bc
MD
891 } else {
892 int pick = cs->sc_pick;
54078292 893 daddr_t range = cs->sc_size / 16 * cs->sc_label.d_secsize;
984263bc 894
54078292
MD
895 if (doffset < cs->sc_blk[pick] - range ||
896 doffset > cs->sc_blk[pick] + range
984263bc
MD
897 ) {
898 cs->sc_pick = pick = 1 - pick;
899 }
54078292 900 cs->sc_blk[pick] = doffset + rcount;
a8f169e2
MD
901 vn_strategy(cbp[pick]->cb_vp,
902 &cbp[pick]->cb_buf.b_bio1);
984263bc
MD
903 }
904 } else {
905 /*
906 * Not mirroring
907 */
a8f169e2 908 vn_strategy(cbp[0]->cb_vp,
81b5c339 909 &cbp[0]->cb_buf.b_bio1);
984263bc 910 }
54078292 911 doffset += rcount;
984263bc
MD
912 addr += rcount;
913 }
914}
915
916/*
917 * Build a component buffer header.
918 */
919static void
54078292
MD
920ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
921 off_t doffset, caddr_t addr, long bcount)
984263bc
MD
922{
923 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
924 struct ccdbuf *cbp;
54078292 925 daddr_t bn, cbn, cboff;
984263bc
MD
926 off_t cbc;
927
928#ifdef DEBUG
929 if (ccddebug & CCDB_IO)
930 printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
931 cs, bp, bn, addr, bcount);
932#endif
933 /*
934 * Determine which component bn falls in.
935 */
54078292 936 bn = (daddr_t)(doffset / cs->sc_geom.ccg_secsize);
984263bc
MD
937 cbn = bn;
938 cboff = 0;
939
940 if (cs->sc_ileave == 0) {
941 /*
942 * Serially concatenated and neither a mirror nor a parity
943 * config. This is a special case.
944 */
945 daddr_t sblk;
946
947 sblk = 0;
948 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
949 sblk += ci->ci_size;
950 cbn -= sblk;
951 } else {
952 struct ccdiinfo *ii;
953 int ccdisk, off;
954
955 /*
956 * Calculate cbn, the logical superblock (sc_ileave chunks),
957 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
958 * to cbn.
959 */
960 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
961 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
962
963 /*
964 * Figure out which interleave table to use.
965 */
966 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
967 if (ii->ii_startblk > cbn)
968 break;
969 }
970 ii--;
971
972 /*
973 * off is the logical superblock relative to the beginning
974 * of this interleave block.
975 */
976 off = cbn - ii->ii_startblk;
977
978 /*
979 * We must calculate which disk component to use (ccdisk),
980 * and recalculate cbn to be the superblock relative to
981 * the beginning of the component. This is typically done by
982 * adding 'off' and ii->ii_startoff together. However, 'off'
983 * must typically be divided by the number of components in
984 * this interleave array to be properly convert it from a
985 * CCD-relative logical superblock number to a
986 * component-relative superblock number.
987 */
988 if (ii->ii_ndisk == 1) {
989 /*
990 * When we have just one disk, it can't be a mirror
991 * or a parity config.
992 */
993 ccdisk = ii->ii_index[0];
994 cbn = ii->ii_startoff + off;
995 } else {
996 if (cs->sc_cflags & CCDF_MIRROR) {
997 /*
998 * We have forced a uniform mapping, resulting
999 * in a single interleave array. We double
1000 * up on the first half of the available
1001 * components and our mirror is in the second
1002 * half. This only works with a single
1003 * interleave array because doubling up
1004 * doubles the number of sectors, so there
1005 * cannot be another interleave array because
1006 * the next interleave array's calculations
1007 * would be off.
1008 */
1009 int ndisk2 = ii->ii_ndisk / 2;
1010 ccdisk = ii->ii_index[off % ndisk2];
1011 cbn = ii->ii_startoff + off / ndisk2;
1012 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1013 } else if (cs->sc_cflags & CCDF_PARITY) {
1014 /*
1015 * XXX not implemented yet
1016 */
1017 int ndisk2 = ii->ii_ndisk - 1;
1018 ccdisk = ii->ii_index[off % ndisk2];
1019 cbn = ii->ii_startoff + off / ndisk2;
1020 if (cbn % ii->ii_ndisk <= ccdisk)
1021 ccdisk++;
1022 } else {
1023 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1024 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1025 }
1026 }
1027
1028 ci = &cs->sc_cinfo[ccdisk];
1029
1030 /*
1031 * Convert cbn from a superblock to a normal block so it
1032 * can be used to calculate (along with cboff) the normal
1033 * block index into this particular disk.
1034 */
1035 cbn *= cs->sc_ileave;
1036 }
1037
1038 /*
1039 * Fill in the component buf structure.
9a71d53f
MD
1040 *
1041 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
1042 * will be truncated on device EOF so we use b_bufsize to detect
1043 * the case.
984263bc 1044 */
4483eb0d 1045 cbp = getccdbuf();
c8bcf978 1046 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
4414f2c9 1047 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
984263bc 1048 cbp->cb_buf.b_data = addr;
a8f169e2 1049 cbp->cb_vp = ci->ci_vp;
984263bc
MD
1050 if (cs->sc_ileave == 0)
1051 cbc = dbtob((off_t)(ci->ci_size - cbn));
1052 else
1053 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1054 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1055 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1056
81b5c339
MD
1057 cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1058 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
81b5c339
MD
1059 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
1060
984263bc
MD
1061 /*
1062 * context for ccdiodone
1063 */
81b5c339 1064 cbp->cb_obio = bio;
984263bc
MD
1065 cbp->cb_unit = cs - ccd_softc;
1066 cbp->cb_comp = ci - cs->sc_cinfo;
1067
1068#ifdef DEBUG
1069 if (ccddebug & CCDB_IO)
54078292 1070 printf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
81b5c339 1071 ci->ci_dev, ci-cs->sc_cinfo, cbp,
54078292 1072 cbp->cb_buf.b_bio1.bio_offset,
984263bc
MD
1073 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1074#endif
1075 cb[0] = cbp;
1076
1077 /*
1078 * Note: both I/O's setup when reading from mirror, but only one
1079 * will be executed.
1080 */
1081 if (cs->sc_cflags & CCDF_MIRROR) {
1082 /* mirror, setup second I/O */
4483eb0d
MD
1083 cbp = getccdbuf();
1084
c8bcf978 1085 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
4414f2c9 1086 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
4483eb0d 1087 cbp->cb_buf.b_data = addr;
a8f169e2 1088 cbp->cb_vp = ci2->ci_vp;
4483eb0d
MD
1089 if (cs->sc_ileave == 0)
1090 cbc = dbtob((off_t)(ci->ci_size - cbn));
1091 else
1092 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1093 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1094 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1095
1096 cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1097 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
4483eb0d
MD
1098 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
1099
1100 /*
1101 * context for ccdiodone
1102 */
1103 cbp->cb_obio = bio;
1104 cbp->cb_unit = cs - ccd_softc;
984263bc
MD
1105 cbp->cb_comp = ci2 - cs->sc_cinfo;
1106 cb[1] = cbp;
1107 /* link together the ccdbuf's and clear "mirror done" flag */
1108 cb[0]->cb_mirror = cb[1];
1109 cb[1]->cb_mirror = cb[0];
1110 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1111 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1112 }
1113}
1114
1115static void
81b5c339 1116ccdintr(struct ccd_softc *cs, struct bio *bio)
984263bc 1117{
81b5c339
MD
1118 struct buf *bp = bio->bio_buf;
1119
984263bc
MD
1120#ifdef DEBUG
1121 if (ccddebug & CCDB_FOLLOW)
1122 printf("ccdintr(%x, %x)\n", cs, bp);
1123#endif
1124 /*
1125 * Request is done for better or worse, wakeup the top half.
1126 */
1127 if (bp->b_flags & B_ERROR)
1128 bp->b_resid = bp->b_bcount;
1129 devstat_end_transaction_buf(&cs->device_stats, bp);
81b5c339 1130 biodone(bio);
984263bc
MD
1131}
1132
1133/*
1134 * Called at interrupt time.
1135 * Mark the component as done and if all components are done,
1136 * take a ccd interrupt.
1137 */
1138static void
81b5c339 1139ccdiodone(struct bio *bio)
984263bc 1140{
81b5c339
MD
1141 struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1142 struct bio *obio = cbp->cb_obio;
1143 struct buf *obp = obio->bio_buf;
984263bc 1144 int unit = cbp->cb_unit;
abe2ad7c 1145 int count;
984263bc 1146
81b5c339
MD
1147 /*
1148 * Since we do not have exclusive access to underlying devices,
1149 * we can't keep cache translations around.
1150 */
1151 clearbiocache(bio->bio_next);
1152
abe2ad7c 1153 crit_enter();
984263bc
MD
1154#ifdef DEBUG
1155 if (ccddebug & CCDB_FOLLOW)
1156 printf("ccdiodone(%x)\n", cbp);
1157 if (ccddebug & CCDB_IO) {
1158 printf("ccdiodone: bp %x bcount %d resid %d\n",
81b5c339 1159 obp, obp->b_bcount, obp->b_resid);
54078292 1160 printf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
984263bc 1161 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
54078292 1162 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
984263bc
MD
1163 cbp->cb_buf.b_bcount);
1164 }
1165#endif
9a71d53f 1166
984263bc
MD
1167 /*
1168 * If an error occured, report it. If this is a mirrored
1169 * configuration and the first of two possible reads, do not
1170 * set the error in the bp yet because the second read may
1171 * succeed.
1172 */
984263bc
MD
1173 if (cbp->cb_buf.b_flags & B_ERROR) {
1174 const char *msg = "";
1175
1176 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
10f3fee5 1177 (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
984263bc
MD
1178 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1179 /*
1180 * We will try our read on the other disk down
1181 * below, also reverse the default pick so if we
1182 * are doing a scan we do not keep hitting the
1183 * bad disk first.
1184 */
1185 struct ccd_softc *cs = &ccd_softc[unit];
1186
1187 msg = ", trying other disk";
1188 cs->sc_pick = 1 - cs->sc_pick;
54078292 1189 cs->sc_blk[cs->sc_pick] = obio->bio_offset;
984263bc 1190 } else {
81b5c339
MD
1191 obp->b_flags |= B_ERROR;
1192 obp->b_error = cbp->cb_buf.b_error ?
984263bc
MD
1193 cbp->cb_buf.b_error : EIO;
1194 }
54078292 1195 printf("ccd%d: error %d on component %d offset %lld (ccd offset %lld)%s\n",
81b5c339 1196 unit, obp->b_error, cbp->cb_comp,
54078292
MD
1197 cbp->cb_buf.b_bio2.bio_offset,
1198 obio->bio_offset, msg);
984263bc
MD
1199 }
1200
1201 /*
1202 * Process mirror. If we are writing, I/O has been initiated on both
1203 * buffers and we fall through only after both are finished.
1204 *
1205 * If we are reading only one I/O is initiated at a time. If an
1206 * error occurs we initiate the second I/O and return, otherwise
1207 * we free the second I/O without initiating it.
1208 */
1209
1210 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
10f3fee5 1211 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
984263bc
MD
1212 /*
1213 * When writing, handshake with the second buffer
1214 * to determine when both are done. If both are not
1215 * done, return here.
1216 */
1217 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1218 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1219 putccdbuf(cbp);
abe2ad7c 1220 crit_exit();
984263bc
MD
1221 return;
1222 }
1223 } else {
1224 /*
1225 * When reading, either dispose of the second buffer
1226 * or initiate I/O on the second buffer if an error
1227 * occured with this one.
1228 */
1229 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1230 if (cbp->cb_buf.b_flags & B_ERROR) {
1231 cbp->cb_mirror->cb_pflags |=
1232 CCDPF_MIRROR_DONE;
81b5c339 1233 vn_strategy(
a8f169e2 1234 cbp->cb_mirror->cb_vp,
81b5c339 1235 &cbp->cb_mirror->cb_buf.b_bio1
984263bc
MD
1236 );
1237 putccdbuf(cbp);
abe2ad7c 1238 crit_exit();
984263bc
MD
1239 return;
1240 } else {
1241 putccdbuf(cbp->cb_mirror);
1242 /* fall through */
1243 }
1244 }
1245 }
1246 }
1247
1248 /*
9a71d53f 1249 * Use our saved b_bufsize to determine if an unexpected EOF occured.
984263bc
MD
1250 */
1251 count = cbp->cb_buf.b_bufsize;
1252 putccdbuf(cbp);
1253
1254 /*
1255 * If all done, "interrupt".
1256 */
81b5c339
MD
1257 obp->b_resid -= count;
1258 if (obp->b_resid < 0)
984263bc 1259 panic("ccdiodone: count");
81b5c339
MD
1260 if (obp->b_resid == 0)
1261 ccdintr(&ccd_softc[unit], obio);
abe2ad7c 1262 crit_exit();
984263bc
MD
1263}
1264
1265static int
41c20dac 1266ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td)
984263bc
MD
1267{
1268 int unit = ccdunit(dev);
1269 int i, j, lookedup = 0, error = 0;
abe2ad7c 1270 int part, pmask;
984263bc
MD
1271 struct ccd_softc *cs;
1272 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1273 struct ccddevice ccd;
1274 char **cpp;
1275 struct vnode **vpp;
dadab5e9 1276 struct ucred *cred;
41c20dac 1277
dadab5e9
MD
1278 KKASSERT(td->td_proc != NULL);
1279 cred = td->td_proc->p_ucred;
984263bc
MD
1280
1281 if (unit >= numccd)
1282 return (ENXIO);
1283 cs = &ccd_softc[unit];
1284
1285 bzero(&ccd, sizeof(ccd));
1286
1287 switch (cmd) {
1288 case CCDIOCSET:
1289 if (cs->sc_flags & CCDF_INITED)
1290 return (EBUSY);
1291
1292 if ((flag & FWRITE) == 0)
1293 return (EBADF);
1294
1295 if ((error = ccdlock(cs)) != 0)
1296 return (error);
1297
1298 if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1299 return (EINVAL);
1300
1301 /* Fill in some important bits. */
1302 ccd.ccd_unit = unit;
1303 ccd.ccd_interleave = ccio->ccio_ileave;
1304 if (ccd.ccd_interleave == 0 &&
1305 ((ccio->ccio_flags & CCDF_MIRROR) ||
1306 (ccio->ccio_flags & CCDF_PARITY))) {
1307 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1308 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1309 }
1310 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1311 (ccio->ccio_flags & CCDF_PARITY)) {
1312 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1313 ccio->ccio_flags &= ~CCDF_PARITY;
1314 }
1315 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1316 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1317 printf("ccd%d: mirror/parity forces uniform flag\n",
1318 unit);
1319 ccio->ccio_flags |= CCDF_UNIFORM;
1320 }
1321 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1322
1323 /*
1324 * Allocate space for and copy in the array of
1325 * componet pathnames and device numbers.
1326 */
1327 cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1328 M_DEVBUF, M_WAITOK);
1329 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1330 M_DEVBUF, M_WAITOK);
1331
1332 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1333 ccio->ccio_ndisks * sizeof(char **));
1334 if (error) {
1335 free(vpp, M_DEVBUF);
1336 free(cpp, M_DEVBUF);
1337 ccdunlock(cs);
1338 return (error);
1339 }
1340
1341#ifdef DEBUG
1342 if (ccddebug & CCDB_INIT)
1343 for (i = 0; i < ccio->ccio_ndisks; ++i)
1344 printf("ccdioctl: component %d: 0x%x\n",
1345 i, cpp[i]);
1346#endif
1347
1348 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1349#ifdef DEBUG
1350 if (ccddebug & CCDB_INIT)
1351 printf("ccdioctl: lookedup = %d\n", lookedup);
1352#endif
dadab5e9 1353 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) {
984263bc 1354 for (j = 0; j < lookedup; ++j)
87de5057 1355 (void)vn_close(vpp[j], FREAD|FWRITE);
984263bc
MD
1356 free(vpp, M_DEVBUF);
1357 free(cpp, M_DEVBUF);
1358 ccdunlock(cs);
1359 return (error);
1360 }
1361 ++lookedup;
1362 }
1363 ccd.ccd_cpp = cpp;
1364 ccd.ccd_vpp = vpp;
1365 ccd.ccd_ndev = ccio->ccio_ndisks;
1366
1367 /*
1368 * Initialize the ccd. Fills in the softc for us.
1369 */
dadab5e9 1370 if ((error = ccdinit(&ccd, cpp, td)) != 0) {
984263bc 1371 for (j = 0; j < lookedup; ++j)
87de5057 1372 (void)vn_close(vpp[j], FREAD|FWRITE);
984263bc
MD
1373 bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1374 free(vpp, M_DEVBUF);
1375 free(cpp, M_DEVBUF);
1376 ccdunlock(cs);
1377 return (error);
1378 }
1379
1380 /*
1381 * The ccd has been successfully initialized, so
1382 * we can place it into the array and read the disklabel.
1383 */
1384 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1385 ccio->ccio_unit = unit;
1386 ccio->ccio_size = cs->sc_size;
1387 ccdgetdisklabel(dev);
1388
1389 ccdunlock(cs);
1390
1391 break;
1392
1393 case CCDIOCCLR:
1394 if ((cs->sc_flags & CCDF_INITED) == 0)
1395 return (ENXIO);
1396
1397 if ((flag & FWRITE) == 0)
1398 return (EBADF);
1399
1400 if ((error = ccdlock(cs)) != 0)
1401 return (error);
1402
1403 /* Don't unconfigure if any other partitions are open */
1404 part = ccdpart(dev);
1405 pmask = (1 << part);
1406 if ((cs->sc_openmask & ~pmask)) {
1407 ccdunlock(cs);
1408 return (EBUSY);
1409 }
1410
1411 /*
1412 * Free ccd_softc information and clear entry.
1413 */
1414
1415 /* Close the components and free their pathnames. */
1416 for (i = 0; i < cs->sc_nccdisks; ++i) {
1417 /*
1418 * XXX: this close could potentially fail and
1419 * cause Bad Things. Maybe we need to force
1420 * the close to happen?
1421 */
1422#ifdef DEBUG
1423 if (ccddebug & CCDB_VNODE)
1424 vprint("CCDIOCCLR: vnode info",
1425 cs->sc_cinfo[i].ci_vp);
1426#endif
87de5057 1427 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE);
984263bc
MD
1428 free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1429 }
1430
1431 /* Free interleave index. */
1432 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1433 free(cs->sc_itable[i].ii_index, M_DEVBUF);
1434
1435 /* Free component info and interleave table. */
1436 free(cs->sc_cinfo, M_DEVBUF);
1437 free(cs->sc_itable, M_DEVBUF);
1438 cs->sc_flags &= ~CCDF_INITED;
1439
1440 /*
1441 * Free ccddevice information and clear entry.
1442 */
1443 free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1444 free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1445 ccd.ccd_dk = -1;
1446 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1447
1448 /*
1449 * And remove the devstat entry.
1450 */
1451 devstat_remove_entry(&cs->device_stats);
1452
1453 /* This must be atomic. */
abe2ad7c 1454 crit_enter();
984263bc
MD
1455 ccdunlock(cs);
1456 bzero(cs, sizeof(struct ccd_softc));
abe2ad7c 1457 crit_exit();
984263bc
MD
1458
1459 break;
1460
1461 case DIOCGDINFO:
1462 if ((cs->sc_flags & CCDF_INITED) == 0)
1463 return (ENXIO);
1464
1465 *(struct disklabel *)data = cs->sc_label;
1466 break;
1467
1468 case DIOCGPART:
1469 if ((cs->sc_flags & CCDF_INITED) == 0)
1470 return (ENXIO);
1471
1472 ((struct partinfo *)data)->disklab = &cs->sc_label;
1473 ((struct partinfo *)data)->part =
1474 &cs->sc_label.d_partitions[ccdpart(dev)];
1475 break;
1476
1477 case DIOCWDINFO:
1478 case DIOCSDINFO:
1479 if ((cs->sc_flags & CCDF_INITED) == 0)
1480 return (ENXIO);
1481
1482 if ((flag & FWRITE) == 0)
1483 return (EBADF);
1484
1485 if ((error = ccdlock(cs)) != 0)
1486 return (error);
1487
1488 cs->sc_flags |= CCDF_LABELLING;
1489
1490 error = setdisklabel(&cs->sc_label,
1491 (struct disklabel *)data, 0);
1492 if (error == 0) {
e4c9c0c8
MD
1493 if (cmd == DIOCWDINFO) {
1494 dev_t cdev = CCDLABELDEV(dev);
1495 error = writedisklabel(cdev, &cs->sc_label);
1496 }
984263bc
MD
1497 }
1498
1499 cs->sc_flags &= ~CCDF_LABELLING;
1500
1501 ccdunlock(cs);
1502
1503 if (error)
1504 return (error);
1505 break;
1506
1507 case DIOCWLABEL:
1508 if ((cs->sc_flags & CCDF_INITED) == 0)
1509 return (ENXIO);
1510
1511 if ((flag & FWRITE) == 0)
1512 return (EBADF);
1513 if (*(int *)data != 0)
1514 cs->sc_flags |= CCDF_WLABEL;
1515 else
1516 cs->sc_flags &= ~CCDF_WLABEL;
1517 break;
1518
1519 default:
1520 return (ENOTTY);
1521 }
1522
1523 return (0);
1524}
1525
1526static int
41c20dac 1527ccdsize(dev_t dev)
984263bc
MD
1528{
1529 struct ccd_softc *cs;
1530 int part, size;
1531
41c20dac 1532 if (ccdopen(dev, 0, S_IFCHR, curthread))
984263bc
MD
1533 return (-1);
1534
1535 cs = &ccd_softc[ccdunit(dev)];
1536 part = ccdpart(dev);
1537
1538 if ((cs->sc_flags & CCDF_INITED) == 0)
1539 return (-1);
1540
1541 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1542 size = -1;
1543 else
1544 size = cs->sc_label.d_partitions[part].p_size;
1545
41c20dac 1546 if (ccdclose(dev, 0, S_IFCHR, curthread))
984263bc
MD
1547 return (-1);
1548
1549 return (size);
1550}
1551
1552static int
e4c9c0c8 1553ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize)
984263bc 1554{
984263bc
MD
1555 /* Not implemented. */
1556 return ENXIO;
1557}
1558
1559/*
1560 * Lookup the provided name in the filesystem. If the file exists,
1561 * is a valid block device, and isn't being used by anyone else,
1562 * set *vpp to the file's vnode.
1563 */
1564static int
dadab5e9 1565ccdlookup(char *path, struct thread *td, struct vnode **vpp)
984263bc 1566{
fad57d0e
MD
1567 struct nlookupdata nd;
1568 struct ucred *cred;
984263bc
MD
1569 struct vnode *vp;
1570 int error;
dadab5e9
MD
1571
1572 KKASSERT(td->td_proc);
1573 cred = td->td_proc->p_ucred;
fad57d0e 1574 *vpp = NULL;
984263bc 1575
fad57d0e
MD
1576 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1577 if (error)
1578 return (error);
1579 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
984263bc
MD
1580#ifdef DEBUG
1581 if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1582 printf("ccdlookup: vn_open error = %d\n", error);
1583#endif
fad57d0e 1584 goto done;
984263bc 1585 }
fad57d0e 1586 vp = nd.nl_open_vp;
984263bc
MD
1587
1588 if (vp->v_usecount > 1) {
1589 error = EBUSY;
fad57d0e 1590 goto done;
984263bc
MD
1591 }
1592
1593 if (!vn_isdisk(vp, &error))
fad57d0e 1594 goto done;
984263bc
MD
1595
1596#ifdef DEBUG
1597 if (ccddebug & CCDB_VNODE)
1598 vprint("ccdlookup: vnode info", vp);
1599#endif
1600
ca466bae 1601 VOP_UNLOCK(vp, 0);
fad57d0e
MD
1602 nd.nl_open_vp = NULL;
1603 nlookup_done(&nd);
1604 *vpp = vp; /* leave ref intact */
984263bc 1605 return (0);
fad57d0e
MD
1606done:
1607 nlookup_done(&nd);
984263bc
MD
1608 return (error);
1609}
1610
1611/*
1612 * Read the disklabel from the ccd. If one is not present, fake one
1613 * up.
1614 */
1615static void
c436375a 1616ccdgetdisklabel(dev_t dev)
984263bc
MD
1617{
1618 int unit = ccdunit(dev);
1619 struct ccd_softc *cs = &ccd_softc[unit];
1620 char *errstring;
1621 struct disklabel *lp = &cs->sc_label;
1622 struct ccdgeom *ccg = &cs->sc_geom;
e4c9c0c8 1623 dev_t cdev;
984263bc
MD
1624
1625 bzero(lp, sizeof(*lp));
1626
1627 lp->d_secperunit = cs->sc_size;
1628 lp->d_secsize = ccg->ccg_secsize;
1629 lp->d_nsectors = ccg->ccg_nsectors;
1630 lp->d_ntracks = ccg->ccg_ntracks;
1631 lp->d_ncylinders = ccg->ccg_ncylinders;
1632 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1633
1634 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1635 lp->d_type = DTYPE_CCD;
1636 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1637 lp->d_rpm = 3600;
1638 lp->d_interleave = 1;
1639 lp->d_flags = 0;
1640
1641 lp->d_partitions[RAW_PART].p_offset = 0;
1642 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1643 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1644 lp->d_npartitions = RAW_PART + 1;
1645
1646 lp->d_bbsize = BBSIZE; /* XXX */
1647 lp->d_sbsize = SBSIZE; /* XXX */
1648
1649 lp->d_magic = DISKMAGIC;
1650 lp->d_magic2 = DISKMAGIC;
1651 lp->d_checksum = dkcksum(&cs->sc_label);
1652
1653 /*
1654 * Call the generic disklabel extraction routine.
1655 */
e4c9c0c8
MD
1656 cdev = CCDLABELDEV(dev);
1657 errstring = readdisklabel(cdev, &cs->sc_label);
984263bc
MD
1658 if (errstring != NULL)
1659 ccdmakedisklabel(cs);
1660
1661#ifdef DEBUG
1662 /* It's actually extremely common to have unlabeled ccds. */
1663 if (ccddebug & CCDB_LABEL)
1664 if (errstring != NULL)
1665 printf("ccd%d: %s\n", unit, errstring);
1666#endif
1667}
1668
1669/*
1670 * Take care of things one might want to take care of in the event
1671 * that a disklabel isn't present.
1672 */
1673static void
c436375a 1674ccdmakedisklabel(struct ccd_softc *cs)
984263bc
MD
1675{
1676 struct disklabel *lp = &cs->sc_label;
1677
1678 /*
1679 * For historical reasons, if there's no disklabel present
1680 * the raw partition must be marked FS_BSDFFS.
1681 */
1682 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1683
1684 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1685}
1686
1687/*
1688 * Wait interruptibly for an exclusive lock.
1689 *
1690 * XXX
1691 * Several drivers do this; it should be abstracted and made MP-safe.
1692 */
1693static int
c436375a 1694ccdlock(struct ccd_softc *cs)
984263bc
MD
1695{
1696 int error;
1697
1698 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1699 cs->sc_flags |= CCDF_WANTED;
377d4740 1700 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
984263bc
MD
1701 return (error);
1702 }
1703 cs->sc_flags |= CCDF_LOCKED;
1704 return (0);
1705}
1706
1707/*
1708 * Unlock and wake up any waiters.
1709 */
1710static void
c436375a 1711ccdunlock(struct ccd_softc *cs)
984263bc
MD
1712{
1713
1714 cs->sc_flags &= ~CCDF_LOCKED;
1715 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1716 cs->sc_flags &= ~CCDF_WANTED;
1717 wakeup(cs);
1718 }
1719}
1720
#ifdef DEBUG
/*
 * Debug helper: print every entry of an interleave table.  The table
 * is walked until an entry with ii_ndisk == 0 is reached; for each
 * entry the disk count, start block, start offset and the index array
 * are printed on one line.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;
	int slot;

	while (ii->ii_ndisk) {
		printf(" itab[%d]: #dk %d sblk %d soff %d",
		       entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		for (slot = 0; slot < ii->ii_ndisk; slot++)
			printf(" %d", ii->ii_index[slot]);
		printf("\n");
		entry++;
		ii++;
	}
}
#endif
1736
1737\f
1738/* Local Variables: */
1739/* c-argdecl-indent: 8 */
1740/* c-continued-statement-offset: 8 */
1741/* c-indent-level: 8 */
1742/* End: */