4 * Don't store drive configuration on the config DB: read each drive's header
5 * to decide where it is.
7 * Accept any old crap in the config_<foo> functions, and complain when
8 * we try to bring it up.
10 * When trying to bring volumes up, check that the complete address range
14 * Copyright (c) 1997, 1998
15 * Nan Yang Computer Services Limited. All rights reserved.
17 * This software is distributed under the so-called ``Berkeley
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
23 * 1. Redistributions of source code must retain the above copyright
24 * notice, this list of conditions and the following disclaimer.
25 * 2. Redistributions in binary form must reproduce the above copyright
26 * notice, this list of conditions and the following disclaimer in the
27 * documentation and/or other materials provided with the distribution.
28 * 3. All advertising materials mentioning features or use of this software
29 * must display the following acknowledgement:
30 * This product includes software developed by Nan Yang Computer
32 * 4. Neither the name of the Company nor the names of its contributors
33 * may be used to endorse or promote products derived from this software
34 * without specific prior written permission.
36 * This software is provided ``as is'', and any express or implied
37 * warranties, including, but not limited to, the implied warranties of
38 * merchantability and fitness for a particular purpose are disclaimed.
39 * In no event shall the company or contributors be liable for any
40 * direct, indirect, incidental, special, exemplary, or consequential
41 * damages (including, but not limited to, procurement of substitute
42 * goods or services; loss of use, data, or profits; or business
43 * interruption) however caused and on any theory of liability, whether
44 * in contract, strict liability, or tort (including negligence or
45 * otherwise) arising in any way out of the use of this software, even if
46 * advised of the possibility of such damage.
48 * $Id: vinumconfig.c,v 1.30 2000/05/01 09:45:50 grog Exp grog $
49 * $FreeBSD: src/sys/dev/vinum/vinumconfig.c,v 1.32.2.6 2002/02/03 00:43:35 grog Exp $
50 * $DragonFly: src/sys/dev/raid/vinum/vinumconfig.c,v 1.12 2007/06/07 22:58:00 corecode Exp $
59 #define MAXTOKEN 64 /* maximum number of tokens in a line */
62 * We can afford the luxury of global variables here,
63 * since start_config ensures that these functions
64 * are single-threaded.
67 /* These are indices in vinum_conf of the last-mentioned of each kind of object */
68 static int current_drive; /* note the last drive we mention, for
70 static int current_plex; /* and the same for the last plex */
71 static int current_volume; /* and the last volume */
72 static struct _ioctl_reply *ioctl_reply; /* struct to return via ioctl */
74 static void made_sd(struct sd *sd);
75 static void made_vol(struct volume *vol);
76 static void made_plex(struct plex *plex);
78 /* These values are used by most of these routines, so set them as globals */
79 static char *token[MAXTOKEN]; /* pointers to individual tokens */
80 static int tokens; /* number of tokens */
91 #define MSG_MAX 1024 /* maximum length of a formatted message */
93 * Format an error message and return to the user in the reply.
94 * CARE: This routine is designed to be called only from the
95 * configuration routines, so it assumes it's the owner of
96 * the configuration lock, and unlocks it on exit
/*
 * throw_rude_remark(error, msg, ...): report a configuration error.
 *
 * Visible behaviour:
 *  - Panics when vinum_conf.flags does not have VF_LOCKED set.
 *  - When ioctl_reply is non-NULL and VF_READING_CONFIG is clear, the
 *    message is formatted into a temporary buffer with kvasnrprintf()
 *    (limit MSG_MAX), copied into ioctl_reply->msg, and error is stored
 *    in ioctl_reply->error.  The kvprintf() console output is presumably
 *    the alternative branch; the lines joining the two are not visible.
 *  - When VF_READING_CONFIG is set, daemon_noupdate is ORed into
 *    daemon_options (and logged) if it was not already set.  The inner
 *    VF_READING_CONFIG test repeats the enclosing condition, so only the
 *    daemon_noupdate test has any effect.  The "bitter end" comment
 *    suggests this branch then returns to the caller; that statement is
 *    not visible here.
 *  - Otherwise finish_config() is called with the previous value of the
 *    static `finishing' flag, the flag is restored, and control leaves
 *    through longjmp(command_fail, error).
 *
 * NOTE(review): the strcpy() into ioctl_reply->msg is unbounded; confirm
 * that the reply buffer holds at least MSG_MAX bytes.
 */
99 throw_rude_remark(int error, char *msg,...)
103 static int finishing; /* don't recurse */
106 if ((vinum_conf.flags & VF_LOCKED) == 0) /* bug catcher */
107 panic ("throw_rude_remark: called without config lock");
109 if ((ioctl_reply != NULL) /* we're called from the user */
110 &&(!(vinum_conf.flags & VF_READING_CONFIG))) { /* and not reading from disk: return msg */
112 * We can't just format to ioctl_reply, since it
113 * may contain our input parameters
115 kvasnrprintf(&text, MSG_MAX, 10, msg, ap);
116 strcpy(ioctl_reply->msg, text);
117 ioctl_reply->error = error; /* first byte is the error number */
121 kvprintf(msg, ap); /* print to the console */
126 if (vinum_conf.flags & VF_READING_CONFIG) { /* go through to the bitter end, */
127 if ((vinum_conf.flags & VF_READING_CONFIG) /* we're reading from disk, */
128 &&((daemon_options & daemon_noupdate) == 0)) {
129 log(LOG_NOTICE, "Disabling configuration updates\n");
130 daemon_options |= daemon_noupdate;
135 * We have a problem here: we want to unlock the
136 * configuration, which implies tidying up, but
137 * if we find an error while tidying up, we could
138 * recurse for ever. Use this kludge to only try
141 was_finishing = finishing;
143 finish_config(was_finishing); /* unlock anything we may be holding */
144 finishing = was_finishing;
145 longjmp(command_fail, error);
149 * Check a volume to see if the plex is already assigned to it.
150 * Return index in volume->plex, or -1 if not assigned
/*
 * my_plex(volno, plexno): scan the plex table of volume volno
 * (vol->plex[0 .. vol->plexes - 1]) for plexno.  Falls through to -1 when
 * no slot matches.  The statement executed on a match is not visible in
 * this listing; per the description above it yields the matching index.
 */
153 my_plex(int volno, int plexno)
158 vol = &VOL[volno]; /* point to volno */
159 for (i = 0; i < vol->plexes; i++)
160 if (vol->plex[i] == plexno)
162 return -1; /* not found */
166 * Check a plex to see if the subdisk is already assigned to it.
167 * Return index in plex->sd, or -1 if not assigned
/*
 * my_sd(plexno, sdno): scan the subdisk table of plex plexno
 * (plex->sdnos[0 .. plex->subdisks - 1]) for sdno.  Falls through to -1
 * when no slot matches.  The statement executed on a match is not visible
 * in this listing; per the description above it yields the matching index.
 */
170 my_sd(int plexno, int sdno)
175 plex = &PLEX[plexno];
176 for (i = 0; i < plex->subdisks; i++)
177 if (plex->sdnos[i] == sdno)
179 return -1; /* not found */
182 /* Add plex to the volume if possible */
184 give_plex_to_volume(int volno, int plexno)
190 * It's not an error for the plex to already
191 * belong to the volume, but we need to check a
192 * number of things to make sure it's done right.
195 if (my_plex(volno, plexno) >= 0)
196 return plexno; /* that's it */
198 vol = &VOL[volno]; /* point to volume */
199 if (vol->plexes == MAXPLEX) /* all plexes allocated */
200 throw_rude_remark(ENOSPC,
201 "Too many plexes for volume %s",
203 else if ((vol->plexes > 0) /* we have other plexes */
204 &&((vol->flags & VF_CONFIG_SETUPSTATE) == 0)) /* and we're not setting up state */
205 invalidate_subdisks(&PLEX[plexno], sd_stale); /* make the subdisks invalid */
206 vol->plex[vol->plexes] = plexno; /* this one */
207 vol->plexes++; /* add another plex */
208 PLEX[plexno].volno = volno; /* note the number of our volume */
210 /* Find out how big our volume is */
211 for (i = 0; i < vol->plexes; i++)
212 vol->size = u64max(vol->size, PLEX[vol->plex[i]].length);
213 return vol->plexes - 1; /* and return its index */
217 * Add subdisk to a plex if possible
/*
 * give_sd_to_plex(plexno, sdno): attach subdisk sdno to plex plexno.
 *
 *  - If my_sd() already finds the subdisk in the plex, its index is
 *    returned unchanged.
 *  - A negative sd->plexoffset means "not specified": the first subdisk
 *    starts at 0, a concatenated plex continues at the end of the last
 *    subdisk, any other organization uses stripesize * subdisks.
 *  - A plex that already holds MAXSD subdisks is refused with ENOSPC.
 *  - plex->subdisks is incremented before the insertion loop, so the loop
 *    bound (plex->subdisks - 1) is the number of entries that were already
 *    present.
 *  - For parity plexes the length is recomputed as (subdisks - 1) *
 *    sd->sectors; the "+=" line is presumably the else branch (the line
 *    between the two is not visible in this listing).  The owning
 *    volume's size is raised with u64max() when plex->volno >= 0.
 *  - The subdisk number is inserted so that plex->sdnos stays ordered by
 *    plexoffset; sd->plexsdno and sd->plexno record the position.
 *
 * NOTE(review): entries shifted up by the insertion do not have their own
 * plexsdno adjusted in the visible code; confirm that is handled elsewhere.
 */
220 give_sd_to_plex(int plexno, int sdno)
227 * It's not an error for the sd to already
228 * belong to the plex, but we need to check a
229 * number of things to make sure it's done right.
232 i = my_sd(plexno, sdno);
233 if (i >= 0) /* does it already belong to us? */
234 return i; /* that's it */
236 plex = &PLEX[plexno]; /* point to the plex */
237 sd = &SD[sdno]; /* and the subdisk */
239 /* Do we have an offset? Otherwise put it after the last one */
240 if (sd->plexoffset < 0) { /* no offset specified */
241 if (plex->subdisks > 0) {
242 struct sd *lastsd = &SD[plex->sdnos[plex->subdisks - 1]]; /* last subdisk */
244 if (plex->organization == plex_concat) /* concat, */
245 sd->plexoffset = lastsd->sectors + lastsd->plexoffset; /* starts here */
246 else /* striped, RAID-4 or RAID-5 */
247 sd->plexoffset = plex->stripesize * plex->subdisks; /* starts here */
248 } else /* first subdisk */
249 sd->plexoffset = 0; /* start at the beginning */
251 if (plex->subdisks == MAXSD) /* we already have our maximum */
252 throw_rude_remark(ENOSPC, /* crap out */
253 "Can't add %s to %s: plex full",
257 plex->subdisks++; /* another entry */
258 if (plex->subdisks >= plex->subdisks_allocated) /* need more space */
259 EXPAND(plex->sdnos, int, plex->subdisks_allocated, INITIAL_SUBDISKS_IN_PLEX);
261 /* Adjust size of plex and volume. */
262 if (isparity(plex)) /* RAID-4 or RAID-5 */
263 plex->length = (plex->subdisks - 1) * sd->sectors; /* size is one disk short */
265 plex->length += sd->sectors; /* plex gets this much bigger */
266 if (plex->volno >= 0) /* we have a volume */
267 VOL[plex->volno].size = u64max(VOL[plex->volno].size, plex->length); /* adjust its size */
270 * We need to check that the subdisks don't overlap,
271 * but we can't do that until a point where we *must*
272 * know the size of all the subdisks. That's not
273 * here. But we need to sort them by offset
275 for (i = 0; i < plex->subdisks - 1; i++) {
276 if (sd->plexoffset < SD[plex->sdnos[i]].plexoffset) { /* it fits before this one */
277 /* First move any remaining subdisks by one */
280 for (j = plex->subdisks - 1; j > i; j--) /* move up one at a time */
281 plex->sdnos[j] = plex->sdnos[j - 1];
282 plex->sdnos[i] = sdno;
283 sd->plexsdno = i; /* note where we are in the subdisk */
289 * The plex doesn't have any subdisk with a
290 * larger offset. Insert it here.
292 plex->sdnos[i] = sdno;
293 sd->plexsdno = i; /* note where we are in the subdisk */
294 sd->plexno = plex->plexno; /* and who we belong to */
299 * Add a subdisk to drive if possible. The
300 * pointer to the drive must already be stored in
301 * the sd structure, but the drive doesn't know
302 * about the subdisk yet.
/*
 * give_sd_to_drive(sdno): carve space for subdisk sdno out of the free
 * list of the drive named by sd->driveno.
 *
 *  - If the drive is not drive_up, only update_sd_state() is called (the
 *    statement that leaves the function is not visible in this listing).
 *  - Hot spare drives (VF_HOTSPARE) and drives without enough
 *    sectors_available are refused with ENOSPC.
 *  - drive->subdisks_used is incremented before the space is located;
 *    drive->opencount is incremented at the end.
 *  - Three allocation modes follow:
 *      sd->sectors == 0     take a whole free-list entry; each entry whose
 *                           size is >= the size chosen so far replaces the
 *                           choice, so the largest entry is taken.
 *      sd->driveoffset < 0  use a free-list entry with at least
 *                           sd->sectors sectors, shrinking or removing it.
 *      otherwise            the requested range must lie inside a single
 *                           entry, which is removed, trimmed or split.
 *
 * NOTE(review): the bcopy() calls that close a gap copy
 * (freelist_entries - index) elements starting at index + 1, which is one
 * element more than follows that index -- confirm the array always has a
 * spare slot.
 * NOTE(review): in the specific-offset failure path sd->driveoffset is set
 * to -1 before it is passed to throw_rude_remark(), so the message would
 * report -1 rather than the requested offset.
 * NOTE(review): no check of the free-list capacity is visible before the
 * split case increments freelist_entries.
 */
305 give_sd_to_drive(int sdno)
307 struct sd *sd; /* pointer to subdisk */
308 struct drive *drive; /* and drive */
309 int fe; /* index in free list */
310 int sfe; /* and index of subdisk when assigning max */
312 sd = &SD[sdno]; /* point to sd */
313 drive = &DRIVE[sd->driveno]; /* and drive */
315 if (drive->state != drive_up) {
316 update_sd_state(sdno); /* that crashes the subdisk */
319 if (drive->flags & VF_HOTSPARE) /* the drive is a hot spare, */
320 throw_rude_remark(ENOSPC,
321 "Can't place %s on hot spare drive %s",
324 if ((drive->sectors_available == 0) /* no space left */
325 ||(sd->sectors > drive->sectors_available)) { /* or too big, */
326 sd->driveoffset = -1; /* don't be confusing */
328 throw_rude_remark(ENOSPC, "No space for %s on %s", sd->name, drive->label.name);
329 return; /* in case we come back here */
331 drive->subdisks_used++; /* one more subdisk */
333 if (sd->sectors == 0) { /* take the largest chunk */
334 sfe = 0; /* to keep the compiler happy */
335 for (fe = 0; fe < drive->freelist_entries; fe++) {
336 if (drive->freelist[fe].sectors >= sd->sectors) { /* more space here */
337 sd->sectors = drive->freelist[fe].sectors; /* take it */
338 sd->driveoffset = drive->freelist[fe].offset;
339 sfe = fe; /* and note the index for later */
342 if (sd->sectors == 0) { /* no luck, */
343 sd->driveoffset = -1; /* don't be confusing */
345 throw_rude_remark(ENOSPC, /* give up */
346 "No space for %s on %s",
350 if (sfe < (drive->freelist_entries - 1)) /* not the last one, */
351 bcopy(&drive->freelist[sfe + 1],
352 &drive->freelist[sfe],
353 (drive->freelist_entries - sfe) * sizeof(struct drive_freelist));
354 drive->freelist_entries--; /* one less entry */
355 drive->sectors_available -= sd->sectors; /* and note how much less space we have */
356 } else if (sd->driveoffset < 0) { /* no offset specified, find one */
357 for (fe = 0; fe < drive->freelist_entries; fe++) {
358 if (drive->freelist[fe].sectors >= sd->sectors) { /* it'll fit here */
359 sd->driveoffset = drive->freelist[fe].offset;
360 if (sd->sectors == drive->freelist[fe].sectors) { /* used up the entire entry */
361 if (fe < (drive->freelist_entries - 1)) /* not the last one, */
362 bcopy(&drive->freelist[fe + 1],
363 &drive->freelist[fe],
364 (drive->freelist_entries - fe) * sizeof(struct drive_freelist));
365 drive->freelist_entries--; /* one less entry */
367 drive->freelist[fe].sectors -= sd->sectors; /* this much less space */
368 drive->freelist[fe].offset += sd->sectors; /* this much further on */
370 drive->sectors_available -= sd->sectors; /* and note how much less space we have */
374 if (sd->driveoffset < 0)
376 * Didn't find anything. Although the drive has
377 * enough space, it's too fragmented
381 throw_rude_remark(ENOSPC, "No space for %s on %s", sd->name, drive->label.name);
383 } else { /* specific offset */
385 * For a specific offset to work, the space must be
386 * entirely in a single freelist entry. Look for it.
388 u_int64_t sdend = sd->driveoffset + sd->sectors; /* end of our subdisk */
389 for (fe = 0; fe < drive->freelist_entries; fe++) {
390 u_int64_t dend = drive->freelist[fe].offset + drive->freelist[fe].sectors; /* end of entry */
391 if (dend >= sdend) { /* fits before here */
392 if (drive->freelist[fe].offset > sd->driveoffset) { /* starts after the beginning of sd area */
393 sd->driveoffset = -1; /* don't be confusing */
394 set_sd_state(sd->sdno, sd_down, setstate_force);
395 throw_rude_remark(ENOSPC,
396 "No space for %s on drive %s at offset %jd",
399 (intmax_t)sd->driveoffset);
403 * We've found the space, and we can allocate it.
404 * We don't need to say that to the subdisk, which
405 * already knows about it. We need to tell it to
406 * the free list, though. We have four possibilities:
408 * 1. The subdisk exactly eats up the entry. That's the
410 * 2. The subdisk starts at the beginning and leaves space
412 * 3. The subdisk starts after the beginning and leaves
413 * space at the end as well: we end up with another
415 * 4. The subdisk leaves space at the beginning and finishes
418 drive->sectors_available -= sd->sectors; /* note how much less space we have */
419 if (sd->driveoffset == drive->freelist[fe].offset) { /* 1 or 2 */
420 if (sd->sectors == drive->freelist[fe].sectors) { /* 1: used up the entire entry */
421 if (fe < (drive->freelist_entries - 1)) /* not the last one, */
422 bcopy(&drive->freelist[fe + 1],
423 &drive->freelist[fe],
424 (drive->freelist_entries - fe) * sizeof(struct drive_freelist));
425 drive->freelist_entries--; /* one less entry */
426 } else { /* 2: space at the end */
427 drive->freelist[fe].sectors -= sd->sectors; /* this much less space */
428 drive->freelist[fe].offset += sd->sectors; /* this much further on */
430 } else { /* 3 or 4 */
431 drive->freelist[fe].sectors = sd->driveoffset - drive->freelist[fe].offset;
432 if (dend > sdend) { /* 3: space at the end as well */
433 if (fe < (drive->freelist_entries - 1)) /* not the last one */
434 bcopy(&drive->freelist[fe], /* move the rest down */
435 &drive->freelist[fe + 1],
436 (drive->freelist_entries - fe) * sizeof(struct drive_freelist));
437 drive->freelist_entries++; /* one more entry */
438 drive->freelist[fe + 1].offset = sdend; /* second entry starts after sd */
439 drive->freelist[fe + 1].sectors = dend - sdend; /* and is this long */
446 drive->opencount++; /* one more subdisk attached */
449 /* Get an empty drive entry from the drive table */
/*
 * get_empty_drive(): pick a slot in DRIVE[] for a new drive.
 *
 * Scans for an entry in state drive_unallocated; when the scan runs off
 * the end of the table, the table is grown with EXPAND().  The chosen
 * entry is zeroed, given its own index, flagged VF_NEWBORN and named
 * "unknown".  Returns the index of the entry.
 */
451 get_empty_drive(void)
456 /* first see if we have one which has been deallocated */
457 for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
458 if (DRIVE[driveno].state == drive_unallocated) /* bingo */
462 if (driveno >= vinum_conf.drives_allocated) /* we've used all our allocation */
463 EXPAND(DRIVE, struct drive, vinum_conf.drives_allocated, INITIAL_DRIVES);
465 /* got a drive entry. Make it pretty */
466 drive = &DRIVE[driveno];
467 bzero(drive, sizeof(struct drive));
468 drive->driveno = driveno; /* put number in structure */
469 drive->flags |= VF_NEWBORN; /* newly born drive */
470 strcpy(drive->devicename, "unknown"); /* and make the name ``unknown'' */
471 return driveno; /* return the index */
475 * Find the named drive in vinum_conf.drive, return a pointer
476 * return the index in vinum_conf.drive.
477 * Don't mark the drive as allocated (XXX SMP)
478 * If create != 0, create an entry if it doesn't exist
480 /* XXX check if we have it open from attach */
/*
 * find_drive(name, create): look up a drive by its label name.
 *
 * An entry matches when it has a non-empty label name equal to name and
 * its state is above drive_unallocated (the statement executed on a match
 * is not visible in this listing).  With no match, returns -1 when create
 * is 0; otherwise takes a slot from get_empty_drive(), stores name in the
 * label (truncated to the field size by ksnprintf), marks the entry
 * drive_referenced and returns its index.
 */
482 find_drive(const char *name, int create)
488 for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
489 drive = &DRIVE[driveno]; /* point to drive */
490 if ((drive->label.name[0] != '\0') /* it has a name */
491 &&(strcmp(drive->label.name, name) == 0) /* and it's this one */
492 &&(drive->state > drive_unallocated)) /* and it's a real one: found */
496 /* the drive isn't in the list. Add it if he wants */
497 if (create == 0) /* don't want to create */
498 return -1; /* give up */
500 driveno = get_empty_drive();
501 drive = &DRIVE[driveno];
503 ksnprintf(drive->label.name, sizeof(drive->label.name), "%s", name);
504 drive->state = drive_referenced; /* in use, nothing worthwhile there */
505 return driveno; /* return the index */
509 * Find a drive given its device name.
510 * devname must be valid.
511 * Otherwise the same as find_drive above
/*
 * find_drive_by_dev(devname, create): look up a drive by device name.
 *
 * An entry matches when its devicename equals devname and its state is
 * above drive_unallocated.  The creation path takes a slot from
 * get_empty_drive(), stores devname (truncated to the field size by
 * ksnprintf) and marks the entry drive_referenced.
 *
 * NOTE(review): the handling of `create' and the return statements are
 * not visible in this listing; presumably they mirror find_drive().
 */
514 find_drive_by_dev(const char *devname, int create)
519 for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
520 drive = &DRIVE[driveno];
521 if (strcmp(drive->devicename, devname) == 0 &&
522 drive->state > drive_unallocated
531 driveno = get_empty_drive();
532 drive = &DRIVE[driveno];
533 ksnprintf(drive->devicename, sizeof(drive->devicename), "%s", devname);
534 /* in use, nothing worthwhile there */
535 drive->state = drive_referenced;
539 /* Find an empty subdisk in the subdisk table */
/*
 * Scans SD[] for an entry in state sd_unallocated and grows the table
 * with EXPAND() when the scan runs off the end.  The chosen entry is
 * zeroed and flagged VF_NEWBORN; plexno, sectors, driveno, plexoffset and
 * driveoffset are all set to -1 to mean "not assigned yet".  Returns the
 * index of the entry.  (The signature line is not visible in this
 * listing; callers refer to this routine as get_empty_sd().)
 */
546 /* first see if we have one which has been deallocated */
547 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
548 if (SD[sdno].state == sd_unallocated) /* bingo */
551 if (sdno >= vinum_conf.subdisks_allocated)
553 * We've run out of space. sdno is pointing
554 * where we want it, but at the moment we
555 * don't have the space. Get it.
557 EXPAND(SD, struct sd, vinum_conf.subdisks_allocated, INITIAL_SUBDISKS);
559 /* initialize some things */
560 sd = &SD[sdno]; /* point to it */
561 bzero(sd, sizeof(struct sd)); /* initialize */
562 sd->flags |= VF_NEWBORN; /* newly born subdisk */
563 sd->plexno = -1; /* no plex */
564 sd->sectors = -1; /* no space */
565 sd->driveno = -1; /* no drive */
566 sd->plexoffset = -1; /* and no offsets */
567 sd->driveoffset = -1;
568 return sdno; /* return the index */
571 /* return a drive to the free pool */
/*
 * free_drive(drive): release a drive table entry.
 *
 * For an entry whose state is above drive_referenced, or which has
 * VF_OPEN set: an open drive is closed with close_locked_drive() and
 * marked drive_down, the free list is released and the structure is
 * zeroed (which, per the inline comment, leaves it drive_unallocated).
 *
 * NOTE(review): the line before Free() is not visible in this listing, so
 * it cannot be told from here whether the free is guarded against a NULL
 * free list.
 */
573 free_drive(struct drive *drive)
575 if ((drive->state > drive_referenced) /* real drive */
576 ||(drive->flags & VF_OPEN)) { /* how can it be open without a state? */
578 if (drive->flags & VF_OPEN) { /* it's open, */
579 close_locked_drive(drive); /* close it */
580 drive->state = drive_down; /* and note the fact */
583 Free(drive->freelist);
584 bzero(drive, sizeof(struct drive)); /* this also sets drive_unallocated */
590 * Find the named subdisk in vinum_conf.sd.
592 * If create != 0, create an entry if it doesn't exist
594 * Return index in vinum_conf.sd
/*
 * find_subdisk(name, create): look up a subdisk by name in SD[].
 *
 * Compares name against every allocated table slot (the statement
 * executed on a match is not visible in this listing).  With no match,
 * returns -1 when create is 0; otherwise takes a slot from get_empty_sd(),
 * stores name (truncated to the field size by ksnprintf) and returns the
 * slot index.  The assignment that points `sd' at the new slot is not
 * visible here.
 */
597 find_subdisk(const char *name, int create)
602 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
603 if (strcmp(SD[sdno].name, name) == 0) /* found it */
607 /* the subdisk isn't in the list. Add it if he wants */
608 if (create == 0) /* don't want to create */
609 return -1; /* give up */
611 /* Allocate one and insert the name */
612 sdno = get_empty_sd();
614 ksnprintf(sd->name, sizeof(sd->name), "%s", name);
615 return sdno; /* return the pointer */
618 /* Return space to a drive */
620 return_drive_space(int driveno, int64_t offset, int length)
623 int fe; /* free list entry */
624 u_int64_t sdend; /* end of our subdisk */
625 u_int64_t dend; /* end of our freelist entry */
627 drive = &DRIVE[driveno];
628 if (drive->state == drive_up) {
629 sdend = offset + length; /* end of our subdisk */
631 /* Look for where to return the sd address space */
633 (fe < drive->freelist_entries) && (drive->freelist[fe].offset < offset);
636 * Now we are pointing to the last entry, the first
637 * with a higher offset than the subdisk, or both.
639 if ((fe > 1) /* not the first entry */
640 &&((fe == drive->freelist_entries) /* gone past the end */
641 ||(drive->freelist[fe].offset > offset))) /* or past the block were looking for */
642 fe--; /* point to the block before */
643 dend = drive->freelist[fe].offset + drive->freelist[fe].sectors; /* end of the entry */
646 * At this point, we are pointing to the correct
647 * place in the free list. A number of possibilities
650 * 1. The block to be freed starts at the end of the
651 * block to which we are pointing. This has two
654 * a. The block to be freed ends at the beginning
655 * of the following block. Merge the three
656 * areas into a single block.
658 * b. The block is shorter than the space between
659 * the current block and the next one. Enlarge
662 * 2. The block to be freed starts after the end
663 * of the block. Again, we have two cases:
665 * a. It ends before the start of the following block.
666 * Create a new free block.
668 * b. It ends at the start of the following block.
669 * Enlarge the following block downwards.
671 * When there is only one free space block, and the
672 * space to be returned is before it, the pointer is
673 * to a non-existent zeroth block. XXX check this
675 if (offset == dend) { /* Case 1: it starts at the end of this block */
676 if ((fe < drive->freelist_entries - 1) /* we're not the last block in the free list */
677 /* and the subdisk ends at the start of the next block */
678 &&(sdend == drive->freelist[fe + 1].offset)) {
679 drive->freelist[fe].sectors /* 1a: merge all three blocks */
680 = drive->freelist[fe + 1].sectors;
681 if (fe < drive->freelist_entries - 2) /* still more blocks after next */
682 bcopy(&drive->freelist[fe + 2], /* move down one */
683 &drive->freelist[fe + 1],
684 (drive->freelist_entries - 2 - fe)
685 * sizeof(struct drive_freelist));
686 drive->freelist_entries--; /* one less entry in the free list */
687 } else /* 1b: just enlarge this block */
688 drive->freelist[fe].sectors += length;
689 } else { /* Case 2 */
690 if (offset > dend) /* it starts after this block */
691 fe++; /* so look at the next block */
692 if ((fe < drive->freelist_entries) /* we're not the last block in the free list */
693 /* and the subdisk ends at the start of this block: case 4 */
694 &&(sdend == drive->freelist[fe].offset)) {
695 drive->freelist[fe].offset = offset; /* it starts where the sd was */
696 drive->freelist[fe].sectors += length; /* and it's this much bigger */
697 } else { /* case 3: non-contiguous */
698 if (fe < drive->freelist_entries) /* not after the last block, */
699 bcopy(&drive->freelist[fe], /* move the rest up one entry */
700 &drive->freelist[fe + 1],
701 (drive->freelist_entries - fe)
702 * sizeof(struct drive_freelist));
703 drive->freelist_entries++; /* one less entry */
704 drive->freelist[fe].offset = offset; /* this entry represents the sd */
705 drive->freelist[fe].sectors = length;
708 drive->sectors_available += length; /* the sectors are now available */
713 * Free an allocated sd entry.
714 * This performs memory management only. remove()
715 * is responsible for checking relationships.
/*
 * Body of the routine that frees a subdisk entry (the signature and the
 * assignment of `sd' are not visible in this listing; presumably this is
 * free_sd()).
 *
 * Visible steps: if the subdisk has a drive and a positive size, the space
 * is handed back with return_drive_space(); the owning plex's subdisk
 * count is decremented; the entry is marked sd_unallocated, zeroed, marked
 * sd_unallocated again and vinum_conf.subdisks_used is decremented.
 *
 * NOTE(review): the line before the PLEX[] update is not visible, so it
 * cannot be told from here whether sd->plexno is checked for -1 first.
 */
723 if ((sd->driveno >= 0) /* we have a drive, */
724 &&(sd->sectors > 0)) /* and some space on it */
725 return_drive_space(sd->driveno, /* return the space */
729 PLEX[sd->plexno].subdisks--; /* one less subdisk */
730 sd->state = sd_unallocated;
732 bzero(sd, sizeof(struct sd)); /* and clear it out */
733 sd->state = sd_unallocated;
734 vinum_conf.subdisks_used--; /* one less sd */
/*
 * made_sd(sd): bring the device node of a subdisk in line with its state.
 *
 * An allocated subdisk without a node gets one from make_dev() (root /
 * operator, mode 0640, named VINUM_BASE "sd/<name>") and is tagged with
 * the udev properties subsystem=raid and disk-type=raid.  An unallocated
 * subdisk that still has a node gets it removed with destroy_dev().
 *
 * NOTE(review): the make_dev_alias() call refers to `plex', which is not
 * a parameter of this function, and passes three arguments for two %s
 * conversions.  The line in front of it is not visible in this listing;
 * confirm whether the call is compiled at all.
 */
738 made_sd(struct sd *sd)
740 if (sd->sd_dev == NULL && sd->state != sd_unallocated) {
741 sd->sd_dev = make_dev(&vinum_ops, VINUM_SD(sd->sdno),
742 UID_ROOT, GID_OPERATOR, 0640,
743 VINUM_BASE "sd/%s", sd->name);
744 udev_dict_set_cstr(sd->sd_dev, "subsystem", "raid");
745 udev_dict_set_cstr(sd->sd_dev, "disk-type", "raid");
747 if (sd->plexno >= 0 && PLEX[sd->plexno].volno >= 0) {
748 make_dev_alias(sd->sd_dev, "vol/%s.plex/%s",
749 VOL[PLEX[sd->plexno].volno].name,
750 plex->name, VOL[plex->volno].name);
754 if (sd->sd_dev && sd->state == sd_unallocated) {
755 destroy_dev(sd->sd_dev);
/*
 * made_vol(vol): bring the device node of a volume in line with its state.
 *
 * An allocated volume without a node gets one from make_dev() (root /
 * operator, mode 0640, named VINUM_BASE "vol/<name>") and is tagged with
 * the udev properties subsystem=raid and disk-type=raid.  An unallocated
 * volume that still has a node gets it removed with destroy_dev().
 */
761 made_vol(struct volume *vol)
763 if (vol->vol_dev == NULL && vol->state != volume_unallocated) {
764 vol->vol_dev = make_dev(&vinum_ops,
765 VINUMDEV(vol->volno, 0, 0, VINUM_VOLUME_TYPE),
766 UID_ROOT, GID_OPERATOR, 0640,
767 VINUM_BASE "vol/%s", vol->name);
768 udev_dict_set_cstr(vol->vol_dev, "subsystem", "raid");
769 udev_dict_set_cstr(vol->vol_dev, "disk-type", "raid");
771 if (vol->vol_dev && vol->state == volume_unallocated) {
772 destroy_dev(vol->vol_dev);
/*
 * made_plex(plex): bring the device node of a plex in line with its state.
 *
 * An allocated plex without a node gets one from make_dev() (root /
 * operator, mode 0640, named VINUM_BASE "plex/<name>") and is tagged with
 * the udev properties subsystem=raid and disk-type=raid.  An unallocated
 * plex that still has a node gets it removed with destroy_dev() and the
 * pointer is cleared.
 *
 * NOTE(review): the alias format is "vol/%s.plex/%s" but the arguments are
 * passed as (plex name, volume name); the order looks swapped relative to
 * the format.  The surrounding lines are not fully visible in this
 * listing; confirm whether the call is compiled at all.
 */
778 made_plex(struct plex *plex)
780 if (plex->plex_dev == NULL && plex->state != plex_unallocated) {
781 plex->plex_dev = make_dev(&vinum_ops, VINUM_PLEX(plex->plexno),
782 UID_ROOT, GID_OPERATOR, 0640,
783 VINUM_BASE "plex/%s", plex->name);
784 udev_dict_set_cstr(plex->plex_dev, "subsystem", "raid");
785 udev_dict_set_cstr(plex->plex_dev, "disk-type", "raid");
786 if (plex->volno >= 0) {
787 make_dev_alias(plex->plex_dev, "vol/%s.plex/%s",
788 plex->name, VOL[plex->volno].name);
791 if (plex->plex_dev && plex->state == plex_unallocated) {
792 destroy_dev(plex->plex_dev);
793 plex->plex_dev = NULL;
797 /* Find an empty plex in the plex table */
/*
 * Scans PLEX[] for an entry in state plex_unallocated and grows the table
 * with EXPAND() when the scan runs off the end.  The chosen entry is
 * zeroed and given a freshly allocated, zeroed subdisk index table with
 * room for INITIAL_SUBDISKS_IN_PLEX entries.  It is flagged VF_NEWBORN,
 * has no subdisks, organization plex_disorg and no volume (volno -1).
 * Returns the index of the entry.  (The signature line is not visible in
 * this listing; callers refer to this routine as get_empty_plex().)
 */
802 struct plex *plex; /* if we allocate one */
804 /* first see if we have one which has been deallocated */
805 for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) {
806 if (PLEX[plexno].state == plex_unallocated) /* bingo */
807 break; /* and get out of here */
810 if (plexno >= vinum_conf.plexes_allocated)
811 EXPAND(PLEX, struct plex, vinum_conf.plexes_allocated, INITIAL_PLEXES);
813 /* Found a plex. Give it an sd structure */
814 plex = &PLEX[plexno]; /* this one is ours */
815 bzero(plex, sizeof(struct plex)); /* polish it up */
816 plex->sdnos = (int *) Malloc(sizeof(int) * INITIAL_SUBDISKS_IN_PLEX); /* allocate sd table */
817 CHECKALLOC(plex->sdnos, "vinum: Can't allocate plex subdisk table");
818 bzero(plex->sdnos, (sizeof(int) * INITIAL_SUBDISKS_IN_PLEX)); /* do we need this? */
819 plex->flags |= VF_NEWBORN; /* newly born plex */
820 plex->subdisks = 0; /* no subdisks in use */
821 plex->subdisks_allocated = INITIAL_SUBDISKS_IN_PLEX; /* and we have space for this many */
822 plex->organization = plex_disorg; /* and it's not organized */
823 plex->volno = -1; /* no volume yet */
824 return plexno; /* return the index */
828 * Find the named plex in vinum_conf.plex
830 * If create != 0, create an entry if it doesn't exist
831 * return index in vinum_conf.plex
/*
 * find_plex(name, create): look up a plex by name in PLEX[].
 *
 * Compares name against every allocated table slot (the statement
 * executed on a match is not visible in this listing).  With no match,
 * returns -1 when create is 0; otherwise takes a slot from
 * get_empty_plex(), stores name (truncated to the field size by
 * ksnprintf) and returns the slot index.
 */
834 find_plex(const char *name, int create)
839 for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) {
840 if (strcmp(PLEX[plexno].name, name) == 0) /* found it */
844 /* the plex isn't in the list. Add it if he wants */
845 if (create == 0) /* don't want to create */
846 return -1; /* give up */
848 /* Allocate one and insert the name */
849 plexno = get_empty_plex();
850 plex = &PLEX[plexno]; /* point to it */
851 ksnprintf(plex->name, sizeof(plex->name), "%s", name);
852 return plexno; /* return the pointer */
856 * Free an allocated plex entry
857 * and its associated memory areas
/*
 * free_plex(plexno): release plex table entry plexno.
 *
 * Visible steps: the entry is marked plex_unallocated, zeroed, and marked
 * plex_unallocated again.  The lines between these steps (release of the
 * associated memory mentioned in the description, and whatever runs
 * between the two state assignments) are not visible in this listing.
 */
860 free_plex(int plexno)
864 plex = &PLEX[plexno];
869 plex->state = plex_unallocated;
871 bzero(plex, sizeof(struct plex)); /* and clear it out */
872 plex->state = plex_unallocated;
875 /* Find an empty volume in the volume table */
/*
 * get_empty_volume(): pick a slot in VOL[] for a new volume.
 *
 * Scans for an entry in state volume_unallocated and grows the table with
 * EXPAND() when the scan runs off the end.  The chosen entry is zeroed,
 * flagged VF_NEWBORN | VF_CREATED and set to the round-robin read policy.
 * The loop over MAXPLEX slots marks every plex slot as missing (its body
 * is not visible in this listing).  Returns the index of the entry.
 */
877 get_empty_volume(void)
883 /* first see if we have one which has been deallocated */
884 for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) {
885 if (VOL[volno].state == volume_unallocated) /* bingo */
889 if (volno >= vinum_conf.volumes_allocated)
890 EXPAND(VOL, struct volume, vinum_conf.volumes_allocated, INITIAL_VOLUMES);
892 /* Now initialize fields */
894 bzero(vol, sizeof(struct volume));
895 vol->flags |= VF_NEWBORN | VF_CREATED; /* newly born volume */
896 vol->preferred_plex = ROUND_ROBIN_READPOL; /* round robin */
897 for (i = 0; i < MAXPLEX; i++) /* mark the plexes missing */
899 return volno; /* return the index */
903 * Find the named volume in vinum_conf.volume.
905 * If create != 0, create an entry if it doesn't exist
906 * return the index in vinum_conf
/*
 * find_volume(name, create): look up a volume by name in VOL[].
 *
 * Compares name against every allocated table slot (the statement
 * executed on a match is not visible in this listing).  With no match,
 * returns -1 when create is 0; otherwise takes a slot from
 * get_empty_volume(), stores name (truncated to the field size by
 * ksnprintf), sets the block size to DEV_BSIZE and returns the slot
 * index.  The assignment that points `vol' at the new slot is not visible
 * here.
 */
909 find_volume(const char *name, int create)
914 for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) {
915 if (strcmp(VOL[volno].name, name) == 0) /* found it */
919 /* the volume isn't in the list. Add it if he wants */
920 if (create == 0) /* don't want to create */
921 return -1; /* give up */
923 /* Allocate one and insert the name */
924 volno = get_empty_volume();
926 ksnprintf(vol->name, sizeof(vol->name), "%s", name);
927 vol->blocksize = DEV_BSIZE; /* block size of this volume */
928 return volno; /* return the pointer */
932 * Free an allocated volume entry
933 * and its associated memory areas
/*
 * free_volume(volno): release volume table entry volno.
 *
 * Visible steps: the entry is marked volume_unallocated, zeroed, and
 * marked volume_unallocated again.  The assignment of `vol' and whatever
 * runs between the two state assignments are not visible in this listing.
 */
936 free_volume(int volno)
941 vol->state = volume_unallocated;
943 bzero(vol, sizeof(struct volume)); /* and clear it out */
944 vol->state = volume_unallocated;
948 * Handle a drive definition. We store the information in the global variable
949 * drive, so we don't need to allocate.
951 * If we find an error, print a message and return
/*
 * config_drive(update): process a "drive" definition from the global
 * token[] / tokens arrays.
 *
 *  - Fewer than two tokens is rejected with EINVAL ("Drive has no name").
 *  - token[1] names the drive; find_drive() creates the entry if needed.
 *    When update is set and the entry is not VF_NEWBORN, the drive is
 *    already known and nothing is done.
 *  - Tokens from index 2 on are classified with get_keyword().  The case
 *    labels are not visible in this listing; going by the inline
 *    comments, the handled keywords are a device name, a state and
 *    "hotspare", and anything else is rejected with EINVAL.
 *      device:   refused with EEXIST if find_drive_by_dev() shows another
 *                drive already uses the device; otherwise the name is
 *                stored, read_drive_label() is called and its result
 *                decides between errors, accepting a mismatched label
 *                under VF_FORCECONFIG, or restoring the device name that
 *                read_drive_label() overwrote.
 *      state:    only applied while VF_READING_CONFIG is set.
 *      hotspare: sets VF_HOTSPARE.
 *  - A drive whose devicename does not start with '/' after parsing is
 *    deallocated and rejected with EINVAL.
 *  - On success drives_used is incremented; every subdisk with a state
 *    above sd_referenced that names this drive is passed to
 *    give_sd_to_drive() and its state is capped at sd_stale.
 *
 * NOTE(review): for the state keyword, parameter is incremented and
 * token[parameter] is used with no visible check against `tokens'.
 * NOTE(review): the VF_FORCECONFIG path copies sizeof(drive->label.name)
 * bytes from token[1] with bcopy(), regardless of the token's length;
 * confirm the source is at least that long and that the name ends up
 * NUL-terminated.
 */
954 config_drive(int update)
956 enum drive_label_info partition_status; /* info about the partition */
958 int driveno; /* index of drive in vinum_conf */
959 struct drive *drive; /* and pointer to it */
960 int otherdriveno; /* index of possible second drive */
963 if (tokens < 2) /* not enough tokens */
964 throw_rude_remark(EINVAL, "Drive has no name\n");
965 driveno = find_drive(token[1], 1); /* allocate a drive to initialize */
966 drive = &DRIVE[driveno]; /* and get a pointer */
967 if (update && ((drive->flags & VF_NEWBORN) == 0)) /* this drive exists already */
968 return; /* don't do anything */
969 drive->flags &= ~VF_NEWBORN; /* no longer newly born */
971 if (drive->state != drive_referenced) { /* we already know this drive */
973 * XXX Check which definition is more up-to-date. Give
974 * preference for the definition on its own drive.
978 for (parameter = 2; parameter < tokens; parameter++) { /* look at the other tokens */
979 switch (get_keyword(token[parameter], &keyword_set)) {
982 otherdriveno = find_drive_by_dev(token[parameter], 0); /* see if it exists already */
983 if (otherdriveno >= 0) { /* yup, */
984 drive->state = drive_unallocated; /* deallocate the drive */
985 throw_rude_remark(EEXIST, /* and complain */
986 "Drive %s would have same device as drive %s",
988 DRIVE[otherdriveno].label.name);
990 if (drive->devicename[0] == '/') { /* we know this drive... */
991 if (strcmp(drive->devicename, token[parameter])) /* different name */
992 close_drive(drive); /* close it if it's open */
998 * open the device and get the configuration
1000 ksnprintf(drive->devicename, sizeof(drive->devicename),
1001 "%s", token[parameter]);
1002 partition_status = read_drive_label(drive, 1);
1004 switch (partition_status) {
1005 case DL_CANT_OPEN: /* not our kind */
1007 if (drive->lasterror == EFTYPE) /* wrong kind of partition */
1008 throw_rude_remark(drive->lasterror,
1009 "Drive %s has invalid partition type",
1011 else /* I/O error of some kind */
1012 throw_rude_remark(drive->lasterror,
1013 "Can't initialize drive %s",
1017 case DL_WRONG_DRIVE: /* valid drive, not the name we expected */
1018 if (vinum_conf.flags & VF_FORCECONFIG) { /* but we'll accept that */
1019 bcopy(token[1], drive->label.name, sizeof(drive->label.name));
1024 * There's a potential race condition here:
1025 * the rude remark refers to a field in an
1026 * unallocated drive, which potentially could
1027 * be reused. This works because we're the only
1028 * thread accessing the config at the moment.
1030 drive->state = drive_unallocated; /* throw it away completely */
1031 throw_rude_remark(drive->lasterror,
1032 "Incorrect drive name %s specified for drive %s",
1037 case DL_DELETED_LABEL: /* it was a drive, but we deleted it */
1038 case DL_NOT_OURS: /* nothing to do with the rest */
1043 * read_drive_label overwrites the device name.
1044 * If we get here, we can have the drive,
1045 * so put it back again
1047 ksnprintf(drive->devicename, sizeof(drive->devicename),
1048 "%s", token[parameter]);
1052 parameter++; /* skip the keyword */
1053 if (vinum_conf.flags & VF_READING_CONFIG)
1054 drive->state = DriveState(token[parameter]); /* set the state */
1057 case kw_hotspare: /* this drive is a hot spare */
1058 drive->flags |= VF_HOTSPARE;
1063 throw_rude_remark(EINVAL,
1064 "Drive %s, invalid keyword: %s",
1070 if (drive->devicename[0] != '/') {
1071 drive->state = drive_unallocated; /* deallocate the drive */
1072 throw_rude_remark(EINVAL, "No device name for %s", drive->label.name);
1074 vinum_conf.drives_used++; /* passed all hurdles: one more in use */
1076 * If we're replacing a drive, it could be that
1077 * we already have subdisks referencing this
1078 * drive. Note where they should be and change
1079 * their state to obsolete.
1081 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
1082 if ((SD[sdno].state > sd_referenced)
1083 && (SD[sdno].driveno == driveno)) {
1084 give_sd_to_drive(sdno);
1085 if (SD[sdno].state > sd_stale)
1086 SD[sdno].state = sd_stale;
/*
 * config_subdisk: build one subdisk entry from the current token list.
 *
 * A fresh slot is obtained with get_empty_sd() and filled in from the
 * keywords found in token[1] .. token[tokens - 1].
 *
 * update: when an allocated subdisk of the same name already exists
 *         and is not merely in state sd_referenced, the visible code
 *         returns if update is set and otherwise raises
 *         "Duplicate subdisk" (the branch structure is only partly
 *         visible here; confirm against the full source).
 *
 * Offsets and lengths come from sizespec() in bytes, must be a
 * multiple of DEV_BSIZE, and are stored in units of DEV_BSIZE.
 *
 * Defaults applied after the keyword loop:
 *   - no drive given:  current_drive is used; it is an error if that
 *     is negative as well.
 *   - no plex given and the subdisk is not detached: current_plex.
 *   - no name given: the plex name followed by ".s<index>", where
 *     index is the value returned by give_sd_to_plex().
 *   - no state given and the state is still sd_unallocated: sd_empty.
 *
 * All errors are reported through throw_rude_remark().
 *
 * NOTE(review): the generated name is assembled with strcpy() and
 * strcat() into sd->name without a length check, while the explicit
 * name path uses ksnprintf() with sizeof(sd->name).  Confirm that a
 * plex name plus suffix always fits.
 * NOTE(review): the partition check reports token[parameter][0] with
 * %c even when the token is empty or longer than one character.
 */
1092 * Handle a subdisk definition. We store the information in the global variable
1093 * sd, so we don't need to allocate.
1095 * If we find an error, print a message and return
1098 config_subdisk(int update)
1101 int sdno; /* index of sd in vinum_conf */
1102 struct sd *sd; /* and pointer to it */
1104 int detached = 0; /* set to 1 if this is a detached subdisk */
1105 int sdindex = -1; /* index in plexes subdisk table */
1106 enum sdstate state = sd_unallocated; /* state to set, if specified */
1107 int autosize = 0; /* set if we autosize in give_sd_to_drive */
1108 int namedsdno; /* index of another with this name */
1109 char partition = 0; /* partition of external subdisk */
1111 sdno = get_empty_sd(); /* allocate an SD to initialize */
1112 sd = &SD[sdno]; /* and get a pointer */
1114 for (parameter = 1; parameter < tokens; parameter++) { /* look at the other tokens */
1115 switch (get_keyword(token[parameter], &keyword_set)) {
1117 * If we have a 'name' parameter, it must
1118 * come first, because we're too lazy to tidy
1119 * up dangling refs if it comes later.
1122 namedsdno = find_subdisk(token[++parameter], 0); /* find an existing sd with this name */
1123 if (namedsdno >= 0) { /* got one */
1124 if (SD[namedsdno].state == sd_referenced) { /* we've been told about this one */
1126 throw_rude_remark(EINVAL,
1127 "sd %s: name parameter must come first\n", /* no go */
1131 struct plex *plex; /* for tidying up dangling references */
1133 *sd = SD[namedsdno]; /* copy from the referenced one */
1136 SD[namedsdno].state = sd_unallocated; /* and deallocate the referenced one */
1137 made_sd(&SD[namedsdno]);
1138 plex = &PLEX[sd->plexno]; /* now take a look at our plex */
1139 for (i = 0; i < plex->subdisks; i++) { /* look for the pointer */
1140 if (plex->sdnos[i] == namedsdno) /* pointing to the old subdisk */
1141 plex->sdnos[i] = sdno; /* bend it to point here */
1145 if (update) /* are we updating? */
1146 return; /* that's OK, nothing more to do */
1148 throw_rude_remark(EINVAL, "Duplicate subdisk %s", token[parameter]);
1150 ksnprintf(sd->name, sizeof(sd->name),
1151 "%s", token[parameter]);
1160 size = sizespec(token[++parameter]);
1161 if ((size == -1) /* unallocated */
1162 &&(vinum_conf.flags & VF_READING_CONFIG)) /* reading from disk */
1163 break; /* invalid sd; just ignore it */
1164 if ((size % DEV_BSIZE) != 0)
1165 throw_rude_remark(EINVAL,
1166 "sd %s, bad plex offset alignment: %lld",
1170 sd->plexoffset = size / DEV_BSIZE;
1173 case kw_driveoffset:
1174 size = sizespec(token[++parameter]);
1175 if ((size == -1) /* unallocated */
1176 &&(vinum_conf.flags & VF_READING_CONFIG)) /* reading from disk */
1177 break; /* invalid sd; just ignore it */
1178 if ((size % DEV_BSIZE) != 0)
1179 throw_rude_remark(EINVAL,
1180 "sd %s, bad drive offset alignment: %lld",
1184 sd->driveoffset = size / DEV_BSIZE;
1188 if (get_keyword(token[++parameter], &keyword_set) == kw_max) /* select maximum size from drive */
1189 size = 0; /* this is how we say it :-) */
1191 size = sizespec(token[parameter]);
1192 if ((size % DEV_BSIZE) != 0)
1193 throw_rude_remark(EINVAL, "sd %s, length %jd not multiple of sector size", sd->name, (intmax_t)size);
1195 sd->sectors = size / DEV_BSIZE;
1197 * We have a problem with autosizing: we need to
1198 * give the drive to the plex before we give it
1199 * to the drive, in order to be clean if we give
1200 * up in the middle, but at this time the size hasn't
1201 * been set. Note that we have to fix up after
1202 * giving the subdisk to the drive.
1205 autosize = 1; /* note that we're autosizing */
1209 sd->driveno = find_drive(token[++parameter], 1); /* insert drive information */
1213 sd->plexno = find_plex(token[++parameter], 1); /* insert plex information */
1217 * Set the state. We can't do this directly,
1218 * because give_sd_to_plex may change it
1221 parameter++; /* skip the keyword */
1222 if (vinum_conf.flags & VF_READING_CONFIG)
1223 state = SdState(token[parameter]); /* set the state */
1227 parameter++; /* skip the keyword */
1228 if ((strlen(token[parameter]) != 1)
1229 || (token[parameter][0] < 'a')
1230 || (token[parameter][0] > 'p'))
1231 throw_rude_remark(EINVAL,
1232 "%s: invalid partition %c",
1234 token[parameter][0]);
1236 partition = token[parameter][0];
1239 case kw_retryerrors:
1240 sd->flags |= VF_RETRYERRORS;
1244 throw_rude_remark(EINVAL, "%s: invalid keyword: %s", sd->name, token[parameter]);
1248 /* Check we have a drive name */
1249 if (sd->driveno < 0) { /* didn't specify a drive */
1250 sd->driveno = current_drive; /* set to the current drive */
1251 if (sd->driveno < 0) /* no current drive? */
1252 throw_rude_remark(EINVAL, "Subdisk %s is not associated with a drive", sd->name);
1255 * This is tacky. If something goes wrong
1256 * with the checks, we may end up losing drive
1259 if (autosize != 0) /* need to find a size, */
1260 give_sd_to_drive(sdno); /* do it before the plex */
1262 /* Check for a plex name */
1263 if ((sd->plexno < 0) /* didn't specify a plex */
1264 &&(!detached)) /* and didn't say not to, */
1265 sd->plexno = current_plex; /* set to the current plex */
1267 if (sd->plexno >= 0)
1268 sdindex = give_sd_to_plex(sd->plexno, sdno); /* now tell the plex that it has this sd */
1270 sd->sdno = sdno; /* point to our entry in the table */
1272 /* Does the subdisk have a name? If not, give it one */
1273 if (sd->name[0] == '\0') { /* no name */
1274 char sdsuffix[8]; /* form sd name suffix here */
1276 /* Do we have a plex name? */
1277 if (sdindex >= 0) /* we have a plex */
1278 strcpy(sd->name, PLEX[sd->plexno].name); /* take it from there */
1280 throw_rude_remark(EINVAL, "Unnamed sd is not associated with a plex");
1281 ksprintf(sdsuffix, ".s%d", sdindex); /* form the suffix */
1282 strcat(sd->name, sdsuffix); /* and add it to the name */
1284 /* do we have complete info for this subdisk? */
1285 if (sd->sectors < 0)
1286 throw_rude_remark(EINVAL, "sd %s has no length spec", sd->name);
1288 if (state != sd_unallocated) { /* we had a specific state to set */
1289 sd->state = state; /* do it now */
1291 } else if (sd->state == sd_unallocated) { /* no, nothing set yet, */
1292 sd->state = sd_empty; /* must be empty */
1295 if (autosize == 0) /* no autoconfig, do the drive now */
1296 give_sd_to_drive(sdno);
1297 vinum_conf.subdisks_used++; /* one more in use */
/*
 * config_plex: build one plex entry from the current token list.
 *
 * A fresh slot is obtained with get_empty_plex(); current_plex is
 * reset to -1 on entry and set to the new plex index once the
 * definition has been processed.
 *
 * update: when an allocated plex of the same name already exists and
 *         is not merely in state plex_referenced, the visible code
 *         returns if update is set and otherwise raises
 *         "Duplicate plex" (branch structure only partly visible
 *         here; confirm against the full source).
 *
 * For the striped, RAID-4 and RAID-5 organizations the stripe size is
 * read with sizespec(), must be a multiple of DEV_BSIZE, and is stored
 * in units of DEV_BSIZE.  For striped plexes (isstriped()) a table of
 * PLEX_LOCKS range locks is allocated and zeroed.
 *
 * Defaults applied after the keyword loop:
 *   - no volume given and the plex is not detached: current_volume.
 *   - no name given: the volume name followed by ".p<index>", where
 *     index is the value returned by give_plex_to_volume().
 *
 * All errors are reported through throw_rude_remark().
 *
 * NOTE(review): sizespec() results are kept in an int here, whereas
 * config_subdisk prints the same kind of value with %lld/%jd; a large
 * stripe size may be truncated.  Confirm the return type.
 * NOTE(review): the "sd" keyword stores sizespec() directly in
 * SD[sdno].plexoffset, while config_subdisk divides by DEV_BSIZE and
 * checks alignment first.  Confirm the intended unit.
 * NOTE(review): the generated name is built with strcpy()/strcat()
 * into plex->name without a length check.
 */
1301 * Handle a plex definition.
1304 config_plex(int update)
1307 int plexno; /* index of plex in vinum_conf */
1308 struct plex *plex; /* and pointer to it */
1309 int pindex = MAXPLEX; /* index in volume's plex list */
1310 int detached = 0; /* don't give it to a volume */
1312 enum plexstate state = plex_init; /* state to set at end */
1314 current_plex = -1; /* forget the previous plex */
1315 plexno = get_empty_plex(); /* allocate a plex */
1316 plex = &PLEX[plexno]; /* and point to it */
1317 plex->plexno = plexno; /* and back to the config */
1319 for (parameter = 1; parameter < tokens; parameter++) { /* look at the other tokens */
1320 switch (get_keyword(token[parameter], &keyword_set)) {
1322 * If we have a 'name' parameter, it must
1323 * come first, because we're too lazy to tidy
1324 * up dangling refs if it comes later.
1327 namedplexno = find_plex(token[++parameter], 0); /* find an existing plex with this name */
1328 if (namedplexno >= 0) { /* plex exists already, */
1329 if (PLEX[namedplexno].state == plex_referenced) { /* we've been told about this one */
1330 if (parameter > 2) /* we've done other things first, */
1331 throw_rude_remark(EINVAL,
1332 "plex %s: name parameter must come first\n", /* no go */
1336 struct volume *vol; /* for tidying up dangling references */
1338 *plex = PLEX[namedplexno]; /* get the info */
1339 plex->plex_dev = NULL;
1341 PLEX[namedplexno].state = plex_unallocated; /* and deallocate the other one */
1342 made_plex(&PLEX[namedplexno]);
1343 vol = &VOL[plex->volno]; /* point to the volume */
1344 for (i = 0; i < MAXPLEX; i++) { /* for each plex */
1345 if (vol->plex[i] == namedplexno)
1346 vol->plex[i] = plexno; /* bend the pointer */
1349 break; /* use this one */
1351 if (update) /* are we updating? */
1352 return; /* yes: that's OK, just return */
1354 throw_rude_remark(EINVAL, "Duplicate plex %s", token[parameter]);
1356 ksnprintf(plex->name, sizeof(plex->name),
1357 "%s", token[parameter]);
1365 case kw_org: /* plex organization */
1366 switch (get_keyword(token[++parameter], &keyword_set)) {
1368 plex->organization = plex_concat;
1373 int stripesize = sizespec(token[++parameter]);
1375 plex->organization = plex_striped;
1376 if (stripesize % DEV_BSIZE != 0) /* not a multiple of block size, */
1377 throw_rude_remark(EINVAL, "plex %s: stripe size %d not a multiple of sector size",
1381 plex->stripesize = stripesize / DEV_BSIZE;
1387 int stripesize = sizespec(token[++parameter]);
1389 plex->organization = plex_raid4;
1390 if (stripesize % DEV_BSIZE != 0) /* not a multiple of block size, */
1391 throw_rude_remark(EINVAL, "plex %s: stripe size %d not a multiple of sector size",
1395 plex->stripesize = stripesize / DEV_BSIZE;
1401 int stripesize = sizespec(token[++parameter]);
1403 plex->organization = plex_raid5;
1404 if (stripesize % DEV_BSIZE != 0) /* not a multiple of block size, */
1405 throw_rude_remark(EINVAL, "plex %s: stripe size %d not a multiple of sector size",
1409 plex->stripesize = stripesize / DEV_BSIZE;
1414 throw_rude_remark(EINVAL, "Invalid plex organization");
1417 && (plex->stripesize == 0)) /* didn't specify a valid stripe size */
1418 throw_rude_remark(EINVAL, "Need a stripe size parameter");
1422 plex->volno = find_volume(token[++parameter], 1); /* insert a pointer to the volume */
1425 case kw_sd: /* add a subdisk */
1429 sdno = find_subdisk(token[++parameter], 1); /* find a subdisk */
1430 SD[sdno].plexoffset = sizespec(token[++parameter]); /* get the offset */
1431 give_sd_to_plex(plexno, sdno); /* and insert it there */
1436 parameter++; /* skip the keyword */
1437 if (vinum_conf.flags & VF_READING_CONFIG)
1438 state = PlexState(token[parameter]); /* set the state */
1442 throw_rude_remark(EINVAL, "plex %s, invalid keyword: %s",
1448 if (plex->organization == plex_disorg)
1449 throw_rude_remark(EINVAL, "No plex organization specified");
1451 if ((plex->volno < 0) /* we don't have a volume */
1452 &&(!detached)) /* and we wouldn't object */
1453 plex->volno = current_volume;
1455 if (plex->volno >= 0)
1456 pindex = give_plex_to_volume(plex->volno, plexno); /* Now tell the volume that it has this plex */
1458 /* Does the plex have a name? If not, give it one */
1459 if (plex->name[0] == '\0') { /* no name */
1460 char plexsuffix[8]; /* form plex name suffix here */
1461 /* Do we have a volume name? */
1462 if (plex->volno >= 0) /* we have a volume */
1463 strcpy(plex->name, /* take it from there */
1464 VOL[plex->volno].name);
1466 throw_rude_remark(EINVAL, "Unnamed plex is not associated with a volume");
1467 ksprintf(plexsuffix, ".p%d", pindex); /* form the suffix */
1468 strcat(plex->name, plexsuffix); /* and add it to the name */
1470 if (isstriped(plex)) {
1471 plex->lock = (struct rangelock *)
1472 Malloc(PLEX_LOCKS * sizeof(struct rangelock));
1473 CHECKALLOC(plex->lock, "vinum: Can't allocate lock table\n");
1474 bzero((char *) plex->lock, PLEX_LOCKS * sizeof(struct rangelock));
1476 /* Note the last plex we configured */
1477 current_plex = plexno;
1478 plex->state = state; /* set whatever state we chose */
1480 vinum_conf.plexes_used++; /* one more in use */
/*
 * config_volume: build or update one volume entry from the current
 * token list.
 *
 * token[1] is the volume name; the entry is looked up (and created if
 * necessary) with find_volume(token[1], 1).  current_volume is reset
 * to -1 on entry and set to the volume index after the keyword loop.
 *
 * update: when set and the entry does not carry VF_CREATED, the
 *         volume is treated as already existing and nothing is done.
 *         Otherwise VF_CREATED is cleared and the remaining tokens
 *         (from token[2]) are processed.
 *
 * After the keywords have been handled, a volume still in state
 * volume_unallocated is moved to volume_down, and vol->size is raised
 * to the largest length among the plexes listed in vol->plex[].
 *
 * All errors are reported through throw_rude_remark().
 *
 * NOTE(review): both tests of the my_plex() result use "> 0".  If
 * my_plex() returns a zero-based index (and a negative value for
 * "not found"), a plex at index 0 is not recognised as belonging to
 * the volume.  Confirm the return convention.
 * NOTE(review): in the "prefer" read policy branch, myplexno is
 * overwritten with the my_plex() result before the else branch stores
 * it in vol->plex[], so the value stored there is no longer the plex
 * number returned by find_plex().  Confirm the intent.
 * NOTE(review): the plex limit is the literal 8 here, while
 * config_plex uses MAXPLEX for the same table.
 * NOTE(review): the "Too many plexes for volume" check is followed by
 * an unconditional store into vol->plex[vol->plexes]; this is only
 * safe if throw_rude_remark() does not return.
 */
1484 * Handle a volume definition.
1485 * If we find an error, print a message, deallocate the nascent volume, and return
1488 config_volume(int update)
1492 struct volume *vol; /* collect volume info here */
1495 if (tokens < 2) /* not enough tokens */
1496 throw_rude_remark(EINVAL, "Volume has no name");
1497 current_volume = -1; /* forget the previous volume */
1498 volno = find_volume(token[1], 1); /* allocate a volume to initialize */
1499 vol = &VOL[volno]; /* and get a pointer */
1500 if (update && ((vol->flags & VF_CREATED) == 0)) /* this volume exists already */
1501 return; /* don't do anything */
1502 vol->flags &= ~VF_CREATED; /* it exists now */
1504 for (parameter = 2; parameter < tokens; parameter++) { /* look at all tokens */
1505 switch (get_keyword(token[parameter], &keyword_set)) {
1508 int plexno; /* index of this plex */
1509 int myplexno; /* and index if it's already ours */
1511 plexno = find_plex(token[++parameter], 1); /* find a plex */
1512 if (plexno < 0) /* couldn't */
1513 break; /* we've already had an error message */
1514 myplexno = my_plex(volno, plexno); /* does it already belong to us? */
1515 if (myplexno > 0) /* yes, shouldn't get it again */
1516 throw_rude_remark(EINVAL,
1517 "Plex %s already belongs to volume %s",
1520 else if (vol->plexes + 1 > 8) /* another entry */
1521 throw_rude_remark(EINVAL,
1522 "Too many plexes for volume %s",
1524 vol->plex[vol->plexes] = plexno;
1526 PLEX[plexno].state = plex_referenced; /* we know something about it */
1527 PLEX[plexno].volno = volno; /* and this volume references it */
1532 switch (get_keyword(token[++parameter], &keyword_set)) { /* decide what to do */
1534 vol->preferred_plex = ROUND_ROBIN_READPOL; /* default */
1539 int myplexno; /* index of this plex */
1541 myplexno = find_plex(token[++parameter], 1); /* find a plex */
1542 if (myplexno < 0) /* couldn't */
1543 break; /* we've already had an error message */
1544 myplexno = my_plex(volno, myplexno); /* does it already belong to us? */
1545 if (myplexno > 0) /* yes */
1546 vol->preferred_plex = myplexno; /* just note the index */
1547 else if (++vol->plexes > 8) /* another entry */
1548 throw_rude_remark(EINVAL, "Too many plexes");
1549 else { /* space for the new plex */
1550 vol->plex[vol->plexes - 1] = myplexno; /* add it to our list */
1551 vol->preferred_plex = vol->plexes - 1; /* and note the index */
1557 throw_rude_remark(EINVAL, "Invalid read policy");
1561 vol->flags |= VF_CONFIG_SETUPSTATE; /* set the volume up later on */
1565 parameter++; /* skip the keyword */
1566 if (vinum_conf.flags & VF_READING_CONFIG) {
1567 vol->state = VolState(token[parameter]); /* set the state */
1568 vol->volno = volno; /* needs correct volno to make devs */
1574 * XXX experimental ideas. These are not
1575 * documented, and will not be until I
1576 * decide they're worth keeping
1578 case kw_writethrough: /* set writethrough mode */
1579 vol->flags |= VF_WRITETHROUGH;
1582 case kw_writeback: /* set writeback mode */
1583 vol->flags &= ~VF_WRITETHROUGH;
1587 vol->flags |= VF_RAW; /* raw volume (no label) */
1591 throw_rude_remark(EINVAL, "volume %s, invalid keyword: %s",
1596 current_volume = volno; /* note last referred volume */
1597 vol->volno = volno; /* also note in volume */
1600 * Before we can actually use the volume, we need
1601 * a volume label. We could start to fake one here,
1602 * but it will be a lot easier when we have some
1603 * to copy from the drives, so defer it until we
1604 * set up the configuration. XXX
1606 if (vol->state == volume_unallocated) {
1607 vol->state = volume_down; /* now ready to bring up at the end */
1611 /* Find out how big our volume is */
1612 for (i = 0; i < vol->plexes; i++)
1613 vol->size = u64max(vol->size, PLEX[vol->plex[i]].length);
1614 vinum_conf.volumes_used++; /* one more in use */
/*
 * parse_config: tokenize one configuration line and dispatch it.
 *
 * cptr:   the line to parse; tokenize() splits it in place into the
 *         global token[] array and the count is kept in the global
 *         tokens.
 * keyset: keyword table used to classify token[0].
 * update: passed through unchanged to config_drive(),
 *         config_subdisk(), config_plex() and config_volume().
 *
 * A token count <= 0 is returned to the caller as is.  A line whose
 * first token starts with '#' is treated as a comment.  For the
 * "read" keyword the status is whatever vinum_scandisk() returns.
 * Unrecognised first tokens raise "Invalid configuration information"
 * through throw_rude_remark().
 */
1618 * Parse a config entry. CARE! This destroys the original contents of the
1619 * config entry, which we don't really need after this. More specifically, it
1620 * places \0 characters at the end of each token.
1622 * Return 0 if all is well, otherwise EINVAL for invalid keyword,
1623 * or ENOENT if 'read' command doesn't find any drives.
1626 parse_config(char *cptr, struct keywordset *keyset, int update)
1630 status = 0; /* until proven otherwise */
1631 tokens = tokenize(cptr, token); /* chop up into tokens */
1633 if (tokens <= 0) /* screwed up or empty line */
1634 return tokens; /* give up */
1636 if (token[0][0] == '#') /* comment line */
1639 switch (get_keyword(token[0], keyset)) { /* decide what to do */
1640 case kw_read: /* read config from a specified drive */
1641 status = vinum_scandisk(&token[1], tokens - 1); /* read the config from disk */
1645 config_drive(update);
1649 config_subdisk(update);
1653 config_plex(update);
1657 config_volume(update);
1660 /* Anything else is invalid in this context */
1662 throw_rude_remark(EINVAL, /* should we die? */
1663 "Invalid configuration information: %s",
/*
 * parse_user_config: parse one configuration line supplied by an
 * ioctl.
 *
 * cptr:   buffer holding the line; the same buffer is reused as the
 *         struct _ioctl_reply that carries the result back, which is
 *         why ioctl_reply is pointed at it before parsing.
 * keyset: keyword table handed on to parse_config().
 *
 * parse_config() is called with update == 0.  If it reports ENOENT
 * the reply message is set to "no drives found".  ioctl_reply is
 * reset to NULL before leaving so that later errors are not written
 * into this buffer.
 */
1670 * parse a line handed in from userland via ioctl.
1671 * This differs only by the error reporting mechanism:
1672 * we return the error indication in the reply to the
1673 * ioctl, so we need to set a global static pointer in
1674 * this file. This technique works because we have
1675 * ensured that configuration is performed in a single-
1679 parse_user_config(char *cptr, struct keywordset *keyset)
1683 ioctl_reply = (struct _ioctl_reply *) cptr;
1684 status = parse_config(cptr, keyset, 0);
1685 if (status == ENOENT) /* from scandisk, but it can't tell us */
1686 strcpy(ioctl_reply->msg, "no drives found");
1687 ioctl_reply = NULL; /* don't do this again */
/*
 * remove: ioctl entry point for removing a drive, subdisk, plex or
 * volume.
 *
 * msg: the request.  It is copied to a local first because the same
 *      memory is then reused as the struct _ioctl_reply, whose error
 *      and msg fields are cleared before dispatching.
 *
 * The request's type selects one of remove_drive_entry(),
 * remove_sd_entry(), remove_plex_entry() or remove_volume_entry(),
 * called with the index, force and (except for drives) recurse
 * fields of the copied request.  An unknown type is answered with
 * EINVAL and "Invalid object type".
 */
1691 /* Remove an object */
1693 remove(struct vinum_ioctl_msg *msg)
1695 struct vinum_ioctl_msg message = *msg; /* make a copy to hand on */
1697 ioctl_reply = (struct _ioctl_reply *) msg; /* reinstate the address to reply to */
1698 ioctl_reply->error = 0; /* no error, */
1699 ioctl_reply->msg[0] = '\0'; /* no message */
1701 switch (message.type) {
1703 remove_drive_entry(message.index, message.force);
1708 remove_sd_entry(message.index, message.force, message.recurse);
1713 remove_plex_entry(message.index, message.force, message.recurse);
1718 remove_volume_entry(message.index, message.force, message.recurse);
1723 ioctl_reply->error = EINVAL;
1724 strcpy(ioctl_reply->msg, "Invalid object type");
1728 /* Remove a drive. */
1730 remove_drive_entry(int driveno, int force)
1732 struct drive *drive = &DRIVE[driveno];
1735 if ((driveno > vinum_conf.drives_allocated) /* not a valid drive */
1736 ||(drive->state == drive_unallocated)) { /* or nothing there */
1737 ioctl_reply->error = EINVAL;
1738 strcpy(ioctl_reply->msg, "No such drive");
1739 } else if (drive->opencount > 0) { /* we have subdisks */
1740 if (force) { /* do it at any cost */
1741 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
1742 if ((SD[sdno].state != sd_unallocated) /* subdisk is allocated */
1743 &&(SD[sdno].driveno == driveno)) /* and it belongs to this drive */
1744 remove_sd_entry(sdno, force, 0);
1746 remove_drive(driveno); /* now remove it */
1747 vinum_conf.drives_used--; /* one less drive */
1749 ioctl_reply->error = EBUSY; /* can't do that */
1751 remove_drive(driveno); /* just remove it */
1752 vinum_conf.drives_used--; /* one less drive */
1756 /* remove a subdisk */
1758 remove_sd_entry(int sdno, int force, int recurse)
1760 struct sd *sd = &SD[sdno];
1762 if ((sdno > vinum_conf.subdisks_allocated) /* not a valid sd */
1763 ||(sd->state == sd_unallocated)) { /* or nothing there */
1764 ioctl_reply->error = EINVAL;
1765 strcpy(ioctl_reply->msg, "No such subdisk");
1766 } else if (sd->flags & VF_OPEN) { /* we're open */
1767 ioctl_reply->error = EBUSY; /* no getting around that */
1769 } else if (sd->plexno >= 0) { /* we have a plex */
1770 if (force) { /* do it at any cost */
1771 struct plex *plex = &PLEX[sd->plexno]; /* point to our plex */
1774 for (mysdno = 0; /* look for ourselves */
1775 mysdno < plex->subdisks && &SD[plex->sdnos[mysdno]] != sd;
1777 if (mysdno == plex->subdisks) /* didn't find it */
1779 "Error removing subdisk %s: not found in plex %s\n",
1782 else { /* remove the subdisk from plex */
1783 if (mysdno < (plex->subdisks - 1)) /* not the last subdisk */
1784 bcopy(&plex->sdnos[mysdno + 1],
1785 &plex->sdnos[mysdno],
1786 (plex->subdisks - 1 - mysdno) * sizeof(int));
1788 sd->plexno = -1; /* disown the subdisk */
1792 * Removing a subdisk from a striped or
1793 * RAID-4 or RAID-5 plex really tears the
1794 * hell out of the structure, and it needs
1795 * to be reinitialized.
1797 if (plex->organization != plex_concat) /* not concatenated, */
1798 set_plex_state(plex->plexno, plex_faulty, setstate_force); /* need to reinitialize */
1799 log(LOG_INFO, "vinum: removing %s\n", sd->name);
1802 ioctl_reply->error = EBUSY; /* can't do that */
1804 log(LOG_INFO, "vinum: removing %s\n", sd->name);
1811 remove_plex_entry(int plexno, int force, int recurse)
1813 struct plex *plex = &PLEX[plexno];
1816 if ((plexno > vinum_conf.plexes_allocated) /* not a valid plex */
1817 ||(plex->state == plex_unallocated)) { /* or nothing there */
1818 ioctl_reply->error = EINVAL;
1819 strcpy(ioctl_reply->msg, "No such plex");
1820 } else if (plex->flags & VF_OPEN) { /* we're open */
1821 ioctl_reply->error = EBUSY; /* no getting around that */
1824 if (plex->subdisks) {
1825 if (force) { /* do it anyway */
1826 if (recurse) { /* remove all below */
1827 int sds = plex->subdisks;
1828 for (sdno = 0; sdno < sds; sdno++)
1829 free_sd(plex->sdnos[sdno]); /* free all subdisks */
1830 } else { /* just tear them out */
1831 int sds = plex->subdisks;
1832 for (sdno = 0; sdno < sds; sdno++)
1833 SD[plex->sdnos[sdno]].plexno = -1; /* no plex any more */
1835 } else { /* can't do it without force */
1836 ioctl_reply->error = EBUSY; /* can't do that */
1840 if (plex->volno >= 0) { /* we are part of a volume */
1841 if (force) { /* do it at any cost */
1842 struct volume *vol = &VOL[plex->volno];
1845 for (myplexno = 0; myplexno < vol->plexes; myplexno++)
1846 if (vol->plex[myplexno] == plexno) /* found it */
1848 if (myplexno == vol->plexes) /* didn't find it. Huh? */
1850 "Error removing plex %s: not found in volume %s\n",
1853 if (myplexno < (vol->plexes - 1)) /* not the last plex in the list */
1854 bcopy(&vol->plex[myplexno + 1],
1855 &vol->plex[myplexno],
1856 vol->plexes - 1 - myplexno);
1859 ioctl_reply->error = EBUSY; /* can't do that */
1863 log(LOG_INFO, "vinum: removing %s\n", plex->name);
1865 vinum_conf.plexes_used--; /* one less plex */
1868 /* remove a volume */
1870 remove_volume_entry(int volno, int force, int recurse)
1872 struct volume *vol = &VOL[volno];
1875 if ((volno > vinum_conf.volumes_allocated) /* not a valid volume */
1876 ||(vol->state == volume_unallocated)) { /* or nothing there */
1877 ioctl_reply->error = EINVAL;
1878 strcpy(ioctl_reply->msg, "No such volume");
1879 } else if (vol->flags & VF_OPEN) /* we're open */
1880 ioctl_reply->error = EBUSY; /* no getting around that */
1881 else if (vol->plexes) {
1882 if (recurse && force) { /* remove all below */
1883 int plexes = vol->plexes;
1885 /* for (plexno = plexes - 1; plexno >= 0; plexno--) */
1886 for (plexno = 0; plexno < plexes; plexno++)
1887 remove_plex_entry(vol->plex[plexno], force, recurse);
1888 log(LOG_INFO, "vinum: removing %s\n", vol->name);
1890 vinum_conf.volumes_used--; /* one less volume */
1892 ioctl_reply->error = EBUSY; /* can't do that */
1894 log(LOG_INFO, "vinum: removing %s\n", vol->name);
1896 vinum_conf.volumes_used--; /* one less volume */
/*
 * update_sd_config: finish configuration of subdisk sdno.
 *
 * The visible code requests state sd_up through set_sd_state() with
 * the setstate_configuring flag and clears VF_NEWBORN on the subdisk.
 * diskconfig is not referenced in the lines shown here.
 */
1900 /* Currently called only from ioctl */
1902 update_sd_config(int sdno, int diskconfig)
1905 set_sd_state(sdno, sd_up, setstate_configuring);
1906 SD[sdno].flags &= ~VF_NEWBORN;
/*
 * update_plex_config: re-validate plex plexno after a configuration
 * change and bring its derived fields up to date.
 *
 * plexno:     index of the plex in the PLEX table.
 * diskconfig: nonzero when the configuration is being read from
 *             disk; a newborn plex joining an existing volume is
 *             only taken down (state plex_down) when this is zero.
 *
 * Steps visible here:
 *   - A newborn plex whose volume is not newborn and already has
 *     plexes is treated as added later.
 *   - For striped and parity plexes the number of data subdisks is
 *     worked out (all subdisks for striped, one less for parity) and
 *     a plex with too few subdisks is marked plex_faulty.
 *   - If the plex length is not a whole number of stripes across the
 *     data subdisks, the excess is removed from the plex length and
 *     an equal share is returned to the drive from the end of every
 *     subdisk via return_drive_space().
 *   - Subdisk sizes are summed; unequal sizes are logged for plexes
 *     that require equal subdisks; subdisks of a plex that was added
 *     later are set to sd_stale.
 *   - plex->length is corrected to the computed size, or set to 0
 *     with state plex_down when there are no subdisks.
 *   - update_plex_state() is called and VF_NEWBORN is cleared.
 *
 * NOTE(review): remainder is an int holding the result of a 64-bit
 * modulo by stripesize * data_sds; confirm this cannot exceed INT_MAX
 * for the stripe sizes accepted by config_plex.
 */
1910 update_plex_config(int plexno, int diskconfig)
1914 struct plex *plex = &PLEX[plexno];
1915 enum plexstate state = plex_up; /* state we want the plex in */
1916 int remainder; /* size of fractional stripe at end */
1917 int added_plex; /* set if we add a plex to a volume */
1918 int required_sds; /* number of subdisks we need */
1921 int data_sds = 0; /* number of sds carrying data */
1923 if (plex->state < plex_init) /* not a real plex, */
1926 if (plex->volno >= 0) { /* we have a volume */
1927 vol = &VOL[plex->volno];
1930 * If we're newly born,
1931 * and the volume isn't,
1932 * and it has other plexes,
1933 * and we didn't read this mess from disk,
1934 * we were added later.
1936 if ((plex->flags & VF_NEWBORN)
1937 && ((vol->flags & VF_NEWBORN) == 0)
1938 && (vol->plexes > 0)
1939 && (diskconfig == 0)) {
1941 state = plex_down; /* so take ourselves down */
1945 * Check that our subdisks make sense. For
1946 * striped, RAID-4 and RAID-5 plexes, we need at
1947 * least two subdisks, and they must all be the
1950 if (plex->organization == plex_striped) {
1951 data_sds = plex->subdisks;
1953 } else if (isparity(plex)) { /* RAID 4 or 5 */
1954 data_sds = plex->subdisks - 1;
1958 if (required_sds > 0) { /* striped, RAID-4 or RAID-5 */
1959 if (plex->subdisks < required_sds) {
1961 "vinum: plex %s does not have at least %d subdisks\n",
1964 state = plex_faulty;
1967 * Now see if the plex size is a multiple of
1968 * the stripe size. If not, trim off the end
1969 * of each subdisk and return it to the drive.
1971 if (plex->length > 0) {
1973 if (plex->stripesize > 0) {
1974 remainder = (int) (plex->length /* are we exact? */
1975 % ((u_int64_t) plex->stripesize * data_sds));
1976 if (remainder) { /* no */
1977 log(LOG_INFO, "vinum: removing %d blocks of partial stripe at the end of %s\n",
1980 plex->length -= remainder; /* shorten the plex */
1981 remainder /= data_sds; /* spread the remainder amongst the sds */
1982 for (sdno = 0; sdno < plex->subdisks; sdno++) {
1983 sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
1984 return_drive_space(sd->driveno, /* return the space */
1985 sd->driveoffset + sd->sectors - remainder,
1987 sd->sectors -= remainder; /* and shorten it */
1990 } else /* no data sds, */
1991 plex->length = 0; /* reset length */
1996 for (sdno = 0; sdno < plex->subdisks; sdno++) {
1997 sd = &SD[plex->sdnos[sdno]];
2000 && (sd->sectors != SD[plex->sdnos[sdno - 1]].sectors)) {
2001 log(LOG_ERR, "vinum: %s must have equal sized subdisks\n", plex->name);
2004 size += sd->sectors;
2005 if (added_plex) { /* we were added later */
2006 sd->state = sd_stale; /* stale until proven otherwise */
2011 if (plex->subdisks) { /* plex has subdisks, calculate size */
2013 * XXX We shouldn't need to calculate the size any
2014 * more. Check this some time
2017 size = size / plex->subdisks * (plex->subdisks - 1); /* less space for RAID-4 and RAID-5 */
2018 if (plex->length != size)
2020 "Correcting length of %s: was %lld, is %lld\n",
2022 (long long) plex->length,
2024 plex->length = size;
2025 } else { /* no subdisks, */
2026 plex->length = 0; /* no size */
2027 state = plex_down; /* take it down */
2029 update_plex_state(plexno); /* set the state */
2030 plex->flags &= ~VF_NEWBORN;
/*
 * update_volume_config: refresh the derived fields of volume volno.
 *
 * For every plex listed in vol->plex[] the volume size is raised to
 * that plex's length if it is larger, and the plex's position in the
 * list is recorded in plex->volplexno.  VF_NEWBORN is then cleared
 * on the volume.  The work is guarded by a test that the volume is
 * not in state volume_unallocated (exact extent of the guard is not
 * visible here; confirm).  diskconfig is not referenced in the lines
 * shown here.
 */
2034 update_volume_config(int volno, int diskconfig)
2036 struct volume *vol = &VOL[volno];
2040 if (vol->state != volume_unallocated)
2042 * Recalculate the size of the volume,
2043 * which might change if the original
2044 * plexes were not a multiple of the
2049 for (plexno = 0; plexno < vol->plexes; plexno++) {
2050 plex = &PLEX[vol->plex[plexno]];
2051 vol->size = u64max(plex->length, vol->size);
2052 plex->volplexno = plexno; /* note it in the plex */
2055 vol->flags &= ~VF_NEWBORN; /* no longer newly born */
/*
 * updateconfig: walk the whole configuration and refresh it.
 *
 * diskconfig is handed on unchanged to update_plex_config() and
 * update_volume_config().
 *
 * Every slot up to plexes_allocated is passed to update_plex_config().
 * Then, for every volume slot whose state is above volume_uninit,
 * VF_CONFIG_SETUPSTATE is cleared, update_volume_state() is called
 * and finally update_volume_config().
 */
2059 * Update the global configuration.
2060 * diskconfig is != 0 if we're reading in a config
2061 * from disk. In this case, we don't try to
2062 * bring the devices up, though we will bring
2063 * them down if there's some error which got
2064 * missed when writing to disk.
2067 updateconfig(int diskconfig)
2072 for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++)
2073 update_plex_config(plexno, diskconfig);
2075 for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) {
2076 if (VOL[volno].state > volume_uninit) {
2077 VOL[volno].flags &= ~VF_CONFIG_SETUPSTATE; /* no more setupstate */
2078 update_volume_state(volno);
2079 update_volume_config(volno, diskconfig);
/*
 * start_config: claim the right to change the configuration.
 *
 * The current_drive, current_plex and current_volume defaults are
 * reset to -1.  While another configurer holds VF_CONFIGURING, this
 * caller sets VF_WILL_CONFIGURE and sleeps on &vinum_conf with PCATCH;
 * a nonzero tsleep() result ends the wait.  Once the flag is free,
 * VF_CONFIGURING and VF_CONFIG_INCOMPLETE are set.
 *
 * force: VF_FORCECONFIG is set in the lines shown here, presumably
 *        only when force is nonzero (the condition is not visible;
 *        confirm).
 */
2086 * Start manual changes to the configuration and lock out
2087 * others who may wish to do so.
2088 * XXX why do we need this and lock_config too?
2091 start_config(int force)
2095 current_drive = -1; /* note the last drive we mention, for
2097 current_plex = -1; /* and the same for the last plex */
2098 current_volume = -1; /* and the last volume */
2099 while ((vinum_conf.flags & VF_CONFIGURING) != 0) {
2100 vinum_conf.flags |= VF_WILL_CONFIGURE;
2101 if ((error = tsleep(&vinum_conf, PCATCH, "vincfg", 0)) != 0)
2105 * We need two flags here: VF_CONFIGURING
2106 * tells other processes to hold off (this
2107 * function), and VF_CONFIG_INCOMPLETE
2108 * tells the state change routines not to
2109 * propagate incrememntal state changes
2111 vinum_conf.flags |= VF_CONFIGURING | VF_CONFIG_INCOMPLETE;
2113 vinum_conf.flags |= VF_FORCECONFIG; /* overwrite differently named drives */
2114 current_drive = -1; /* reset the defaults */
2115 current_plex = -1; /* and the same for the last plex */
2116 current_volume = -1; /* and the last volume */
/*
 * finish_config: end a configuration session begun by start_config().
 *
 * VF_CONFIG_INCOMPLETE, VF_READING_CONFIG and VF_FORCECONFIG are
 * cleared first.  updateconfig() is then called with 0 or with 1;
 * presumably update selects between the two (the condition is not
 * visible here; confirm).  Finally VF_CONFIGURING is dropped and, if
 * somebody registered interest through VF_WILL_CONFIGURE, that flag
 * is cleared and one sleeper on &vinum_conf is woken.
 */
2121 * Update the config if update is 1, and unlock
2122 * it. We won't update the configuration if we
2123 * are called in a recursive loop via throw_rude_remark.
2126 finish_config(int update)
2128 /* we've finished our config */
2129 vinum_conf.flags &= ~(VF_CONFIG_INCOMPLETE | VF_READING_CONFIG | VF_FORCECONFIG);
2131 updateconfig(0); /* so update things */
2133 updateconfig(1); /* do some updates only */
2134 vinum_conf.flags &= ~VF_CONFIGURING; /* and now other people can take a turn */
2135 if ((vinum_conf.flags & VF_WILL_CONFIGURE) != 0) {
2136 vinum_conf.flags &= ~VF_WILL_CONFIGURE;
2137 wakeup_one(&vinum_conf);
2140 /* Local Variables: */
2141 /* fill-column: 50 */