Temporarily reenable writing to the label area for backwards compatibility.
[dragonfly.git] / sys / dev / raid / vinum / vinumio.c
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 1997, 1998
3 * Nan Yang Computer Services Limited. All rights reserved.
4 *
5 * This software is distributed under the so-called ``Berkeley
6 * License'':
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by Nan Yang Computer
19 * Services Limited.
20 * 4. Neither the name of the Company nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * This software is provided ``as is'', and any express or implied
25 * warranties, including, but not limited to, the implied warranties of
26 * merchantability and fitness for a particular purpose are disclaimed.
27 * In no event shall the company or contributors be liable for any
28 * direct, indirect, incidental, special, exemplary, or consequential
29 * damages (including, but not limited to, procurement of substitute
30 * goods or services; loss of use, data, or profits; or business
31 * interruption) however caused and on any theory of liability, whether
32 * in contract, strict liability, or tort (including negligence or
33 * otherwise) arising in any way out of the use of this software, even if
34 * advised of the possibility of such damage.
35 *
36 * $Id: vinumio.c,v 1.30 2000/05/10 23:23:30 grog Exp grog $
37 * $FreeBSD: src/sys/dev/vinum/vinumio.c,v 1.52.2.6 2002/05/02 08:43:44 grog Exp $
d1de5ee6 38 * $DragonFly: src/sys/dev/raid/vinum/vinumio.c,v 1.27 2007/07/16 21:31:06 dillon Exp $
984263bc
MD
39 */
40
1f2de5d4
MD
41#include "vinumhdr.h"
42#include "request.h"
984263bc
MD
43#include <vm/vm_zone.h>
44
45static char *sappend(char *txt, char *s);
46static int drivecmp(const void *va, const void *vb);
47
48/*
49 * Open the device associated with the drive, and set drive's vp.
50 * Return an error number
51 */
52int
53open_drive(struct drive *drive, struct proc *p, int verbose)
54{
55 int devmajor; /* major devs for disk device */
56 int devminor; /* minor devs for disk device */
57 int unit;
2f4ec5c1
MD
58 int slice;
59 int part;
984263bc 60 char *dname;
984263bc
MD
61
62 if (bcmp(drive->devicename, "/dev/", 5)) /* device name doesn't start with /dev */
63 return ENOENT; /* give up */
64 if (drive->flags & VF_OPEN) /* open already, */
65 return EBUSY; /* don't do it again */
66
67 /*
68 * Yes, Bruce, I know this is horrible, but we
69 * don't have a root file system when we first
70 * try to do this. If you can come up with a
71 * better solution, I'd really like it. I'm
72 * just putting it in now to add ammuntion to
73 * moving the system to devfs.
74 */
75 dname = &drive->devicename[5];
76 drive->dev = NULL; /* no device yet */
77
78 /* Find the device */
79 if (bcmp(dname, "ad", 2) == 0) /* IDE disk */
80 devmajor = 116;
81 else if (bcmp(dname, "wd", 2) == 0) /* IDE disk */
82 devmajor = 3;
83 else if (bcmp(dname, "da", 2) == 0)
84 devmajor = 13;
85 else if (bcmp(dname, "vn", 2) == 0)
86 devmajor = 43;
87 else if (bcmp(dname, "md", 2) == 0)
88 devmajor = 95;
c141875f
SW
89 else if (bcmp(dname, "vkd", 3) == 0) {
90 devmajor = 97;
91 dname += 1;
92 } else if (bcmp(dname, "amrd", 4) == 0) {
984263bc
MD
93 devmajor = 133;
94 dname += 2;
95 } else if (bcmp(dname, "mlxd", 4) == 0) {
96 devmajor = 131;
97 dname += 2;
98 } else if (bcmp(dname, "idad", 4) == 0) {
99 devmajor = 109;
100 dname += 2;
101 } else if (bcmp(dname, "twed", 4) == 0) { /* 3ware raid */
102 devmajor = 147;
103 dname += 2;
00aab263
JS
104 } else if (bcmp(dname, "ar", 2) == 0) {
105 devmajor = 157;
984263bc
MD
106 } else
107 return ENODEV;
108 dname += 2; /* point past */
109
110 /*
2f4ec5c1
MD
111 * Found the device. Require the form
112 * <unit>s<slice><partition>
984263bc 113 */
2f4ec5c1
MD
114 if (*dname < '0' || *dname > '9')
115 return(ENODEV);
116 unit = strtol(dname, &dname, 10);
117 if (*dname != 's')
118 return(ENODEV);
119 ++dname;
984263bc 120
2f4ec5c1
MD
121 /*
122 * Convert slice number to value suitable for
123 * dkmakeminor(). 0->0, 1->2, 2->3, etc.
124 */
125 slice = strtol(dname, &dname, 10);
126 if (slice > 0)
127 ++slice;
128
129 if (*dname < 'a' || *dname > 'p')
130 return ENODEV;
131
132 part = *dname - 'a';
133 devminor = dkmakeminor(unit, slice, part);
984263bc 134
83d36d43
MD
135 /*
136 * Disallow partition c
137 */
2f4ec5c1 138 if (part == 2)
984263bc
MD
139 return ENOTTY; /* not buying that */
140
e4c9c0c8 141 drive->dev = udev2dev(makeudev(devmajor, devminor), 0);
984263bc 142
028066b1 143 if (drive->dev == NULL)
6be3395d
SS
144 return ENODEV;
145
984263bc 146 drive->dev->si_iosize_max = DFLTPHYS;
e4c9c0c8 147 if (dev_is_good(drive->dev))
fef8985e 148 drive->lasterror = dev_dopen(drive->dev, FWRITE, 0, proc0.p_ucred);
e4c9c0c8
MD
149 else
150 drive->lasterror = ENOENT;
984263bc
MD
151
152 if (drive->lasterror != 0) { /* failed */
153 drive->state = drive_down; /* just force it down */
154 if (verbose)
155 log(LOG_WARNING,
156 "vinum open_drive %s: failed with error %d\n",
157 drive->devicename, drive->lasterror);
158 } else
159 drive->flags |= VF_OPEN; /* we're open now */
160
161 return drive->lasterror;
162}
163
164/*
165 * Set some variables in the drive struct
166 * in more convenient form. Return error indication
167 */
168int
169set_drive_parms(struct drive *drive)
170{
171 drive->blocksize = BLKDEV_IOSIZE; /* do we need this? */
172 drive->secsperblock = drive->blocksize /* number of sectors per block */
2ec8fb79 173 / drive->partinfo.media_blksize;
984263bc
MD
174
175 /* Now update the label part */
176 bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
177 getmicrotime(&drive->label.date_of_birth); /* and current time */
2ec8fb79 178 drive->label.drive_size = drive->partinfo.media_size;
984263bc
MD
179#if VINUMDEBUG
180 if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */
181 drive->label.drive_size *= 100;
182#endif
183
184 /* number of sectors available for subdisks */
185 drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
186
187 /*
188 * Bug in 3.0 as of January 1998: you can open
189 * non-existent slices. They have a length of 0.
190 */
191 if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */
192 set_drive_state(drive->driveno, drive_down, setstate_force);
193 drive->lasterror = ENOSPC;
194 return ENOSPC;
195 }
196 drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */
197 drive->freelist = (struct drive_freelist *)
198 Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
199 if (drive->freelist == NULL) /* can't malloc, dammit */
200 return ENOSPC;
201 drive->freelist_entries = 1; /* just (almost) the complete drive */
202 drive->freelist[0].offset = DATASTART; /* starts here */
203 drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
204 if (drive->label.name[0] != '\0') /* got a name */
205 set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */
206 else /* we know about it, but that's all */
207 drive->state = drive_referenced;
208 return 0;
209}
210
211/*
212 * Initialize a drive: open the device and add device
213 * information
214 */
215int
216init_drive(struct drive *drive, int verbose)
217{
218 if (drive->devicename[0] != '/') {
219 drive->lasterror = EINVAL;
220 log(LOG_ERR, "vinum: Can't open drive without drive name\n");
221 return EINVAL;
222 }
223 drive->lasterror = open_drive(drive, curproc, verbose); /* open the drive */
224 if (drive->lasterror)
225 return drive->lasterror;
226
335dda38
MD
227 drive->lasterror = dev_dioctl(
228 drive->dev,
984263bc
MD
229 DIOCGPART,
230 (caddr_t) & drive->partinfo,
231 FREAD,
fef8985e 232 proc0.p_ucred);
984263bc
MD
233 if (drive->lasterror) {
234 if (verbose)
235 log(LOG_WARNING,
236 "vinum open_drive %s: Can't get partition information, drive->lasterror %d\n",
237 drive->devicename,
238 drive->lasterror);
239 close_drive(drive);
240 return drive->lasterror;
241 }
2ec8fb79 242 if (drive->partinfo.fstype != FS_VINUM &&
18cb7add 243 !kuuid_is_vinum(&drive->partinfo.fstype_uuid)
2ec8fb79 244 ) {
984263bc
MD
245 drive->lasterror = EFTYPE;
246 if (verbose)
247 log(LOG_WARNING,
248 "vinum open_drive %s: Wrong partition type for vinum\n",
249 drive->devicename);
250 close_drive(drive);
251 return EFTYPE;
252 }
253 return set_drive_parms(drive); /* set various odds and ends */
254}
255
256/* Close a drive if it's open. */
257void
258close_drive(struct drive *drive)
259{
260 LOCKDRIVE(drive); /* keep the daemon out */
261 if (drive->flags & VF_OPEN)
262 close_locked_drive(drive); /* and close it */
263 if (drive->state > drive_down) /* if it's up */
264 drive->state = drive_down; /* make sure it's down */
265 unlockdrive(drive);
266}
267
268/*
269 * Real drive close code, called with drive already locked.
270 * We have also checked that the drive is open. No errors.
271 */
272void
273close_locked_drive(struct drive *drive)
274{
275 /*
276 * If we can't access the drive, we can't flush
277 * the queues, which spec_close() will try to
278 * do. Get rid of them here first.
279 */
fef8985e 280 drive->lasterror = dev_dclose(drive->dev, 0, 0);
984263bc
MD
281 drive->flags &= ~VF_OPEN; /* no longer open */
282}
283
284/*
285 * Remove drive from the configuration.
286 * Caller must ensure that it isn't active.
287 */
288void
289remove_drive(int driveno)
290{
291 struct drive *drive = &vinum_conf.drive[driveno];
292 struct vinum_hdr *vhdr; /* buffer for header */
293 int error;
294
295 if (drive->state > drive_referenced) { /* real drive */
296 if (drive->state == drive_up) {
297 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */
298 CHECKALLOC(vhdr, "Can't allocate memory");
299 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
300 if (error)
301 drive->lasterror = error;
302 else {
303 vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */
304 write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
305 }
306 Free(vhdr);
307 }
308 free_drive(drive); /* close it and free resources */
309 save_config(); /* and save the updated configuration */
310 }
311}
312
313/*
314 * Transfer drive data. Usually called from one of these defines;
10f3fee5
MD
315 * #define read_drive(a, b, c, d) driveio (a, b, c, d, BUF_CMD_READ)
316 * #define write_drive(a, b, c, d) driveio (a, b, c, d, BUF_CMD_WRITE)
984263bc
MD
317 *
318 * length and offset are in bytes, but must be multiples of sector
319 * size. The function *does not check* for this condition, and
320 * truncates ruthlessly.
321 * Return error number
322 */
323int
10f3fee5 324driveio(struct drive *drive, char *buf, size_t length, off_t offset, buf_cmd_t cmd)
984263bc
MD
325{
326 int error;
327 struct buf *bp;
3591bbc6 328 caddr_t saveaddr;
984263bc
MD
329
330 error = 0; /* to keep the compiler happy */
331 while (length) { /* divide into small enough blocks */
332 int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */
333
334 bp = geteblk(len); /* get a buffer header */
10f3fee5 335 bp->b_cmd = cmd;
54078292 336 bp->b_bio1.bio_offset = offset; /* disk offset */
3591bbc6 337 saveaddr = bp->b_data;
984263bc
MD
338 bp->b_data = buf;
339 bp->b_bcount = len;
81b5c339 340 dev_dstrategy(drive->dev, &bp->b_bio1);
984263bc 341 error = biowait(bp);
3591bbc6 342 bp->b_data = saveaddr;
984263bc
MD
343 bp->b_flags |= B_INVAL | B_AGE;
344 bp->b_flags &= ~B_ERROR;
345 brelse(bp);
346 if (error)
347 break;
348 length -= len; /* update pointers */
349 buf += len;
350 offset += len;
351 }
352 return error;
353}
354
355/*
356 * Check a drive for a vinum header. If found,
357 * update the drive information. We come here
358 * with a partially populated drive structure
359 * which includes the device name.
360 *
361 * Return information on what we found.
362 *
363 * This function is called from two places: check_drive,
364 * which wants to find out whether the drive is a
365 * Vinum drive, and config_drive, which asserts that
366 * it is a vinum drive. In the first case, we don't
367 * print error messages (verbose==0), in the second
368 * we do (verbose==1).
369 */
370enum drive_label_info
371read_drive_label(struct drive *drive, int verbose)
372{
373 int error;
374 int result; /* result of our search */
375 struct vinum_hdr *vhdr; /* and as header */
376
377 error = init_drive(drive, 0); /* find the drive */
378 if (error) /* find the drive */
379 return DL_CANT_OPEN; /* not ours */
380
381 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
382 CHECKALLOC(vhdr, "Can't allocate memory");
383
384 drive->state = drive_up; /* be optimistic */
385 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
386 if (vhdr->magic == VINUM_MAGIC) { /* ours! */
387 if (drive->label.name[0] /* we have a name for this drive */
388 &&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
389 drive->lasterror = EINVAL;
390 result = DL_WRONG_DRIVE; /* it's the wrong drive */
391 drive->state = drive_unallocated; /* put it back, it's not ours */
392 } else
393 result = DL_OURS;
394 /*
395 * We copy the drive anyway so that we have
396 * the correct name in the drive info. This
397 * may not be the name specified
398 */
399 drive->label = vhdr->label; /* put in the label information */
400 } else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
401 result = DL_DELETED_LABEL; /* and return the info */
402 else
403 result = DL_NOT_OURS; /* we could have it, but we don't yet */
404 Free(vhdr); /* that's all. */
405 return result;
406}
407
408/*
409 * Check a drive for a vinum header. If found,
410 * read configuration information from the drive and
411 * incorporate the data into the configuration.
412 *
413 * Return drive number.
414 */
415struct drive *
416check_drive(char *devicename)
417{
418 int driveno;
419 int i;
420 struct drive *drive;
421
422 driveno = find_drive_by_dev(devicename, 1); /* if entry doesn't exist, create it */
423 drive = &vinum_conf.drive[driveno]; /* and get a pointer */
424
425 if (read_drive_label(drive, 0) == DL_OURS) { /* one of ours */
426 for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */
427 if ((i != driveno) /* not this drive */
428 &&(DRIVE[i].state != drive_unallocated) /* and it's allocated */
429 &&(strcmp(DRIVE[i].label.name,
430 DRIVE[driveno].label.name) == 0)) { /* and it has the same name */
431 struct drive *mydrive = &DRIVE[i];
432
433 if (mydrive->devicename[0] == '/') { /* we know a device name for it */
434 /*
435 * set an error, but don't take the
436 * drive down: that would cause unneeded
437 * error messages.
438 */
439 drive->lasterror = EEXIST;
440 break;
441 } else { /* it's just a place holder, */
442 int sdno;
443
444 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */
445 if ((SD[sdno].driveno == i) /* it's pointing to this one, */
446 &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */
447 SD[sdno].driveno = drive->driveno; /* point to the one we found */
448 update_sd_state(sdno); /* and update its state */
449 }
450 }
451 bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */
452 }
453 }
454 }
455 } else {
456 if (drive->lasterror == 0)
457 drive->lasterror = ENODEV;
458 close_drive(drive);
459 drive->state = drive_down;
460 }
461 return drive;
462}
463
/*
 * Copy the string txt, including its terminating NUL, to s.
 *
 * Note the argument order: the source comes first, the destination
 * second.  No bounds checking is done.
 *
 * Return a pointer to the NUL just written, i.e. to the place where
 * the next string should be appended.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
        char c = *txt++;

        *s = c;
        if (c == '\0')
            return s;                       /* points at the terminator */
        s++;
    }
}
470
471void
472format_config(char *config, int len)
473{
474 int i;
475 int j;
476 char *s = config;
477 char *configend = &config[len];
478
479 bzero(config, len);
480
481 /* First write the volume configuration */
482 for (i = 0; i < vinum_conf.volumes_allocated; i++) {
483 struct volume *vol;
484
485 vol = &vinum_conf.volume[i];
486 if ((vol->state > volume_uninit)
487 && (vol->name[0] != '\0')) { /* paranoia */
f8c7a42d 488 ksnprintf(s,
984263bc
MD
489 configend - s,
490 "volume %s state %s",
491 vol->name,
492 volume_state(vol->state));
493 while (*s)
494 s++; /* find the end */
495 if (vol->preferred_plex >= 0) /* preferences, */
f8c7a42d 496 ksnprintf(s,
984263bc
MD
497 configend - s,
498 " readpol prefer %s",
499 vinum_conf.plex[vol->preferred_plex].name);
500 while (*s)
501 s++; /* find the end */
502 s = sappend("\n", s);
503 }
504 }
505
506 /* Then the plex configuration */
507 for (i = 0; i < vinum_conf.plexes_allocated; i++) {
508 struct plex *plex;
509
510 plex = &vinum_conf.plex[i];
511 if ((plex->state > plex_referenced)
512 && (plex->name[0] != '\0')) { /* paranoia */
f8c7a42d 513 ksnprintf(s,
984263bc
MD
514 configend - s,
515 "plex name %s state %s org %s ",
516 plex->name,
517 plex_state(plex->state),
518 plex_org(plex->organization));
519 while (*s)
520 s++; /* find the end */
521 if (isstriped(plex)) {
f8c7a42d 522 ksnprintf(s,
984263bc
MD
523 configend - s,
524 "%ds ",
525 (int) plex->stripesize);
526 while (*s)
527 s++; /* find the end */
528 }
529 if (plex->volno >= 0) /* we have a volume */
f8c7a42d 530 ksnprintf(s,
984263bc
MD
531 configend - s,
532 "vol %s ",
533 vinum_conf.volume[plex->volno].name);
534 while (*s)
535 s++; /* find the end */
536 for (j = 0; j < plex->subdisks; j++) {
f8c7a42d 537 ksnprintf(s,
984263bc
MD
538 configend - s,
539 " sd %s",
540 vinum_conf.sd[plex->sdnos[j]].name);
541 }
542 s = sappend("\n", s);
543 }
544 }
545
546 /* And finally the subdisk configuration */
547 for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
548 struct sd *sd;
549 char *drivename;
550
551 sd = &SD[i];
552 if ((sd->state != sd_referenced)
553 && (sd->state != sd_unallocated)
554 && (sd->name[0] != '\0')) { /* paranoia */
555 drivename = vinum_conf.drive[sd->driveno].label.name;
556 /*
557 * XXX We've seen cases of dead subdisks
558 * which don't have a drive. If we let them
559 * through here, the drive name is null, so
560 * they get the drive named 'plex'.
561 *
562 * This is a breakage limiter, not a fix.
563 */
564 if (drivename[0] == '\0')
565 drivename = "*invalid*";
f8c7a42d 566 ksnprintf(s,
984263bc
MD
567 configend - s,
568 "sd name %s drive %s plex %s len %llus driveoffset %llus state %s",
569 sd->name,
570 drivename,
571 vinum_conf.plex[sd->plexno].name,
572 (unsigned long long) sd->sectors,
573 (unsigned long long) sd->driveoffset,
574 sd_state(sd->state));
575 while (*s)
576 s++; /* find the end */
577 if (sd->plexno >= 0)
f8c7a42d 578 ksnprintf(s,
984263bc
MD
579 configend - s,
580 " plexoffset %llds",
581 (long long) sd->plexoffset);
582 else
f8c7a42d 583 ksnprintf(s, configend - s, " detached");
984263bc
MD
584 while (*s)
585 s++; /* find the end */
586 if (sd->flags & VF_RETRYERRORS) {
f8c7a42d 587 ksnprintf(s, configend - s, " retryerrors");
984263bc
MD
588 while (*s)
589 s++; /* find the end */
590 }
f8c7a42d 591 ksnprintf(s, configend - s, " \n");
984263bc
MD
592 while (*s)
593 s++; /* find the end */
594 }
595 }
596 if (s > &config[len - 2])
597 panic("vinum: configuration data overflow");
598}
599
600/*
601