Cleanup a couple of serious issues with vinum.
[dragonfly.git] / sys / dev / raid / vinum / vinumio.c
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 1997, 1998
3 * Nan Yang Computer Services Limited. All rights reserved.
4 *
5 * This software is distributed under the so-called ``Berkeley
6 * License'':
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by Nan Yang Computer
19 * Services Limited.
20 * 4. Neither the name of the Company nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * This software is provided ``as is'', and any express or implied
25 * warranties, including, but not limited to, the implied warranties of
26 * merchantability and fitness for a particular purpose are disclaimed.
27 * In no event shall the company or contributors be liable for any
28 * direct, indirect, incidental, special, exemplary, or consequential
29 * damages (including, but not limited to, procurement of substitute
30 * goods or services; loss of use, data, or profits; or business
31 * interruption) however caused and on any theory of liability, whether
32 * in contract, strict liability, or tort (including negligence or
33 * otherwise) arising in any way out of the use of this software, even if
34 * advised of the possibility of such damage.
35 *
36 * $Id: vinumio.c,v 1.30 2000/05/10 23:23:30 grog Exp grog $
37 * $FreeBSD: src/sys/dev/vinum/vinumio.c,v 1.52.2.6 2002/05/02 08:43:44 grog Exp $
83d36d43 38 * $DragonFly: src/sys/dev/raid/vinum/vinumio.c,v 1.9 2005/09/16 04:33:14 dillon Exp $
984263bc
MD
39 */
40
1f2de5d4
MD
41#include "vinumhdr.h"
42#include "request.h"
984263bc
MD
43#include <vm/vm_zone.h>
44
45static char *sappend(char *txt, char *s);
46static int drivecmp(const void *va, const void *vb);
47
48/*
49 * Open the device associated with the drive, and set drive's vp.
50 * Return an error number
51 */
52int
53open_drive(struct drive *drive, struct proc *p, int verbose)
54{
55 int devmajor; /* major devs for disk device */
56 int devminor; /* minor devs for disk device */
57 int unit;
58 char *dname;
984263bc
MD
59
60 if (bcmp(drive->devicename, "/dev/", 5)) /* device name doesn't start with /dev */
61 return ENOENT; /* give up */
62 if (drive->flags & VF_OPEN) /* open already, */
63 return EBUSY; /* don't do it again */
64
65 /*
66 * Yes, Bruce, I know this is horrible, but we
67 * don't have a root file system when we first
68 * try to do this. If you can come up with a
69 * better solution, I'd really like it. I'm
70 * just putting it in now to add ammuntion to
71 * moving the system to devfs.
72 */
73 dname = &drive->devicename[5];
74 drive->dev = NULL; /* no device yet */
75
76 /* Find the device */
77 if (bcmp(dname, "ad", 2) == 0) /* IDE disk */
78 devmajor = 116;
79 else if (bcmp(dname, "wd", 2) == 0) /* IDE disk */
80 devmajor = 3;
81 else if (bcmp(dname, "da", 2) == 0)
82 devmajor = 13;
83 else if (bcmp(dname, "vn", 2) == 0)
84 devmajor = 43;
85 else if (bcmp(dname, "md", 2) == 0)
86 devmajor = 95;
87 else if (bcmp(dname, "amrd", 4) == 0) {
88 devmajor = 133;
89 dname += 2;
90 } else if (bcmp(dname, "mlxd", 4) == 0) {
91 devmajor = 131;
92 dname += 2;
93 } else if (bcmp(dname, "idad", 4) == 0) {
94 devmajor = 109;
95 dname += 2;
96 } else if (bcmp(dname, "twed", 4) == 0) { /* 3ware raid */
97 devmajor = 147;
98 dname += 2;
00aab263
JS
99 } else if (bcmp(dname, "ar", 2) == 0) {
100 devmajor = 157;
984263bc
MD
101 } else
102 return ENODEV;
103 dname += 2; /* point past */
104
105 /*
106 * Found the device. We can expect one of
107 * two formats for the rest: a unit number,
108 * then either a partition letter for the
109 * compatiblity partition (e.g. h) or a
110 * slice ID and partition (e.g. s2e).
111 * Create a minor number for each of them.
112 */
113 unit = 0;
114 while ((*dname >= '0') /* unit number */
115 &&(*dname <= '9')) {
116 unit = unit * 10 + *dname - '0';
117 dname++;
118 }
119
120 if (*dname == 's') { /* slice */
121 if (((dname[1] < '1') || (dname[1] > '4')) /* invalid slice */
83d36d43 122 ||((dname[2] < 'a') || (dname[2] > 'p'))) /* or invalid partition */
984263bc 123 return ENODEV;
83d36d43 124 devminor = dkmakeminor(unit, dname[1] - '0' + 1, (dname[2] - 'a'));
984263bc 125 } else { /* compatibility partition */
83d36d43 126 if ((*dname < 'a') || (*dname > 'p')) /* or invalid partition */
984263bc 127 return ENODEV;
83d36d43 128 devminor = dkmakeminor(unit, 0, (dname[0] - 'a'));
984263bc
MD
129 }
130
83d36d43
MD
131 /*
132 * Disallow partition c
133 */
134 if ((((devminor >> 17) & 0x08) | (devminor & 7)) == 2)
984263bc
MD
135 return ENOTTY; /* not buying that */
136
e4c9c0c8 137 drive->dev = udev2dev(makeudev(devmajor, devminor), 0);
984263bc
MD
138
139 drive->dev->si_iosize_max = DFLTPHYS;
e4c9c0c8 140 if (dev_is_good(drive->dev))
335dda38 141 drive->lasterror = dev_dopen(drive->dev, FWRITE, 0, NULL);
e4c9c0c8
MD
142 else
143 drive->lasterror = ENOENT;
984263bc
MD
144
145 if (drive->lasterror != 0) { /* failed */
146 drive->state = drive_down; /* just force it down */
147 if (verbose)
148 log(LOG_WARNING,
149 "vinum open_drive %s: failed with error %d\n",
150 drive->devicename, drive->lasterror);
151 } else
152 drive->flags |= VF_OPEN; /* we're open now */
153
154 return drive->lasterror;
155}
156
157/*
158 * Set some variables in the drive struct
159 * in more convenient form. Return error indication
160 */
161int
162set_drive_parms(struct drive *drive)
163{
164 drive->blocksize = BLKDEV_IOSIZE; /* do we need this? */
165 drive->secsperblock = drive->blocksize /* number of sectors per block */
166 / drive->partinfo.disklab->d_secsize;
167
168 /* Now update the label part */
169 bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
170 getmicrotime(&drive->label.date_of_birth); /* and current time */
171 drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */
172 *((u_int64_t) drive->partinfo.disklab->d_secsize);
173#if VINUMDEBUG
174 if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */
175 drive->label.drive_size *= 100;
176#endif
177
178 /* number of sectors available for subdisks */
179 drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
180
181 /*
182 * Bug in 3.0 as of January 1998: you can open
183 * non-existent slices. They have a length of 0.
184 */
185 if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */
186 set_drive_state(drive->driveno, drive_down, setstate_force);
187 drive->lasterror = ENOSPC;
188 return ENOSPC;
189 }
190 drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */
191 drive->freelist = (struct drive_freelist *)
192 Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
193 if (drive->freelist == NULL) /* can't malloc, dammit */
194 return ENOSPC;
195 drive->freelist_entries = 1; /* just (almost) the complete drive */
196 drive->freelist[0].offset = DATASTART; /* starts here */
197 drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
198 if (drive->label.name[0] != '\0') /* got a name */
199 set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */
200 else /* we know about it, but that's all */
201 drive->state = drive_referenced;
202 return 0;
203}
204
205/*
206 * Initialize a drive: open the device and add device
207 * information
208 */
209int
210init_drive(struct drive *drive, int verbose)
211{
212 if (drive->devicename[0] != '/') {
213 drive->lasterror = EINVAL;
214 log(LOG_ERR, "vinum: Can't open drive without drive name\n");
215 return EINVAL;
216 }
217 drive->lasterror = open_drive(drive, curproc, verbose); /* open the drive */
218 if (drive->lasterror)
219 return drive->lasterror;
220
335dda38
MD
221 drive->lasterror = dev_dioctl(
222 drive->dev,
984263bc
MD
223 DIOCGPART,
224 (caddr_t) & drive->partinfo,
225 FREAD,
41c20dac 226 curthread);
984263bc
MD
227 if (drive->lasterror) {
228 if (verbose)
229 log(LOG_WARNING,
230 "vinum open_drive %s: Can't get partition information, drive->lasterror %d\n",
231 drive->devicename,
232 drive->lasterror);
233 close_drive(drive);
234 return drive->lasterror;
235 }
236 if (drive->partinfo.part->p_fstype != FS_VINUM) { /* not Vinum */
237 drive->lasterror = EFTYPE;
238 if (verbose)
239 log(LOG_WARNING,
240 "vinum open_drive %s: Wrong partition type for vinum\n",
241 drive->devicename);
242 close_drive(drive);
243 return EFTYPE;
244 }
245 return set_drive_parms(drive); /* set various odds and ends */
246}
247
248/* Close a drive if it's open. */
249void
250close_drive(struct drive *drive)
251{
252 LOCKDRIVE(drive); /* keep the daemon out */
253 if (drive->flags & VF_OPEN)
254 close_locked_drive(drive); /* and close it */
255 if (drive->state > drive_down) /* if it's up */
256 drive->state = drive_down; /* make sure it's down */
257 unlockdrive(drive);
258}
259
260/*
261 * Real drive close code, called with drive already locked.
262 * We have also checked that the drive is open. No errors.
263 */
264void
265close_locked_drive(struct drive *drive)
266{
267 /*
268 * If we can't access the drive, we can't flush
269 * the queues, which spec_close() will try to
270 * do. Get rid of them here first.
271 */
335dda38 272 drive->lasterror = dev_dclose(drive->dev, 0, 0, NULL);
984263bc
MD
273 drive->flags &= ~VF_OPEN; /* no longer open */
274}
275
276/*
277 * Remove drive from the configuration.
278 * Caller must ensure that it isn't active.
279 */
280void
281remove_drive(int driveno)
282{
283 struct drive *drive = &vinum_conf.drive[driveno];
284 struct vinum_hdr *vhdr; /* buffer for header */
285 int error;
286
287 if (drive->state > drive_referenced) { /* real drive */
288 if (drive->state == drive_up) {
289 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */
290 CHECKALLOC(vhdr, "Can't allocate memory");
291 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
292 if (error)
293 drive->lasterror = error;
294 else {
295 vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */
296 write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
297 }
298 Free(vhdr);
299 }
300 free_drive(drive); /* close it and free resources */
301 save_config(); /* and save the updated configuration */
302 }
303}
304
305/*
306 * Transfer drive data. Usually called from one of these defines;
307 * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
308 * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
309 *
310 * length and offset are in bytes, but must be multiples of sector
311 * size. The function *does not check* for this condition, and
312 * truncates ruthlessly.
313 * Return error number
314 */
315int
316driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag)
317{
318 int error;
319 struct buf *bp;
320
321 error = 0; /* to keep the compiler happy */
322 while (length) { /* divide into small enough blocks */
323 int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */
324
325 bp = geteblk(len); /* get a buffer header */
326 bp->b_flags = flag;
327 bp->b_dev = drive->dev; /* device */
328 bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
329 bp->b_saveaddr = bp->b_data;
330 bp->b_data = buf;
331 bp->b_bcount = len;
332 BUF_STRATEGY(bp, 0); /* initiate the transfer */
333 error = biowait(bp);
334 bp->b_data = bp->b_saveaddr;
335 bp->b_flags |= B_INVAL | B_AGE;
336 bp->b_flags &= ~B_ERROR;
337 brelse(bp);
338 if (error)
339 break;
340 length -= len; /* update pointers */
341 buf += len;
342 offset += len;
343 }
344 return error;
345}
346
347/*
348 * Check a drive for a vinum header. If found,
349 * update the drive information. We come here
350 * with a partially populated drive structure
351 * which includes the device name.
352 *
353 * Return information on what we found.
354 *
355 * This function is called from two places: check_drive,
356 * which wants to find out whether the drive is a
357 * Vinum drive, and config_drive, which asserts that
358 * it is a vinum drive. In the first case, we don't
359 * print error messages (verbose==0), in the second
360 * we do (verbose==1).
361 */
362enum drive_label_info
363read_drive_label(struct drive *drive, int verbose)
364{
365 int error;
366 int result; /* result of our search */
367 struct vinum_hdr *vhdr; /* and as header */
368
369 error = init_drive(drive, 0); /* find the drive */
370 if (error) /* find the drive */
371 return DL_CANT_OPEN; /* not ours */
372
373 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
374 CHECKALLOC(vhdr, "Can't allocate memory");
375
376 drive->state = drive_up; /* be optimistic */
377 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
378 if (vhdr->magic == VINUM_MAGIC) { /* ours! */
379 if (drive->label.name[0] /* we have a name for this drive */
380 &&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
381 drive->lasterror = EINVAL;
382 result = DL_WRONG_DRIVE; /* it's the wrong drive */
383 drive->state = drive_unallocated; /* put it back, it's not ours */
384 } else
385 result = DL_OURS;
386 /*
387 * We copy the drive anyway so that we have
388 * the correct name in the drive info. This
389 * may not be the name specified
390 */
391 drive->label = vhdr->label; /* put in the label information */
392 } else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
393 result = DL_DELETED_LABEL; /* and return the info */
394 else
395 result = DL_NOT_OURS; /* we could have it, but we don't yet */
396 Free(vhdr); /* that's all. */
397 return result;
398}
399
400/*
401 * Check a drive for a vinum header. If found,
402 * read configuration information from the drive and
403 * incorporate the data into the configuration.
404 *
405 * Return drive number.
406 */
407struct drive *
408check_drive(char *devicename)
409{
410 int driveno;
411 int i;
412 struct drive *drive;
413
414 driveno = find_drive_by_dev(devicename, 1); /* if entry doesn't exist, create it */
415 drive = &vinum_conf.drive[driveno]; /* and get a pointer */
416
417 if (read_drive_label(drive, 0) == DL_OURS) { /* one of ours */
418 for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */
419 if ((i != driveno) /* not this drive */
420 &&(DRIVE[i].state != drive_unallocated) /* and it's allocated */
421 &&(strcmp(DRIVE[i].label.name,
422 DRIVE[driveno].label.name) == 0)) { /* and it has the same name */
423 struct drive *mydrive = &DRIVE[i];
424
425 if (mydrive->devicename[0] == '/') { /* we know a device name for it */
426 /*
427 * set an error, but don't take the
428 * drive down: that would cause unneeded
429 * error messages.
430 */
431 drive->lasterror = EEXIST;
432 break;
433 } else { /* it's just a place holder, */
434 int sdno;
435
436 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */
437 if ((SD[sdno].driveno == i) /* it's pointing to this one, */
438 &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */
439 SD[sdno].driveno = drive->driveno; /* point to the one we found */
440 update_sd_state(sdno); /* and update its state */
441 }
442 }
443 bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */
444 }
445 }
446 }
447 } else {
448 if (drive->lasterror == 0)
449 drive->lasterror = ENODEV;
450 close_drive(drive);
451 drive->state = drive_down;
452 }
453 return drive;
454}
455
/*
 * Copy the string txt, including its terminating NUL, to s.
 * Return a pointer to the NUL written into the destination, i.e. the
 * place where the next string should be appended.  No bounds are
 * checked.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
        char c = *txt++;

        *s++ = c;
        if (c == '\0') {
            break;
        }
    }
    return s - 1;
}
462
463void
464format_config(char *config, int len)
465{
466 int i;
467 int j;
468 char *s = config;
469 char *configend = &config[len];
470
471 bzero(config, len);
472
473 /* First write the volume configuration */
474 for (i = 0; i < vinum_conf.volumes_allocated; i++) {
475 struct volume *vol;
476
477 vol = &vinum_conf.volume[i];
478 if ((vol->state > volume_uninit)
479 && (vol->name[0] != '\0')) { /* paranoia */
480 snprintf(s,
481 configend - s,
482 "volume %s state %s",
483 vol->name,
484 volume_state(vol->state));
485 while (*s)
486 s++; /* find the end */
487 if (vol->preferred_plex >= 0) /* preferences, */
488 snprintf(s,
489 configend - s,
490 " readpol prefer %s",
491 vinum_conf.plex[vol->preferred_plex].name);
492 while (*s)
493 s++; /* find the end */
494 s = sappend("\n", s);
495 }
496 }
497
498 /* Then the plex configuration */
499 for (i = 0; i < vinum_conf.plexes_allocated; i++) {
500 struct plex *plex;
501
502 plex = &vinum_conf.plex[i];
503 if ((plex->state > plex_referenced)
504 && (plex->name[0] != '\0')) { /* paranoia */
505 snprintf(s,
506 configend - s,
507 "plex name %s state %s org %s ",
508 plex->name,
509 plex_state(plex->state),
510 plex_org(plex->organization));
511 while (*s)
512 s++; /* find the end */
513 if (isstriped(plex)) {
514 snprintf(s,
515 configend - s,
516 "%ds ",
517 (int) plex->stripesize);
518 while (*s)
519 s++; /* find the end */
520 }
521 if (plex->volno >= 0) /* we have a volume */
522 snprintf(s,
523 configend - s,
524 "vol %s ",
525 vinum_conf.volume[plex->volno].name);
526 while (*s)
527 s++; /* find the end */
528 for (j = 0; j < plex->subdisks; j++) {
529 snprintf(s,
530 configend - s,
531 " sd %s",
532 vinum_conf.sd[plex->sdnos[j]].name);
533 }
534 s = sappend("\n", s);
535 }
536 }
537
538 /* And finally the subdisk configuration */
539 for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
540 struct sd *sd;
541 char *drivename;
542
543 sd = &SD[i];
544 if ((sd->state != sd_referenced)
545 && (sd->state != sd_unallocated)
546 && (sd->name[0] != '\0')) { /* paranoia */
547 drivename = vinum_conf.drive[sd->driveno].label.name;
548 /*
549 * XXX We've seen cases of dead subdisks
550 * which don't have a drive. If we let them
551 * through here, the drive name is null, so
552 * they get the drive named 'plex'.
553 *
554 * This is a breakage limiter, not a fix.
555 */
556 if (drivename[0] == '\0')
557 drivename = "*invalid*";
558 snprintf(s,
559 configend - s,
560 "sd name %s drive %s plex %s len %llus driveoffset %llus state %s",
561 sd->name,
562 drivename,
563 vinum_conf.plex[sd->plexno].name,
564 (unsigned long long) sd->sectors,
565 (unsigned long long) sd->driveoffset,
566 sd_state(sd->state));
567 while (*s)
568 s++; /* find the end */
569 if (sd->plexno >= 0)
570 snprintf(s,
571 configend - s,
572 " plexoffset %llds",
573 (long long) sd->plexoffset);
574 else
575 snprintf(s, configend - s, " detached");
576 while (*s)
577 s++; /* find the end */
578 if (sd->flags & VF_RETRYERRORS) {
579 snprintf(s, configend - s, " retryerrors");
580 while (*s)
581 s++; /* find the end */
582 }
583 snprintf(s, configend - s, " \n");
584 while (*s)
585 s++; /* find the end */
586 }
587 }
588 if (s > &config[len - 2])
589 panic("vinum: configuration data overflow");
590}
591
592/*
593