nrelease - fix/improve livecd
[dragonfly.git] / sys / dev / raid / vinum / vinumvar.h
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 1997, 1998, 1999
3 * Nan Yang Computer Services Limited. All rights reserved.
4 *
5 * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6 *
7 * Written by Greg Lehey
8 *
9 * This software is distributed under the so-called ``Berkeley
10 * License'':
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Nan Yang Computer
23 * Services Limited.
24 * 4. Neither the name of the Company nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided ``as is'', and any express or implied
29 * warranties, including, but not limited to, the implied warranties of
30 * merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall the company or contributors be liable for any
32 * direct, indirect, incidental, special, exemplary, or consequential
33 * damages (including, but not limited to, procurement of substitute
34 * goods or services; loss of use, data, or profits; or business
35 * interruption) however caused and on any theory of liability, whether
36 * in contract, strict liability, or tort (including negligence or
37 * otherwise) arising in any way out of the use of this software, even if
38 * advised of the possibility of such damage.
984263bc
MD
39 */
40
29bcf169 41#ifdef _KERNEL
752b2d38 42#include "opt_vinum.h"
29bcf169 43#endif
752b2d38 44
984263bc 45#include <sys/time.h>
1f2de5d4 46#include "vinumstate.h"
984263bc 47
c2fcd54e
MD
/*
 * A disk block number or offset.  Signed and 64 bits wide.
 */
typedef int64_t vinum_off_t;
52
984263bc
MD
53/*
54 * Some configuration maxima. They're an enum because
55 * we can't define global constants. Sorry about that.
56 *
57 * These aren't as bad as they look: most of them are soft limits.
58 */
59
60#define VINUMROOT
61enum constants {
7c8f38d4
MD
62 VINUM_HEADER = 512, /* size of header on disk */
63 MAXCONFIGLINE = 1024, /* maximum size of one config line */
64 MINVINUMSLICE = 1048576, /* minimum size of a slice */
65
7c8f38d4
MD
66 ROUND_ROBIN_READPOL = -1, /* round robin read policy */
67
68 /*
69 * type field in minor number
70 */
71 VINUM_VOLUME_TYPE = 0,
72 VINUM_PLEX_TYPE = 1,
73 VINUM_SD_TYPE = 2,
74 VINUM_DRIVE_TYPE = 3,
75 VINUM_SUPERDEV_TYPE = 4, /* super device. */
76 VINUM_RAWPLEX_TYPE = 5, /* anonymous plex */
77 VINUM_RAWSD_TYPE = 6, /* anonymous subdisk */
78
79 /*
80 * Shifts for the individual fields in the device
81 */
82 VINUM_TYPE_SHIFT = 28,
83 VINUM_VOL_SHIFT = 0,
84 VINUM_PLEX_SHIFT = 16,
85 VINUM_SD_SHIFT = 20,
86 VINUM_VOL_WIDTH = 8,
87 VINUM_PLEX_WIDTH = 3,
88 VINUM_SD_WIDTH = 8,
89
90 /*
91 * Shifts for the second half of raw plex and
92 * subdisk numbers
93 */
94 VINUM_RAWPLEX_SHIFT = 8, /* shift the second half this much */
95 VINUM_RAWPLEX_WIDTH = 12, /* width of second half */
96
97 MAJORDEV_SHIFT = 8,
98
99 MAXPLEX = 8, /* max number of plexes in a volume */
100 MAXSD = 256, /* max number of subdisks in a plex */
101 MAXDRIVENAME = 32, /* max length of a device name */
102 MAXSDNAME = 64, /* max length of a subdisk name */
103 MAXPLEXNAME = 64, /* max length of a plex name */
104 MAXVOLNAME = 64, /* max length of a volume name */
105 MAXNAME = 64, /* max length of any name */
106
107
108 /*
109 * Define a minor device number.
110 * This is not used directly; instead, it's
111 * called by the other macros.
112 */
113#define VINUMMINOR(v,p,s,t) ( (v << VINUM_VOL_SHIFT) \
984263bc
MD
114 | (p << VINUM_PLEX_SHIFT) \
115 | (s << VINUM_SD_SHIFT) \
116 | (t << VINUM_TYPE_SHIFT) )
117
e4c9c0c8 118/* Create device minor numbers */
984263bc 119
e4c9c0c8 120#ifdef _KERNEL
984263bc 121
e4c9c0c8 122#define VINUMDEV(v,p,s,t) \
d736a600 123 VINUMMINOR (v, p, s, t)
e4c9c0c8
MD
124
125#define VINUM_PLEX(p) \
d736a600 126 ((VINUM_RAWPLEX_TYPE << VINUM_TYPE_SHIFT) \
e4c9c0c8 127 | (p & 0xff) \
d736a600 128 | ((p & ~0xff) << 8))
e4c9c0c8
MD
129
130#define VINUM_SD(s) \
d736a600 131 ((VINUM_RAWSD_TYPE << VINUM_TYPE_SHIFT) \
e4c9c0c8 132 | (s & 0xff) \
d736a600 133 | ((s & ~0xff) << 8))
e4c9c0c8
MD
134
135#endif
984263bc
MD
136
137 /* Create a bit mask for x bits */
138#define MASK(x) ((1 << (x)) - 1)
139
140 /* Create a raw block device minor number */
7c8f38d4
MD
141#define VINUMRMINOR(d,t) \
142 ( ((d & MASK(VINUM_VOL_WIDTH)) << VINUM_VOL_SHIFT) \
143 | ((d & ~MASK(VINUM_VOL_WIDTH)) << \
144 (VINUM_PLEX_SHIFT + VINUM_VOL_WIDTH)) \
145 | (t << VINUM_TYPE_SHIFT) )
984263bc 146
984263bc
MD
147 /* extract device type */
148#define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 7)
149
150 /*
151 * This mess is used to catch people who compile
152 * a debug vinum(8) and non-debug kernel module,
153 * or the other way round.
154 */
155
156#ifdef VINUMDEBUG
7c8f38d4
MD
157
158/* superdevice number */
159#define VINUM_SUPERDEV VINUMMINOR(1, 0, 0, VINUM_SUPERDEV_TYPE)
160
161/* non-debug superdevice number */
162#define VINUM_WRONGSUPERDEV VINUMMINOR(2, 0, 0, VINUM_SUPERDEV_TYPE)
163
984263bc 164#else
984263bc 165
7c8f38d4
MD
166/* superdevice number */
167#define VINUM_SUPERDEV VINUMMINOR(2, 0, 0, VINUM_SUPERDEV_TYPE)
984263bc 168
7c8f38d4
MD
169/* debug superdevice number */
170#define VINUM_WRONGSUPERDEV VINUMMINOR(1, 0, 0, VINUM_SUPERDEV_TYPE)
984263bc 171
7c8f38d4
MD
172#endif
173
174/* daemon superdevice number */
175#define VINUM_DAEMON_DEV VINUMMINOR(0, 0, 0, VINUM_SUPERDEV_TYPE)
176
177 /*
178 * the number of object entries to cater for initially, and also the
179 * value by which they are incremented. It doesn't take long
180 * to extend them, so theoretically we could start with 1 of each, but
181 * it's untidy to allocate such small areas. These values are
182 * probably too small.
183 */
184
185 INITIAL_DRIVES = 4,
186 INITIAL_VOLUMES = 4,
187 INITIAL_PLEXES = 8,
188 INITIAL_SUBDISKS = 16,
189 INITIAL_SUBDISKS_IN_PLEX = 4, /* num subdisks to alloc to a plex */
190 INITIAL_SUBDISKS_IN_DRIVE = 4, /* num subdisks to alloc to a drive */
191 INITIAL_DRIVE_FREELIST = 16, /* num entries in drive freelist */
192 PLEX_REGION_TABLE_SIZE = 8, /* num entries in plex region tables */
193 PLEX_LOCKS = 256, /* num locks to alloc to a plex */
194 MAX_REVIVE_BLOCKSIZE = MAXPHYS, /* maximum revive block size */
195 DEFAULT_REVIVE_BLOCKSIZE = 65536,/* default revive block size */
196 VINUMHOSTNAMELEN = 32, /* host name field in label */
984263bc
MD
197};
198
199/* device numbers */
200
201/*
7c8f38d4
MD
202 * 31 30 28 27 20 19 18 16 15 8 7 0
203 * |---------------------------------------------------------------------|
204 * |X | Type | Subdisk number | X| Plex | Major | volno |
205 * |---------------------------------------------------------------------|
984263bc 206 *
7c8f38d4 207 * 0x2 03 1 19 06
984263bc
MD
208 *
209 * The fields in the minor number are interpreted as follows:
210 *
211 * Volume: Only type and volume number are relevant
7c8f38d4
MD
212 * Plex in volume: type, plex number in volume and volume number
213 * are relevant
984263bc
MD
214 * raw plex: type, plex number is made of bits 27-16 and 7-0
215 * raw subdisk: type, subdisk number is made of bits 27-16 and 7-0
216 */
217
7c8f38d4
MD
218#if 0
219
984263bc
MD
/*
 * This doesn't get used.  Consider removing it.
 *
 * Bit-field view of a vinum device number; the widths add up to 32 bits.
 */
struct devcode {
    /*
     * CARE.  These fields assume a big-endian word.  On a
     * little-endian system, they're the wrong way around
     */
    unsigned volume:8;                      /* up to 256 volumes */
    unsigned major:8;                       /* major number fits */
    unsigned plex:3;                        /* up to 8 plexes per volume */
    unsigned unused:1;                      /* up for grabs */
    unsigned sd:8;                          /* up to 256 subdisks per plex */
    unsigned type:3;                        /* type of object */
    /*
     * type field
     VINUM_VOLUME = 0,
     VINUM_PLEX = 1,
     VINUM_SUBDISK = 2,
     VINUM_DRIVE = 3,
     VINUM_SUPERDEV = 4,
     VINUM_RAWPLEX = 5,
     VINUM_RAWSD = 6 */
    unsigned signbit:1;                     /* to make 32 bits */
};
243
7c8f38d4
MD
244#endif
245
#define VINUM_BASE      "vinum/"
#define VINUM_DIR       "/dev/vinum"

/*
 * These definitions help catch
 * userland/kernel mismatches.
 *
 * A debug build (VINUMDEBUG) names its super device "Control" and a
 * normal build names it "control"; each build also knows the other
 * spelling as the "wrong" super device.
 */
#ifdef VINUMDEBUG

/* normal super device */
#define VINUM_WRONGSUPERDEV_NAME        VINUM_DIR "/control"
#define VINUM_WRONGSUPERDEV_BASE        VINUM_BASE "control"

/* debug super device */
#define VINUM_SUPERDEV_NAME             VINUM_DIR "/Control"
#define VINUM_SUPERDEV_BASE             VINUM_BASE "Control"

#else

/* debug super device */
#define VINUM_WRONGSUPERDEV_NAME        VINUM_DIR "/Control"
#define VINUM_WRONGSUPERDEV_BASE        VINUM_BASE "Control"

/* normal super device */
#define VINUM_SUPERDEV_NAME             VINUM_DIR "/control"
#define VINUM_SUPERDEV_BASE             VINUM_BASE "control"

#endif

/* super device for daemon only */
#define VINUM_DAEMON_DEV_NAME           VINUM_DIR "/controld"
#define VINUM_DAEMON_DEV_BASE           VINUM_BASE "controld"
984263bc
MD
278
/*
 * Flags for all objects.  Most of them only apply to
 * specific objects, but we have space for all in any
 * 32 bit flags word.  Each flag is a distinct single bit.
 */
enum objflags {
    VF_LOCKED = 1,                          /* locked access to this object */
    VF_LOCKING = 2,                         /* we want access to this object */
    VF_OPEN = 4,                            /* object has openers */
    VF_WRITETHROUGH = 8,                    /* volume: write through */
    VF_INITED = 0x10,                       /* unit has been initialized */

    /* 0x20 unused, was: VF_WLABEL: label area is writable */
    VF_LABELLING = 0x40,                    /* unit is currently being labelled */
    VF_WANTED = 0x80,                       /* waiting to obtain a lock */
    VF_RAW = 0x100,                         /* raw volume (no file system) */
    VF_LOADED = 0x200,                      /* module is loaded */
    VF_CONFIGURING = 0x400,                 /* someone is changing the config */
    VF_WILL_CONFIGURE = 0x800,              /* someone wants to change the config */
    VF_CONFIG_INCOMPLETE = 0x1000,          /* not finished changing the config */
    VF_CONFIG_SETUPSTATE = 0x2000,          /* set a vol up if all plexes empty */
    VF_READING_CONFIG = 0x4000,             /* reading config database from disk */
    VF_FORCECONFIG = 0x8000,                /* config drives even with diff names */
    VF_NEWBORN = 0x10000,                   /* for objects: we've just created it */
    VF_CONFIGURED = 0x20000,                /* for drives: we read the config */
    VF_STOPPING = 0x40000,                  /* for vinum_conf: stop on last close */

    VF_DAEMONOPEN = 0x80000,                /* the daemon has us open (only
                                             * superdev) */

    VF_CREATED = 0x100000,                  /* for vols: freshly created,
                                             * more than new */
    VF_HOTSPARE = 0x200000,                 /* for drives: use as hot spare */
    VF_RETRYERRORS = 0x400000,              /* don't down subdisks on I/O errors */
};
314
/*
 * Global configuration information for the vinum subsystem.
 *
 * Holds one table pointer per object kind, together with how many
 * entries are allocated and how many are in use, plus request counters.
 */
struct _vinum_conf {
    /* Pointers to vinum structures */
    struct drive *drive;
    struct sd *sd;
    struct plex *plex;
    struct volume *volume;

    /* the number allocated */
    int drives_allocated;
    int subdisks_allocated;
    int plexes_allocated;
    int volumes_allocated;

    /* and the number currently in use */
    int drives_used;
    int subdisks_used;
    int plexes_used;
    int volumes_used;

    int flags;

#define VINUM_MAXACTIVE 30000               /* max number of active requests */
    int active;                             /* current number of requests outstanding */
    int maxactive;                          /* max number of requests ever outstanding */
#ifdef VINUMDEBUG
    struct request *lastrq;
    struct bio *lastbio;
#endif
    int physbufs;
};
346
/*
 * Use these defines to simplify code.  Each one is shorthand for a
 * member of the object named vinum_conf.
 */
#define DRIVE   vinum_conf.drive
#define SD      vinum_conf.sd
#define PLEX    vinum_conf.plex
#define VOL     vinum_conf.volume
#define VFLAGS  vinum_conf.flags
353
354/*
355 * Slice header
356 *
357 * Vinum drives start with this structure:
358 *
359 *\ Sector
360 * |--------------------------------------|
361 * | PDP-11 memorial boot block | 0
362 * |--------------------------------------|
363 * | Disk label, maybe | 1
364 * |--------------------------------------|
365 * | Slice definition (vinum_hdr) | 8
366 * |--------------------------------------|
367 * | |
368 * | Configuration info, first copy | 9
369 * | |
370 * |--------------------------------------|
371 * | |
372 * | Configuration info, second copy | 9 + size of config
373 * | |
374 * |--------------------------------------|
375 */
376
7c8f38d4
MD
377/*
378 * Sizes and offsets of our information
379 */
984263bc 380enum {
7c8f38d4
MD
381 VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
382 VINUMHEADERLEN = 512, /* size of vinum label */
383 VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
384 MAXCONFIG = 65536, /* and size of config copy */
385
386 /* this is where the data starts */
387 DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE
984263bc
MD
388};
389
390/*
391 * hostname is 256 bytes long, but we don't need to shlep
392 * multiple copies in vinum. We use the host name just
393 * to identify this system, and 32 bytes should be ample
394 * for that purpose
395 */
396
397struct vinum_label {
7c8f38d4
MD
398 char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
399 char name[MAXDRIVENAME]; /* our name of the drive */
400 struct timeval date_of_birth; /* the time it was created */
401 struct timeval last_update; /* and the time of last update */
402 /*
403 * total size in bytes of the drive. This value
404 * includes the headers.
405 */
406 off_t drive_size;
984263bc
MD
407};
408
409struct vinum_hdr {
7c8f38d4
MD
410 uint64_t magic; /* we're long on magic numbers */
411
412 /*
413 * Size in bytes of each copy of the
414 * configuration info. This must be a multiple
415 * of the sector size.
416 */
417 int config_length;
418 struct vinum_label label; /* unique label */
984263bc
MD
419};
420
7c8f38d4
MD
/* should be this */
#define VINUM_MAGIC    22322600044678729LL

/* becomes this after obliteration */
#define VINUM_NOMAGIC  22322600044678990LL
426
984263bc
MD
/* Information returned from read_drive_label */
enum drive_label_info {
    DL_CANT_OPEN,                           /* invalid partition */
    DL_NOT_OURS,                            /* valid part, but no vinum label */
    DL_DELETED_LABEL,                       /* valid part, deleted label found */
    DL_WRONG_DRIVE,                         /* drive name doesn't match */
    DL_OURS                                 /* valid partition and label found */
};
435
436/*** Drive definitions ***/
437/*
438 * A drive corresponds to a disk slice. We use a different term to show
439 * the difference in usage: it doesn't have to be a slice, and could
440 * theoretically be a complete, unpartitioned disk
441 */
442
443struct drive {
7c8f38d4
MD
444 char devicename[MAXDRIVENAME]; /* name of the slice it's on */
445 enum drivestate state; /* current state */
446 int flags; /* flags */
447 int subdisks_allocated; /* number of entries in sd */
448 int subdisks_used; /* and the number used */
449 int blocksize; /* size of fs blocks */
450 int pid; /* of locker */
451 u_int64_t sectors_available; /* number of sectors still available */
452 int secsperblock;
453 int lasterror; /* last error on drive */
454 int driveno; /* index of drive in vinum_conf */
455 int opencount; /* number of up subdisks */
456 u_int64_t reads; /* number of reads on this drive */
457 u_int64_t writes; /* number of writes on this drive */
458 u_int64_t bytes_read; /* number of bytes read */
459 u_int64_t bytes_written; /* number of bytes written */
460 struct vinum_label label; /* and the label information */
461#define DRIVE_MAXACTIVE 30000 /* maximum number of active requests */
462 int active; /* current number of reqs outstanding */
463 int maxactive; /* max num of reqs ever outstanding */
464 int freelist_size; /* entries alloced in free list */
465 int freelist_entries; /* entries used in free list */
466 struct drive_freelist { /* sorted list of free space on drive */
467 u_int64_t offset; /* offset of entry */
468 u_int64_t sectors; /* and length in sectors */
469 } *freelist;
470 struct partinfo partinfo; /* partition information */
471 /* XXX kludge until we get this struct cleaned up */
dcd1a9c7 472#ifdef _KERNEL
7c8f38d4
MD
473 struct vnode *vp;
474 struct cdev *dev;
984263bc 475#else
d736a600
MD
476 void *vp_dummy;
477 void *dev_dummy;
984263bc
MD
478#endif
479#ifdef VINUMDEBUG
7c8f38d4
MD
480 char lockfilename[16]; /* locked with file */
481 int lockline; /* and the line number */
984263bc
MD
482#endif
483};
484
485/*** Subdisk definitions ***/
486
487struct sd {
7c8f38d4
MD
488 char name[MAXSDNAME]; /* name of subdisk */
489 enum sdstate state; /* state */
490 int flags;
491 int lasterror; /* last error occurred */
492 /* offsets in blocks */
493 int64_t driveoffset; /* offset on drive */
d736a600
MD
494#ifdef _KERNEL
495 cdev_t sd_dev;
496#else
497 void *sd_dev_dummy;
498#endif
7c8f38d4
MD
499
500 /*
501 * plexoffset is the offset from the beginning
502 * of the plex to the very first part of the
503 * subdisk, in sectors. For striped, RAID-4 and
504 * RAID-5 plexes, only the first stripe is
505 * located at this offset
506 */
507 int64_t plexoffset; /* offset in plex */
508 u_int64_t sectors; /* and length in sectors */
509 int plexno; /* index of plex, if it belongs */
510 int driveno; /* index of the drive */
511 int sdno; /* our index in vinum_conf */
512 int plexsdno; /* and our number in our plex */
513 /* (undefined if no plex) */
514 u_int64_t reads; /* number of reads on this subdisk */
515 u_int64_t writes; /* number of writes on this subdisk */
516 u_int64_t bytes_read; /* number of bytes read */
517 u_int64_t bytes_written; /* number of bytes written */
518 /* revive parameters */
519 u_int64_t revived; /* blkno of current revive request */
520 int revive_blocksize; /* revive block size (bytes) */
521 int revive_interval; /* and time to wait between transfers */
522 pid_t reviver; /* PID of reviving process */
523 /* init parameters */
524 u_int64_t initialized; /* blkno of current init request */
525 int init_blocksize; /* init block size (bytes) */
526 int init_interval; /* time to wait between transfers */
527 struct request *waitlist; /* list of reqs waiting on revive op */
984263bc
MD
528};
529
/*** Plex definitions ***/

/*
 * kinds of plex organization.  The order matters: the isstriped() and
 * isparity() macros compare against plex_striped and plex_raid4.
 */
enum plexorg {
    plex_disorg,                            /* disorganized */
    plex_concat,                            /* concatenated plex */
    plex_striped,                           /* striped plex */
    plex_raid4,                             /* RAID4 plex */
    plex_raid5                              /* RAID5 plex */
};
540
/*
 * Recognize plex organizations.  p is a pointer to an object with an
 * `organization' member; it is parenthesized so that pointer
 * expressions such as `p + 1' work as arguments.
 */
/* striped, RAID 4 or RAID 5 */
#define isstriped(p) ((p)->organization >= plex_striped)

/* RAID 4 or 5 */
#define isparity(p) ((p)->organization >= plex_raid4)
984263bc
MD
547
548struct plex {
7c8f38d4
MD
549 char name[MAXPLEXNAME]; /* name of plex */
550 enum plexorg organization; /* Plex organization */
551 enum plexstate state; /* and current state */
d736a600
MD
552#ifdef _KERNEL
553 cdev_t plex_dev;
554#else
555 void *plex_dev_dummy;
556#endif
7c8f38d4
MD
557 u_int64_t length; /* total length of plex (sectors) */
558 int flags;
559 int stripesize; /* size of stripe or raid band,
560 * in sectors */
561 int subdisks; /* number of associated subdisks */
562 int subdisks_allocated; /* number of subdisks allocated
563 * space for */
564 int *sdnos; /* list of component subdisks */
565 int plexno; /* index of plex in vinum_conf */
566 int volno; /* index of volume */
567 int volplexno; /* number of plex in volume */
568 /* Statistics */
569 u_int64_t reads; /* number of reads on this plex */
570 u_int64_t writes; /* number of writes on this plex */
571 u_int64_t bytes_read; /* number of bytes read */
572 u_int64_t bytes_written; /* number of bytes written */
573 u_int64_t recovered_reads; /* number of recovered read
574 * operations */
575 u_int64_t degraded_writes; /* number of degraded writes */
576 u_int64_t parityless_writes; /* number of parityless writes */
577 u_int64_t multiblock; /* requests that needed more than
578 * one block */
579 u_int64_t multistripe; /* requests that needed more than
580 * one stripe */
581 int sddowncount; /* number of subdisks down */
582
583 /* Lock information */
584 int usedlocks; /* number currently in use */
585 int lockwaits; /* and number of waits for locks */
586 off_t checkblock; /* block number for parity op */
587 struct rangelock *lock; /* ranges of locked addresses */
984263bc
MD
588};
589
590/*** Volume definitions ***/
591
592/* Address range definitions, for locking volumes */
593struct rangelock {
c2fcd54e 594 vinum_off_t stripe; /* address + 1 of the range being locked */
7c8f38d4 595 struct buf *bp; /* user's buffer pointer */
984263bc
MD
596};
597
598struct volume {
7c8f38d4
MD
599 char name[MAXVOLNAME]; /* name of volume */
600 enum volumestate state; /* current state */
601 int plexes; /* number of plexes */
602 int preferred_plex; /* plex to read from, -1 for
603 * round-robin */
d736a600
MD
604#ifdef _KERNEL
605 cdev_t vol_dev;
606#else
607 void *vol_dev_dummy;
608#endif
609
7c8f38d4
MD
610 /*
611 * index of plex used for last read, for
612 * round-robin.
613 */
614 int last_plex_read;
615 int volno; /* volume number */
616 int flags; /* status and configuration flags */
617 int openflags; /* flags supplied to last open(2) */
618 u_int64_t size; /* size of volume */
619 int blocksize; /* logical block size */
620 int active; /* number of outstanding
621 * requests active */
622 int subops; /* and the number of suboperations */
623 /* Statistics */
624 u_int64_t bytes_read; /* number of bytes read */
625 u_int64_t bytes_written; /* number of bytes written */
626 u_int64_t reads; /* number of reads on this volume */
627 u_int64_t writes; /* number of writes on this volume */
628 u_int64_t recovered_reads; /* reads recovered from another plex */
629
630 /*
631 * Unlike subdisks in the plex, space for the
632 * plex pointers is static.
633 */
634 int plex[MAXPLEX]; /* index of plexes */
984263bc
MD
635};
636
/*
 * Table expansion.  Expand table, which contains oldcount
 * entries of type element, by increment entries, and change
 * oldcount accordingly.
 *
 * table and oldcount must be lvalues: the address of table is passed
 * to expand_table() and oldcount is updated in place.  oldcount and
 * increment are each evaluated more than once, so they must not have
 * side effects.  The macro expands to a brace-enclosed compound
 * statement, not an expression.
 */
#define EXPAND(table, element, oldcount, increment)             \
{                                                               \
    expand_table((void **) &(table),                            \
        (oldcount) * sizeof (element),                          \
        ((oldcount) + (increment)) * sizeof (element));         \
    (oldcount) += (increment);                                  \
}
984263bc 649
7c8f38d4
MD
/*
 * Information on vinum's memory usage
 */
struct meminfo {
    int mallocs;                            /* number of malloced blocks */
    int total_malloced;                     /* total amount malloced */
    int highwater;                          /* maximum number of mallocs */
    struct mc *malloced;                    /* pointer to kernel table */
};
659
660#define MCFILENAMELEN 16
661struct mc {
7c8f38d4
MD
662 struct timeval time;
663 int seq;
664 int size;
665 short line;
666 caddr_t address;
667 char file[MCFILENAMELEN];
984263bc
MD
668};
669
/*
 * These enums are used by the state transition
 * routines.  They're in bit map format:
 *
 * Bit 0: Other plexes in the volume are down
 * Bit 1: Other plexes in the volume are up
 * Bit 2: The current plex is up
 * Maybe they should be local to
 * state.c
 *
 * The values are spelled out so the bit map is visible at a glance.
 */
enum volplexstate {
    volplex_onlyusdown = 0,                 /* 0: we're the only plex,
                                             * and we're down */
    volplex_alldown = 1,                    /* 1: another plex is down,
                                             * and so are we */
    volplex_otherup = 2,                    /* 2: another plex is up */
    volplex_otherupdown = 3,                /* 3: other plexes are up and down */
    volplex_onlyus = 4,                     /* 4: we're up and alone */
    volplex_onlyusup = 5,                   /* 5: only we are up, others are down */
    volplex_allup = 6,                      /* 6: all plexes are up */
    volplex_someup = 7                      /* 7: some plexes are up,
                                             * including us */
};
693
/*
 * state map for plex.  One bit per subdisk state, so several states
 * can be combined in one word.
 */
enum sdstates {
    sd_emptystate = 1,
    sd_downstate = 2,                       /* SD is down */
    sd_crashedstate = 4,                    /* SD is crashed */
    sd_obsoletestate = 8,                   /* SD is obsolete */
    sd_stalestate = 16,                     /* SD is stale */
    sd_rebornstate = 32,                    /* SD is reborn */
    sd_upstate = 64,                        /* SD is up */
    sd_initstate = 128,                     /* SD is initializing */
    sd_initializedstate = 256,              /* SD is initialized */
    sd_otherstate = 512,                    /* SD is in some other state */
};
707
/*
 * This is really just a parameter to pass to
 * set_<foo>_state, but since it needs to be known
 * in the external definitions, we need to define
 * it here
 */
enum setstateflags {
    setstate_none = 0,                      /* no flags */
    setstate_force = 1,                     /* force the state change */
    setstate_configuring = 2,               /* we're currently configuring,
                                             * don't save */
};
720
/* Operations for parityops to perform. */
enum parityop {
    checkparity,
    rebuildparity,
    rebuildandcheckparity,                  /* rebuildparity with the -v option */
};
727
728#ifdef VINUMDEBUG
7c8f38d4
MD
729
/*
 * Debugging stuff.  Each flag is a distinct single bit.
 */
enum debugflags {
    DEBUG_ADDRESSES = 1,                    /* show buffer information during
                                             * requests */
    DEBUG_NUMOUTPUT = 2,                    /* show the value of vp->v_numoutput */
    DEBUG_RESID = 4,                        /* go into debugger in complete_rqe */
    DEBUG_LASTREQS = 8,                     /* keep a circular buffer of
                                             * last requests */
    DEBUG_REVIVECONFLICT = 16,              /* print info about revive conflicts */
    DEBUG_EOFINFO = 32,                     /* print info about EOF detection */
    DEBUG_MEMFREE = 64,                     /* keep info about Frees */
    DEBUG_BIGDRIVE = 128,                   /* pretend our drives are 100 times
                                             * the size */
    DEBUG_REMOTEGDB = 256,                  /* go into remote gdb */
    DEBUG_WARNINGS = 512,                   /* log various relatively
                                             * harmless warnings */
};
749
984263bc 750#endif