Replace the buffer cache's B_READ, B_WRITE, B_FORMAT, and B_FREEBUF
[dragonfly.git] / sys / dev / raid / vinum / vinumstate.c
1 /*-
2  * Copyright (c) 1997, 1998, 1999
3  *      Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6  *
7  *  Written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *      This product includes software developed by Nan Yang Computer
23  *      Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  *
40  * $Id: vinumstate.c,v 2.18 2000/05/10 07:30:50 grog Exp grog $
41  * $FreeBSD: src/sys/dev/vinum/vinumstate.c,v 1.28.2.2 2000/06/08 02:00:23 grog Exp $
42  * $DragonFly: src/sys/dev/raid/vinum/vinumstate.c,v 1.6 2006/04/30 17:22:17 dillon Exp $
43  */
44
45 #include "vinumhdr.h"
46 #include "request.h"
47
48 /* Update drive state */
49 /* Return 1 if the state changes, otherwise 0 */
50 int
51 set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
52 {
53     struct drive *drive = &DRIVE[driveno];
54     int oldstate = drive->state;
55     int sdno;
56
57     if (drive->state == drive_unallocated)                  /* no drive to do anything with, */
58         return 0;
59
60     if (newstate == oldstate)                               /* don't change it if it's not different */
61         return 1;                                           /* all OK */
62     if ((newstate == drive_down)                            /* the drive's going down */
63     &&(!(flags & setstate_force))
64         && (drive->opencount != 0))                         /* we can't do it */
65         return 0;                                           /* don't do it */
66     drive->state = newstate;                                /* set the state */
67     if (drive->label.name[0] != '\0')                       /* we have a name, */
68         log(LOG_INFO,
69             "vinum: drive %s is %s\n",
70             drive->label.name,
71             drive_state(drive->state));
72     if (drive->state != oldstate) {                         /* state has changed */
73         for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
74             if ((SD[sdno].state >= sd_referenced)
75                 && (SD[sdno].driveno == driveno))           /* belongs to this drive */
76                 update_sd_state(sdno);                      /* update the state */
77         }
78     }
79     if (newstate == drive_up) {                             /* want to bring it up */
80         if ((drive->flags & VF_OPEN) == 0)                  /* should be open, but we're not */
81             init_drive(drive, 1);                           /* which changes the state again */
82     } else                                                  /* taking it down or worse */
83         queue_daemon_request(daemonrq_closedrive,           /* get the daemon to close it */
84             (union daemoninfo) drive);
85     if ((flags & setstate_configuring) == 0)                /* configuring? */
86         save_config();                                      /* no: save the updated configuration now */
87     return 1;
88 }
89
90 /*
91  * Try to set the subdisk state.  Return 1 if state changed to
92  * what we wanted, -1 if it changed to something else, and 0
93  * if no change.
94  *
95  * This routine is called both from the user (up, down states only)
96  * and internally.
97  *
98  * The setstate_force bit in the flags enables the state change even
99  * if it could be dangerous to data consistency.  It shouldn't allow
100  * nonsense.
101  */
102 int
103 set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
104 {
105     struct sd *sd = &SD[sdno];
106     struct plex *plex;
107     struct volume *vol;
108     int oldstate = sd->state;
109     int status = 1;                                         /* status to return */
110
111     if (newstate == oldstate)                               /* already there, */
112         return 1;
113     else if (sd->state == sd_unallocated)                   /* no subdisk to do anything with, */
114         return 0;                                           /* can't do it */
115
116     if (sd->driveoffset < 0) {                              /* not allocated space */
117         sd->state = sd_down;
118         if (newstate != sd_down) {
119             if (sd->plexno >= 0)
120                 sdstatemap(&PLEX[sd->plexno]);              /* count up subdisks */
121             return -1;
122         }
123     } else {                                                /* space allocated */
124         switch (newstate) {
125         case sd_down:                                       /* take it down? */
126             /*
127              * If we're attached to a plex, and we're
128              * not reborn, we won't go down without
129              * use of force.
130              */
131             if ((!flags & setstate_force)
132                 && (sd->plexno >= 0)
133                 && (sd->state != sd_reborn))
134                 return 0;                                   /* don't do it */
135             break;
136
137         case sd_initialized:
138             if ((sd->state == sd_initializing)              /* we were initializing */
139             ||(flags & setstate_force))                     /* or we forced it */
140                 break;
141             return 0;                                       /* can't do it otherwise */
142
143         case sd_up:
144             if (DRIVE[sd->driveno].state != drive_up)       /* can't bring the sd up if the drive isn't, */
145                 return 0;                                   /* not even by force */
146             if (flags & setstate_force)                     /* forcing it, */
147                 break;                                      /* just do it, and damn the consequences */
148             switch (sd->state) {
149                 /*
150                  * Perform the necessary tests.  To allow
151                  * the state transition, just break out of
152                  * the switch.
153                  */
154             case sd_crashed:
155             case sd_reborn:
156             case sd_down:                                   /* been down, no data lost */
157                 /*
158                  * If we're associated with a plex, and
159                  * the plex isn't up, or we're the only
160                  * subdisk in the plex, we can do it.
161                  */
162                 if ((sd->plexno >= 0)
163                     && (((PLEX[sd->plexno].state < plex_firstup)
164                             || (PLEX[sd->plexno].subdisks > 1))))
165                     break;                                  /* do it */
166                 if (oldstate != sd_reborn) {
167                     sd->state = sd_reborn;                  /* here it is again */
168                     log(LOG_INFO,
169                         "vinum: %s is %s, not %s\n",
170                         sd->name,
171                         sd_state(sd->state),
172                         sd_state(newstate));
173                 }
174                 status = -1;
175                 break;
176
177             case sd_init:                                   /* brand new */
178                 if (flags & setstate_configuring)           /* we're doing this while configuring */
179                     break;
180                 /* otherwise it's like being empty */
181                 /* FALLTHROUGH */
182
183             case sd_empty:
184             case sd_initialized:
185                 /*
186                  * If we're not part of a plex, or the
187                  * plex is not part of a volume with other
188                  * plexes which are up, we can come up
189                  * without being inconsistent.
190                  *
191                  * If we're part of a parity plex, we'll
192                  * come up if the caller uses force.  This
193                  * is the way we bring them up after
194                  * initialization.
195                  */
196                 if ((sd->plexno < 0)
197                     || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0)
198                     || (isparity((&PLEX[sd->plexno]))
199                         && (flags & setstate_force)))
200                     break;
201
202                 /* Otherwise it's just out of date */
203                 /* FALLTHROUGH */
204
205             case sd_stale:                                  /* out of date info, need reviving */
206             case sd_obsolete:
207                 /*
208
209                  * 1.  If the subdisk is not part of a
210                  *     plex, bring it up, don't revive.
211                  *
212                  * 2.  If the subdisk is part of a
213                  *     one-plex volume or an unattached
214                  *     plex, and it's not RAID-4 or
215                  *     RAID-5, we *can't revive*.  The
216                  *     subdisk doesn't change its state.
217                  *
218                  * 3.  If the subdisk is part of a
219                  *     one-plex volume or an unattached
220                  *     plex, and it's RAID-4 or RAID-5,
221                  *     but more than one subdisk is down,
222                  *     we *still can't revive*.  The
223                  *     subdisk doesn't change its state.
224                  *
225                  * 4.  If the subdisk is part of a
226                  *     multi-plex volume, we'll change to
227                  *     reviving and let the revive
228                  *     routines find out whether it will
229                  *     work or not.  If they don't, the
230                  *     revive stops with an error message,
231                  *     but the state doesn't change
232                  *     (FWIW).
233                  */
234                 if (sd->plexno < 0)                         /* no plex associated, */
235                     break;                                  /* bring it up */
236                 plex = &PLEX[sd->plexno];
237                 if (plex->volno >= 0)                       /* have a volume */
238                     vol = &VOL[plex->volno];
239                 else
240                     vol = NULL;
241                 /*
242                  * We can't do it if:
243                  *
244                  * 1: we don't have a volume
245                  * 2: we're the only plex in the volume
246                  * 3: we're a RAID-4 or RAID-5 plex, and
247                  *    more than one subdisk is down.
248                  */
249                 if (((vol == NULL)
250                         || (vol->plexes == 1))
251                     && ((!isparity(plex))
252                         || (plex->sddowncount > 1))) {
253                     if (sd->state == sd_initializing)       /* it's finished initializing  */
254                         sd->state = sd_initialized;
255                     else
256                         return 0;                           /* can't do it */
257                 } else {
258                     sd->state = sd_reviving;                /* put in reviving state */
259                     sd->revived = 0;                        /* nothing done yet */
260                     status = EAGAIN;                        /* need to repeat */
261                 }
262                 break;
263
264             case sd_reviving:
265                 if (flags & setstate_force)                 /* insist, */
266                     break;
267                 return EAGAIN;                              /* no, try again */
268
269             default:                                        /* can't do it */
270                 /*
271                  * There's no way to bring subdisks up directly from
272                  * other states.  First they need to be initialized
273                  * or revived.
274                  */
275                 return 0;
276             }
277             break;
278
279         default:                                            /* other ones, only internal with force */
280             if ((flags & setstate_force) == 0)              /* no force?  What's this? */
281                 return 0;                                   /* don't do it */
282         }
283     }
284     if (status == 1) {                                      /* we can do it, */
285         sd->state = newstate;
286         if (flags & setstate_force)
287             log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state));
288         else
289             log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
290     } else                                                  /* we don't get here with status 0 */
291         log(LOG_INFO,
292             "vinum: %s is %s, not %s\n",
293             sd->name,
294             sd_state(sd->state),
295             sd_state(newstate));
296     if (sd->plexno >= 0)                                    /* we belong to a plex */
297         update_plex_state(sd->plexno);                      /* update plex state */
298     if ((flags & setstate_configuring) == 0)                /* save config now */
299         save_config();
300     return status;
301 }
302
303 /*
304  * Set the state of a plex dependent on its subdisks.
305  * This time round, we'll let plex state just reflect
306  * aggregate subdisk state, so this becomes an order of
307  * magnitude less complicated.  In particular, ignore
308  * the requested state.
309  */
310 int
311 set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
312 {
313     struct plex *plex;                                      /* point to our plex */
314     enum plexstate oldstate;
315     enum volplexstate vps;                                  /* how do we compare with the other plexes? */
316
317     plex = &PLEX[plexno];                                   /* point to our plex */
318     oldstate = plex->state;
319
320     /* If the plex isn't allocated, we can't do it. */
321     if (plex->state == plex_unallocated)
322         return 0;
323
324     /*
325      * If it's already in the the state we want,
326      * and it's not up, just return.  If it's up,
327      * we still need to do some housekeeping.
328      */
329     if ((state == oldstate)
330         && (state != plex_up))
331         return 1;
332     vps = vpstate(plex);                                    /* how do we compare with the other plexes? */
333     switch (state) {
334         /*
335          * We can't bring the plex up, even by force,
336          * unless it's ready.  update_plex_state
337          * checks that.
338          */
339     case plex_up:                                           /* bring the plex up */
340         update_plex_state(plex->plexno);                    /* it'll come up if it can */
341         break;
342
343     case plex_down:                                         /* want to take it down */
344         /*
345          * If we're the only one, or the only one
346          * which is up, we need force to do it.
347          */
348         if (((vps == volplex_onlyus)
349                 || (vps == volplex_onlyusup))
350             && (!(flags & setstate_force)))
351             return 0;                                       /* can't do it */
352         plex->state = state;                                /* do it */
353         invalidate_subdisks(plex, sd_down);                 /* and down all up subdisks */
354         break;
355
356         /*
357          * This is only requested internally.
358          * Trust ourselves
359          */
360     case plex_faulty:
361         plex->state = state;                                /* do it */
362         invalidate_subdisks(plex, sd_crashed);              /* and crash all up subdisks */
363         break;
364
365     case plex_initializing:
366         /* XXX consider what safeguards we need here */
367         if ((flags & setstate_force) == 0)
368             return 0;
369         plex->state = state;                                /* do it */
370         break;
371
372         /* What's this? */
373     default:
374         return 0;
375     }
376     if (plex->state != oldstate)                            /* we've changed, */
377         log(LOG_INFO,                                       /* tell them about it */
378             "vinum: %s is %s\n",
379             plex->name,
380             plex_state(plex->state));
381     /*
382      * Now see what we have left, and whether
383      * we're taking the volume down
384      */
385     if (plex->volno >= 0)                                   /* we have a volume */
386         update_volume_state(plex->volno);                   /* update its state */
387     if ((flags & setstate_configuring) == 0)                /* save config now */
388         save_config();                                      /* yes: save the updated configuration */
389     return 1;
390 }
391
392 /* Update the state of a plex dependent on its plexes. */
393 int
394 set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
395 {
396     struct volume *vol = &VOL[volno];                       /* point to our volume */
397
398     if (vol->state == volume_unallocated)                   /* no volume to do anything with, */
399         return 0;
400     if (vol->state == state)                                /* we're there already */
401         return 1;
402
403     if (state == volume_up)                                 /* want to come up */
404         update_volume_state(volno);
405     else if (state == volume_down) {                        /* want to go down */
406         if (((vol->flags & VF_OPEN) == 0)                   /* not open */
407         ||((flags & setstate_force) != 0)) {                /* or we're forcing */
408             vol->state = volume_down;
409             log(LOG_INFO,
410                 "vinum: volume %s is %s\n",
411                 vol->name,
412                 volume_state(vol->state));
413             if ((flags & setstate_configuring) == 0)        /* save config now */
414                 save_config();                              /* yes: save the updated configuration */
415             return 1;
416         }
417     }
418     return 0;                                               /* no change */
419 }
420
421 /* Set the state of a subdisk based on its environment */
422 void
423 update_sd_state(int sdno)
424 {
425     struct sd *sd;
426     struct drive *drive;
427     enum sdstate oldstate;
428
429     sd = &SD[sdno];
430     oldstate = sd->state;
431     drive = &DRIVE[sd->driveno];
432
433     if (drive->state == drive_up) {
434         switch (sd->state) {
435         case sd_down:
436         case sd_crashed:
437             sd->state = sd_reborn;                          /* back up again with no loss */
438             break;
439
440         default:
441             break;
442         }
443     } else {                                                /* down or worse */
444         switch (sd->state) {
445         case sd_up:
446         case sd_reborn:
447         case sd_reviving:
448         case sd_empty:
449             sd->state = sd_crashed;                         /* lost our drive */
450             break;
451
452         default:
453             break;
454         }
455     }
456     if (sd->state != oldstate)                              /* state has changed, */
457         log(LOG_INFO,                                       /* say so */
458             "vinum: %s is %s\n",
459             sd->name,
460             sd_state(sd->state));
461     if (sd->plexno >= 0)                                    /* we're part of a plex, */
462         update_plex_state(sd->plexno);                      /* update its state */
463 }
464
465 /*
466  * Force a plex and all its subdisks
467  * into an 'up' state.  This is a helper
468  * for update_plex_state.
469  */
470 void
471 forceup(int plexno)
472 {
473     struct plex *plex;
474     int sdno;
475
476     plex = &PLEX[plexno];                                   /* point to the plex */
477     plex->state = plex_up;                                  /* and bring it up */
478
479     /* change the subdisks to up state */
480     for (sdno = 0; sdno < plex->subdisks; sdno++) {
481         SD[plex->sdnos[sdno]].state = sd_up;
482         log(LOG_INFO,                                       /* tell them about it */
483             "vinum: %s is up\n",
484             SD[plex->sdnos[sdno]].name);
485     }
486 }
487
488 /* Set the state of a plex based on its environment */
489 void
490 update_plex_state(int plexno)
491 {
492     struct plex *plex;                                      /* point to our plex */
493     enum plexstate oldstate;
494     enum sdstates statemap;                                 /* get a map of the subdisk states */
495     enum volplexstate vps;                                  /* how do we compare with the other plexes? */
496
497     plex = &PLEX[plexno];                                   /* point to our plex */
498     oldstate = plex->state;
499     statemap = sdstatemap(plex);                            /* get a map of the subdisk states */
500     vps = vpstate(plex);                                    /* how do we compare with the other plexes? */
501
502     if (statemap & sd_initstate)                            /* something initializing? */
503         plex->state = plex_initializing;                    /* yup, that makes the plex the same */
504     else if (statemap == sd_upstate)
505         /*
506          * All the subdisks are up.  This also means that
507          * they are consistent, so we can just bring
508          * the plex up
509          */
510         plex->state = plex_up;
511     else if (isparity(plex)                                 /* RAID-4 or RAID-5 plex */
512     &&(plex->sddowncount == 1))                             /* and exactly one subdisk down */
513         plex->state = plex_degraded;                        /* limping a bit */
514     else if (((statemap & ~sd_downstate) == sd_emptystate)  /* all subdisks empty */
515     ||((statemap & ~sd_downstate)
516             == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) {
517         if ((vps & volplex_otherup) == 0) {                 /* no other plex is up */
518             struct volume *vol = &VOL[plex->volno];         /* possible volume to which it points */
519
520             /*
521              * If we're a striped or concat plex
522              * associated with a volume, none of whose
523              * plexes are up, and we're new and untested,
524              * and the volume has the setupstate bit set,
525              * we can pretend to be in a consistent state.
526              *
527              * We need to do this in one swell foop: on
528              * the next call we will no longer be just
529              * empty.
530              *
531              * This code assumes that all the other plexes
532              * are also capable of coming up (i.e. all the
533              * sds are up), but that's OK: we'll come back
534              * to this function for the remaining plexes
535              * in the volume.
536              */
537             if ((plex->state == plex_init)
538                 && (plex->volno >= 0)
539                 && (vol->flags & VF_CONFIG_SETUPSTATE)) {
540                 for (plexno = 0; plexno < vol->plexes; plexno++)
541                     forceup(VOL[plex->volno].plex[plexno]);
542             } else if ((statemap == sd_initializedstate)    /* if it's initialized (not empty) */
543 ||(plex->organization == plex_concat)                       /* and we're not RAID-4 or RAID-5 */
544             ||(plex->organization == plex_striped))
545                 forceup(plexno);                            /* we'll do it */
546             /*
547              * This leaves a case where things don't get
548              * done: the plex is RAID-4 or RAID-5, and
549              * the subdisks are all empty.  They need to
550              * be initialized first.
551              */
552         } else {
553             if (statemap == sd_upstate)                     /* all subdisks up */
554                 plex->state = plex_up;                      /* we can come up too */
555             else
556                 plex->state = plex_faulty;
557         }
558     } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
559         plex->state = plex_flaky;
560     else if (statemap & (sd_upstate | sd_rebornstate))      /* some up or reborn */
561         plex->state = plex_corrupt;                         /* corrupt */
562     else if (statemap & (sd_initstate | sd_emptystate))     /* some subdisks empty or initializing */
563         plex->state = plex_initializing;
564     else                                                    /* nothing at all up */
565         plex->state = plex_faulty;
566
567     if (plex->state != oldstate)                            /* state has changed, */
568         log(LOG_INFO,                                       /* tell them about it */
569             "vinum: %s is %s\n",
570             plex->name,
571             plex_state(plex->state));
572     if (plex->volno >= 0)                                   /* we're part of a volume, */
573         update_volume_state(plex->volno);                   /* update its state */
574 }
575
576 /* Set volume state based on its components */
577 void
578 update_volume_state(int volno)
579 {
580     struct volume *vol;                                     /* our volume */
581     int plexno;
582     enum volumestate oldstate;
583
584     vol = &VOL[volno];                                      /* point to our volume */
585     oldstate = vol->state;
586
587     for (plexno = 0; plexno < vol->plexes; plexno++) {
588         struct plex *plex = &PLEX[vol->plex[plexno]];       /* point to the plex */
589         if (plex->state >= plex_corrupt) {                  /* something accessible, */
590             vol->state = volume_up;
591             break;
592         }
593     }
594     if (plexno == vol->plexes)                              /* didn't find an up plex */
595         vol->state = volume_down;
596
597     if (vol->state != oldstate) {                           /* state changed */
598         log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state));
599         save_config();                                      /* save the updated configuration */
600     }
601 }
602
603 /*
604  * Called from request routines when they find
605  * a subdisk which is not kosher.  Decide whether
606  * it warrants changing the state.  Return
607  * REQUEST_DOWN if we can't use the subdisk,
608  * REQUEST_OK if we can.
609  */
610 /*
611  * A prior version of this function checked the plex
612  * state as well.  At the moment, consider plex states
613  * information for the user only.  We'll ignore them
614  * and use the subdisk state only.  The last version of
615  * this file with the old logic was 2.7. XXX
616  */
617 enum requeststatus
618 checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
619 {
620     struct plex *plex = &PLEX[sd->plexno];
621     int writeop = (rq->bio->bio_buf->b_cmd != BUF_CMD_READ);        /* note if we're writing */
622
623     switch (sd->state) {
624         /* We shouldn't get called if the subdisk is up */
625     case sd_up:
626         return REQUEST_OK;
627
628     case sd_reviving:
629         /*
630          * Access to a reviving subdisk depends on the
631          * organization of the plex:
632          *
633          * - If it's concatenated, access the subdisk
634          *   up to its current revive point.  If we
635          *   want to write to the subdisk overlapping
636          *   the current revive block, set the
637          *   conflict flag in the request, asking the
638          *   caller to put the request on the wait
639          *   list, which will be attended to by
640          *   revive_block when it's done.
641          * - if it's striped, we can't do it (we could
642          *   do some hairy calculations, but it's
643          *   unlikely to work).
644          * - if it's RAID-4 or RAID-5, we can do it as
645          *   long as only one subdisk is down
646          */
647         if (plex->organization == plex_striped)             /* plex is striped, */
648             return REQUEST_DOWN;
649
650         else if (isparity(plex)) {                          /* RAID-4 or RAID-5 plex */
651             if (plex->sddowncount > 1)                      /* with more than one sd down, */
652                 return REQUEST_DOWN;
653             else
654                 /*
655                  * XXX We shouldn't do this if we can find a
656                  * better way.  Check the other plexes
657                  * first, and return a DOWN if another
658                  * plex will do it better
659                  */
660                 return REQUEST_OK;                          /* OK, we'll find a way */
661         }
662         if (diskaddr > (sd->revived
663                 + sd->plexoffset
664                 + (sd->revive_blocksize >> DEV_BSHIFT)))    /* we're beyond the end */
665             return REQUEST_DOWN;
666         else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
667             if (writeop) {
668                 rq->flags |= XFR_REVIVECONFLICT;            /* note a potential conflict */
669                 rq->sdno = sd->sdno;                        /* and which sd last caused it */
670             } else
671                 return REQUEST_DOWN;
672         }
673         return REQUEST_OK;
674
675     case sd_reborn:
676         if (writeop)
677             return REQUEST_OK;                              /* always write to a reborn disk */
678         else                                                /* don't allow a read */
679             /*
680                * Handle the mapping.  We don't want to reject
681                * a read request to a reborn subdisk if that's
682                * all we have. XXX
683              */
684             return REQUEST_DOWN;
685
686     case sd_down:
687         if (writeop)                                        /* writing to a consistent down disk */
688             set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
689         return REQUEST_DOWN;
690
691     case sd_crashed:
692         if (writeop)                                        /* writing to a consistent down disk */
693             set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
694         return REQUEST_DOWN;
695
696     default:
697         return REQUEST_DOWN;
698     }
699 }
700
701 /* return a state map for the subdisks of a plex */
702 enum sdstates
703 sdstatemap(struct plex *plex)
704 {
705     int sdno;
706     enum sdstates statemap = 0;                             /* note the states we find */
707
708     plex->sddowncount = 0;                                  /* no subdisks down yet */
709     for (sdno = 0; sdno < plex->subdisks; sdno++) {
710         struct sd *sd = &SD[plex->sdnos[sdno]];             /* point to the subdisk */
711
712         switch (sd->state) {
713         case sd_empty:
714             statemap |= sd_emptystate;
715             (plex->sddowncount)++;                          /* another unusable subdisk */
716             break;
717
718         case sd_init:
719             statemap |= sd_initstate;
720             (plex->sddowncount)++;                          /* another unusable subdisk */
721             break;
722
723         case sd_down:
724             statemap |= sd_downstate;
725             (plex->sddowncount)++;                          /* another unusable subdisk */
726             break;
727
728         case sd_crashed:
729             statemap |= sd_crashedstate;
730             (plex->sddowncount)++;                          /* another unusable subdisk */
731             break;
732
733         case sd_obsolete:
734             statemap |= sd_obsoletestate;
735             (plex->sddowncount)++;                          /* another unusable subdisk */
736             break;
737
738         case sd_stale:
739             statemap |= sd_stalestate;
740             (plex->sddowncount)++;                          /* another unusable subdisk */
741             break;
742
743         case sd_reborn:
744             statemap |= sd_rebornstate;
745             break;
746
747         case sd_up:
748             statemap |= sd_upstate;
749             break;
750
751         case sd_initializing:
752             statemap |= sd_initstate;
753             (plex->sddowncount)++;                          /* another unusable subdisk */
754             break;
755
756         case sd_initialized:
757             statemap |= sd_initializedstate;
758             (plex->sddowncount)++;                          /* another unusable subdisk */
759             break;
760
761         case sd_unallocated:
762         case sd_uninit:
763         case sd_reviving:
764         case sd_referenced:
765             statemap |= sd_otherstate;
766             (plex->sddowncount)++;                          /* another unusable subdisk */
767         }
768     }
769     return statemap;
770 }
771
772 /* determine the state of the volume relative to this plex */
773 enum volplexstate
774 vpstate(struct plex *plex)
775 {
776     struct volume *vol;
777     enum volplexstate state = volplex_onlyusdown;           /* state to return */
778     int plexno;
779
780     if (plex->volno < 0) {                                  /* not associated with a volume */
781         if (plex->state > plex_degraded)
782             return volplex_onlyus;                          /* just us */
783         else
784             return volplex_onlyusdown;                      /* assume the worst */
785     }
786     vol = &VOL[plex->volno];                                /* point to our volume */
787     for (plexno = 0; plexno < vol->plexes; plexno++) {
788         if (&PLEX[vol->plex[plexno]] == plex) {             /* us */
789             if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */
790                 state |= volplex_onlyus;                    /* yes */
791         } else {
792             if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */
793                 state |= volplex_otherup;                   /* and when they were up, they were up */
794             else
795                 state |= volplex_alldown;                   /* and when they were down, they were down */
796         }
797     }
798     return state;                                           /* and when they were only halfway up */
799 }                                                           /* they were neither up nor down */
800
/*
 * Return 1 if every bit that is set in b is also set in a,
 * otherwise 0.  An empty b is trivially satisfied.
 */
int allset(int a, int b);

int
allset(int a, int b)
{
    /* Any bit of b that a lacks survives the mask. */
    return (b & ~a) == 0;
}
809
810 /* Invalidate the subdisks belonging to a plex */
811 void
812 invalidate_subdisks(struct plex *plex, enum sdstate state)
813 {
814     int sdno;
815
816     for (sdno = 0; sdno < plex->subdisks; sdno++) {         /* for each subdisk */
817         struct sd *sd = &SD[plex->sdnos[sdno]];
818
819         switch (sd->state) {
820         case sd_unallocated:
821         case sd_uninit:
822         case sd_init:
823         case sd_initializing:
824         case sd_initialized:
825         case sd_empty:
826         case sd_obsolete:
827         case sd_stale:
828         case sd_crashed:
829         case sd_down:
830         case sd_referenced:
831             break;
832
833         case sd_reviving:
834         case sd_reborn:
835         case sd_up:
836             set_sd_state(plex->sdnos[sdno], state, setstate_force);
837         }
838     }
839 }
840
841 /*
842  * Start an object, in other words do what we can to get it up.
843  * This is called from vinumioctl (VINUMSTART).
844  * Return error indications via ioctl_reply
845  */
846 void
847 start_object(struct vinum_ioctl_msg *data)
848 {
849     int status;
850     int objindex = data->index;                             /* data gets overwritten */
851     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
852     enum setstateflags flags;
853
854     if (data->force != 0)                                   /* are we going to use force? */
855         flags = setstate_force;                             /* yes */
856     else
857         flags = setstate_none;                              /* no */
858
859     switch (data->type) {
860     case drive_object:
861         status = set_drive_state(objindex, drive_up, flags);
862         if (DRIVE[objindex].state != drive_up)              /* set status on whether we really did it */
863             ioctl_reply->error = EBUSY;
864         else
865             ioctl_reply->error = 0;
866         break;
867
868     case sd_object:
869         if (DRIVE[SD[objindex].driveno].state != drive_up) {
870             ioctl_reply->error = EIO;
871             strcpy(ioctl_reply->msg, "Drive is down");
872             return;
873         }
874         if (data->blocksize)
875             SD[objindex].revive_blocksize = data->blocksize;
876         if ((SD[objindex].state == sd_reviving)             /* reviving, */
877         ||(SD[objindex].state == sd_stale)) {               /* or stale, will revive */
878             SD[objindex].state = sd_reviving;               /* make sure we're reviving */
879             ioctl_reply->error = revive_block(objindex);    /* revive another block */
880             ioctl_reply->msg[0] = '\0';                     /* no comment */
881             return;
882         } else if (SD[objindex].state == sd_initializing) { /* initializing, */
883             if (data->blocksize)
884                 SD[objindex].init_blocksize = data->blocksize;
885             ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */
886             ioctl_reply->msg[0] = '\0';                     /* no comment */
887             return;
888         }
889         status = set_sd_state(objindex, sd_up, flags);      /* set state */
890         if (status != EAGAIN) {                             /* not first revive or initialize, */
891             if (SD[objindex].state != sd_up)                /* set status on whether we really did it */
892                 ioctl_reply->error = EBUSY;
893             else
894                 ioctl_reply->error = 0;
895         } else
896             ioctl_reply->error = status;
897         break;
898
899     case plex_object:
900         status = set_plex_state(objindex, plex_up, flags);
901         if (PLEX[objindex].state != plex_up)                /* set status on whether we really did it */
902             ioctl_reply->error = EBUSY;
903         else
904             ioctl_reply->error = 0;
905         break;
906
907     case volume_object:
908         status = set_volume_state(objindex, volume_up, flags);
909         if (VOL[objindex].state != volume_up)               /* set status on whether we really did it */
910             ioctl_reply->error = EBUSY;
911         else
912             ioctl_reply->error = 0;
913         break;
914
915     default:
916         ioctl_reply->error = EINVAL;
917         strcpy(ioctl_reply->msg, "Invalid object type");
918         return;
919     }
920     /*
921      * There's no point in saying anything here:
922      * the userland program does it better
923      */
924     ioctl_reply->msg[0] = '\0';
925 }
926
927 /*
928  * Stop an object, in other words do what we can to get it down
929  * This is called from vinumioctl (VINUMSTOP).
930  * Return error indications via ioctl_reply.
931  */
932 void
933 stop_object(struct vinum_ioctl_msg *data)
934 {
935     int status = 1;
936     int objindex = data->index;                             /* save the number from change */
937     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
938
939     switch (data->type) {
940     case drive_object:
941         status = set_drive_state(objindex, drive_down, data->force);
942         break;
943
944     case sd_object:
945         status = set_sd_state(objindex, sd_down, data->force);
946         break;
947
948     case plex_object:
949         status = set_plex_state(objindex, plex_down, data->force);
950         break;
951
952     case volume_object:
953         status = set_volume_state(objindex, volume_down, data->force);
954         break;
955
956     default:
957         ioctl_reply->error = EINVAL;
958         strcpy(ioctl_reply->msg, "Invalid object type");
959         return;
960     }
961     ioctl_reply->msg[0] = '\0';
962     if (status == 0)                                        /* couldn't do it */
963         ioctl_reply->error = EBUSY;
964     else
965         ioctl_reply->error = 0;
966 }
967
968 /*
969  * VINUM_SETSTATE ioctl: set an object state.
970  * msg is the message passed by the user.
971  */
972 void
973 setstate(struct vinum_ioctl_msg *msg)
974 {
975     int sdno;
976     struct sd *sd;
977     struct plex *plex;
978     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
979
980     switch (msg->state) {
981     case object_down:
982         stop_object(msg);
983         break;
984
985     case object_initializing:
986         switch (msg->type) {
987         case sd_object:
988             sd = &SD[msg->index];
989             if ((msg->index >= vinum_conf.subdisks_allocated)
990                 || (sd->state <= sd_referenced)) {
991                 sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
992                 ioctl_reply->error = EFAULT;
993                 return;
994             }
995             set_sd_state(msg->index, sd_initializing, msg->force);
996             if (sd->state != sd_initializing) {
997                 strcpy(ioctl_reply->msg, "Can't set state");
998                 ioctl_reply->error = EBUSY;
999             } else
1000                 ioctl_reply->error = 0;
1001             break;
1002
1003         case plex_object:
1004             plex = &PLEX[msg->index];
1005             if ((msg->index >= vinum_conf.plexes_allocated)
1006                 || (plex->state <= plex_unallocated)) {
1007                 sprintf(ioctl_reply->msg, "Invalid plex %d", msg->index);
1008                 ioctl_reply->error = EFAULT;
1009                 return;
1010             }
1011             set_plex_state(msg->index, plex_initializing, msg->force);
1012             if (plex->state != plex_initializing) {
1013                 strcpy(ioctl_reply->msg, "Can't set state");
1014                 ioctl_reply->error = EBUSY;
1015             } else {
1016                 ioctl_reply->error = 0;
1017                 for (sdno = 0; sdno < plex->subdisks; sdno++) {
1018                     sd = &SD[plex->sdnos[sdno]];
1019                     set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
1020                     if (sd->state != sd_initializing) {
1021                         strcpy(ioctl_reply->msg, "Can't set state");
1022                         ioctl_reply->error = EBUSY;
1023                         break;
1024                     }
1025                 }
1026             }
1027             break;
1028
1029         default:
1030             strcpy(ioctl_reply->msg, "Invalid object");
1031             ioctl_reply->error = EINVAL;
1032         }
1033         break;
1034
1035     case object_initialized:
1036         if (msg->type == sd_object) {
1037             sd = &SD[msg->index];
1038             if ((msg->index >= vinum_conf.subdisks_allocated)
1039                 || (sd->state <= sd_referenced)) {
1040                 sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
1041                 ioctl_reply->error = EFAULT;
1042                 return;
1043             }
1044             set_sd_state(msg->index, sd_initialized, msg->force);
1045             if (sd->state != sd_initializing) {
1046                 strcpy(ioctl_reply->msg, "Can't set state");
1047                 ioctl_reply->error = EBUSY;
1048             } else
1049                 ioctl_reply->error = 0;
1050         } else {
1051             strcpy(ioctl_reply->msg, "Invalid object");
1052             ioctl_reply->error = EINVAL;
1053         }
1054         break;
1055
1056     case object_up:
1057         start_object(msg);
1058     }
1059 }
1060
1061 /*
1062  * Brute force set state function.  Don't look at
1063  * any dependencies, just do it.  This is mainly
1064  * intended for testing and recovery.
1065  */
1066 void
1067 setstate_by_force(struct vinum_ioctl_msg *msg)
1068 {
1069     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
1070
1071     switch (msg->type) {
1072     case drive_object:
1073         DRIVE[msg->index].state = msg->state;
1074         break;
1075
1076     case sd_object:
1077         SD[msg->index].state = msg->state;
1078         break;
1079
1080     case plex_object:
1081         PLEX[msg->index].state = msg->state;
1082         break;
1083
1084     case volume_object:
1085         VOL[msg->index].state = msg->state;
1086         break;
1087
1088     default:
1089         break;
1090     }
1091     ioctl_reply->error = 0;
1092 }
1093 /* Local Variables: */
1094 /* fill-column: 50 */
1095 /* End: */