59156bb83c16837d902917123bcecae025cf1d19
[dragonfly.git] / sys / dev / raid / vinum / vinumstate.c
1 /*-
2  * Copyright (c) 1997, 1998, 1999
3  *      Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6  *
7  *  Written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *      This product includes software developed by Nan Yang Computer
23  *      Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  *
40  * $Id: vinumstate.c,v 2.18 2000/05/10 07:30:50 grog Exp grog $
41  * $FreeBSD: src/sys/dev/vinum/vinumstate.c,v 1.28.2.2 2000/06/08 02:00:23 grog Exp $
42  */
43
44 #include "vinumhdr.h"
45 #include "request.h"
46
47 /* Update drive state */
48 /* Return 1 if the state changes, otherwise 0 */
49 int
50 set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
51 {
52     struct drive *drive = &DRIVE[driveno];
53     int oldstate = drive->state;
54     int sdno;
55
56     if (drive->state == drive_unallocated)                  /* no drive to do anything with, */
57         return 0;
58
59     if (newstate == oldstate)                               /* don't change it if it's not different */
60         return 1;                                           /* all OK */
61     if ((newstate == drive_down)                            /* the drive's going down */
62     &&(!(flags & setstate_force))
63         && (drive->opencount != 0))                         /* we can't do it */
64         return 0;                                           /* don't do it */
65     drive->state = newstate;                                /* set the state */
66     if (drive->label.name[0] != '\0')                       /* we have a name, */
67         log(LOG_INFO,
68             "vinum: drive %s is %s\n",
69             drive->label.name,
70             drive_state(drive->state));
71     if (drive->state != oldstate) {                         /* state has changed */
72         for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
73             if ((SD[sdno].state >= sd_referenced)
74                 && (SD[sdno].driveno == driveno))           /* belongs to this drive */
75                 update_sd_state(sdno);                      /* update the state */
76         }
77     }
78     if (newstate == drive_up) {                             /* want to bring it up */
79         if ((drive->flags & VF_OPEN) == 0)                  /* should be open, but we're not */
80             init_drive(drive, 1);                           /* which changes the state again */
81     } else                                                  /* taking it down or worse */
82         queue_daemon_request(daemonrq_closedrive,           /* get the daemon to close it */
83             (union daemoninfo) drive);
84     if ((flags & setstate_configuring) == 0)                /* configuring? */
85         save_config();                                      /* no: save the updated configuration now */
86     return 1;
87 }
88
89 /*
90  * Try to set the subdisk state.  Return 1 if state changed to
91  * what we wanted, -1 if it changed to something else, and 0
92  * if no change.
93  *
94  * This routine is called both from the user (up, down states only)
95  * and internally.
96  *
97  * The setstate_force bit in the flags enables the state change even
98  * if it could be dangerous to data consistency.  It shouldn't allow
99  * nonsense.
100  */
101 int
102 set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
103 {
104     struct sd *sd = &SD[sdno];
105     struct plex *plex;
106     struct volume *vol;
107     int oldstate = sd->state;
108     int status = 1;                                         /* status to return */
109
110     if (newstate == oldstate)                               /* already there, */
111         return 1;
112     else if (sd->state == sd_unallocated)                   /* no subdisk to do anything with, */
113         return 0;                                           /* can't do it */
114
115     if (sd->driveoffset < 0) {                              /* not allocated space */
116         sd->state = sd_down;
117         if (newstate != sd_down) {
118             if (sd->plexno >= 0)
119                 sdstatemap(&PLEX[sd->plexno]);              /* count up subdisks */
120             return -1;
121         }
122     } else {                                                /* space allocated */
123         switch (newstate) {
124         case sd_down:                                       /* take it down? */
125             /*
126              * If we're attached to a plex, and we're
127              * not reborn, we won't go down without
128              * use of force.
129              */
130             if (!(flags & setstate_force)
131                 && (sd->plexno >= 0)
132                 && (sd->state != sd_reborn))
133                 return 0;                                   /* don't do it */
134             break;
135
136         case sd_initialized:
137             if ((sd->state == sd_initializing)              /* we were initializing */
138             ||(flags & setstate_force))                     /* or we forced it */
139                 break;
140             return 0;                                       /* can't do it otherwise */
141
142         case sd_up:
143             if (DRIVE[sd->driveno].state != drive_up)       /* can't bring the sd up if the drive isn't, */
144                 return 0;                                   /* not even by force */
145             if (flags & setstate_force)                     /* forcing it, */
146                 break;                                      /* just do it, and damn the consequences */
147             switch (sd->state) {
148                 /*
149                  * Perform the necessary tests.  To allow
150                  * the state transition, just break out of
151                  * the switch.
152                  */
153             case sd_crashed:
154             case sd_reborn:
155             case sd_down:                                   /* been down, no data lost */
156                 /*
157                  * If we're associated with a plex, and
158                  * the plex isn't up, or we're the only
159                  * subdisk in the plex, we can do it.
160                  */
161                 if ((sd->plexno >= 0)
162                     && (((PLEX[sd->plexno].state < plex_firstup)
163                             || (PLEX[sd->plexno].subdisks > 1))))
164                     break;                                  /* do it */
165                 if (oldstate != sd_reborn) {
166                     sd->state = sd_reborn;                  /* here it is again */
167                     log(LOG_INFO,
168                         "vinum: %s is %s, not %s\n",
169                         sd->name,
170                         sd_state(sd->state),
171                         sd_state(newstate));
172                 }
173                 status = -1;
174                 break;
175
176             case sd_init:                                   /* brand new */
177                 if (flags & setstate_configuring)           /* we're doing this while configuring */
178                     break;
179                 /* otherwise it's like being empty */
180                 /* FALLTHROUGH */
181
182             case sd_empty:
183             case sd_initialized:
184                 /*
185                  * If we're not part of a plex, or the
186                  * plex is not part of a volume with other
187                  * plexes which are up, we can come up
188                  * without being inconsistent.
189                  *
190                  * If we're part of a parity plex, we'll
191                  * come up if the caller uses force.  This
192                  * is the way we bring them up after
193                  * initialization.
194                  */
195                 if ((sd->plexno < 0)
196                     || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0)
197                     || (isparity((&PLEX[sd->plexno]))
198                         && (flags & setstate_force)))
199                     break;
200
201                 /* Otherwise it's just out of date */
202                 /* FALLTHROUGH */
203
204             case sd_stale:                                  /* out of date info, need reviving */
205             case sd_obsolete:
206                 /*
207
208                  * 1.  If the subdisk is not part of a
209                  *     plex, bring it up, don't revive.
210                  *
211                  * 2.  If the subdisk is part of a
212                  *     one-plex volume or an unattached
213                  *     plex, and it's not RAID-4 or
214                  *     RAID-5, we *can't revive*.  The
215                  *     subdisk doesn't change its state.
216                  *
217                  * 3.  If the subdisk is part of a
218                  *     one-plex volume or an unattached
219                  *     plex, and it's RAID-4 or RAID-5,
220                  *     but more than one subdisk is down,
221                  *     we *still can't revive*.  The
222                  *     subdisk doesn't change its state.
223                  *
224                  * 4.  If the subdisk is part of a
225                  *     multi-plex volume, we'll change to
226                  *     reviving and let the revive
227                  *     routines find out whether it will
228                  *     work or not.  If they don't, the
229                  *     revive stops with an error message,
230                  *     but the state doesn't change
231                  *     (FWIW).
232                  */
233                 if (sd->plexno < 0)                         /* no plex associated, */
234                     break;                                  /* bring it up */
235                 plex = &PLEX[sd->plexno];
236                 if (plex->volno >= 0)                       /* have a volume */
237                     vol = &VOL[plex->volno];
238                 else
239                     vol = NULL;
240                 /*
241                  * We can't do it if:
242                  *
243                  * 1: we don't have a volume
244                  * 2: we're the only plex in the volume
245                  * 3: we're a RAID-4 or RAID-5 plex, and
246                  *    more than one subdisk is down.
247                  */
248                 if (((vol == NULL)
249                         || (vol->plexes == 1))
250                     && ((!isparity(plex))
251                         || (plex->sddowncount > 1))) {
252                     if (sd->state == sd_initializing)       /* it's finished initializing  */
253                         sd->state = sd_initialized;
254                     else
255                         return 0;                           /* can't do it */
256                 } else {
257                     sd->state = sd_reviving;                /* put in reviving state */
258                     sd->revived = 0;                        /* nothing done yet */
259                     status = EAGAIN;                        /* need to repeat */
260                 }
261                 break;
262
263             case sd_reviving:
264                 if (flags & setstate_force)                 /* insist, */
265                     break;
266                 return EAGAIN;                              /* no, try again */
267
268             default:                                        /* can't do it */
269                 /*
270                  * There's no way to bring subdisks up directly from
271                  * other states.  First they need to be initialized
272                  * or revived.
273                  */
274                 return 0;
275             }
276             break;
277
278         default:                                            /* other ones, only internal with force */
279             if ((flags & setstate_force) == 0)              /* no force?  What's this? */
280                 return 0;                                   /* don't do it */
281         }
282     }
283     if (status == 1) {                                      /* we can do it, */
284         sd->state = newstate;
285         if (flags & setstate_force)
286             log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state));
287         else
288             log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
289     } else                                                  /* we don't get here with status 0 */
290         log(LOG_INFO,
291             "vinum: %s is %s, not %s\n",
292             sd->name,
293             sd_state(sd->state),
294             sd_state(newstate));
295     if (sd->plexno >= 0)                                    /* we belong to a plex */
296         update_plex_state(sd->plexno);                      /* update plex state */
297     if ((flags & setstate_configuring) == 0)                /* save config now */
298         save_config();
299     return status;
300 }
301
302 /*
303  * Set the state of a plex dependent on its subdisks.
304  * This time round, we'll let plex state just reflect
305  * aggregate subdisk state, so this becomes an order of
306  * magnitude less complicated.  In particular, ignore
307  * the requested state.
308  */
309 int
310 set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
311 {
312     struct plex *plex;                                      /* point to our plex */
313     enum plexstate oldstate;
314     enum volplexstate vps;                                  /* how do we compare with the other plexes? */
315
316     plex = &PLEX[plexno];                                   /* point to our plex */
317     oldstate = plex->state;
318
319     /* If the plex isn't allocated, we can't do it. */
320     if (plex->state == plex_unallocated)
321         return 0;
322
323     /*
324      * If it's already in the the state we want,
325      * and it's not up, just return.  If it's up,
326      * we still need to do some housekeeping.
327      */
328     if ((state == oldstate)
329         && (state != plex_up))
330         return 1;
331     vps = vpstate(plex);                                    /* how do we compare with the other plexes? */
332     switch (state) {
333         /*
334          * We can't bring the plex up, even by force,
335          * unless it's ready.  update_plex_state
336          * checks that.
337          */
338     case plex_up:                                           /* bring the plex up */
339         update_plex_state(plex->plexno);                    /* it'll come up if it can */
340         break;
341
342     case plex_down:                                         /* want to take it down */
343         /*
344          * If we're the only one, or the only one
345          * which is up, we need force to do it.
346          */
347         if (((vps == volplex_onlyus)
348                 || (vps == volplex_onlyusup))
349             && (!(flags & setstate_force)))
350             return 0;                                       /* can't do it */
351         plex->state = state;                                /* do it */
352         invalidate_subdisks(plex, sd_down);                 /* and down all up subdisks */
353         break;
354
355         /*
356          * This is only requested internally.
357          * Trust ourselves
358          */
359     case plex_faulty:
360         plex->state = state;                                /* do it */
361         invalidate_subdisks(plex, sd_crashed);              /* and crash all up subdisks */
362         break;
363
364     case plex_initializing:
365         /* XXX consider what safeguards we need here */
366         if ((flags & setstate_force) == 0)
367             return 0;
368         plex->state = state;                                /* do it */
369         break;
370
371         /* What's this? */
372     default:
373         return 0;
374     }
375     if (plex->state != oldstate)                            /* we've changed, */
376         log(LOG_INFO,                                       /* tell them about it */
377             "vinum: %s is %s\n",
378             plex->name,
379             plex_state(plex->state));
380     /*
381      * Now see what we have left, and whether
382      * we're taking the volume down
383      */
384     if (plex->volno >= 0)                                   /* we have a volume */
385         update_volume_state(plex->volno);                   /* update its state */
386     if ((flags & setstate_configuring) == 0)                /* save config now */
387         save_config();                                      /* yes: save the updated configuration */
388     return 1;
389 }
390
391 /* Update the state of a plex dependent on its plexes. */
392 int
393 set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
394 {
395     struct volume *vol = &VOL[volno];                       /* point to our volume */
396
397     if (vol->state == volume_unallocated)                   /* no volume to do anything with, */
398         return 0;
399     if (vol->state == state)                                /* we're there already */
400         return 1;
401
402     if (state == volume_up)                                 /* want to come up */
403         update_volume_state(volno);
404     else if (state == volume_down) {                        /* want to go down */
405         if (((vol->flags & VF_OPEN) == 0)                   /* not open */
406         ||((flags & setstate_force) != 0)) {                /* or we're forcing */
407             vol->state = volume_down;
408             log(LOG_INFO,
409                 "vinum: volume %s is %s\n",
410                 vol->name,
411                 volume_state(vol->state));
412             if ((flags & setstate_configuring) == 0)        /* save config now */
413                 save_config();                              /* yes: save the updated configuration */
414             return 1;
415         }
416     }
417     return 0;                                               /* no change */
418 }
419
420 /* Set the state of a subdisk based on its environment */
421 void
422 update_sd_state(int sdno)
423 {
424     struct sd *sd;
425     struct drive *drive;
426     enum sdstate oldstate;
427
428     sd = &SD[sdno];
429     oldstate = sd->state;
430     drive = &DRIVE[sd->driveno];
431
432     if (drive->state == drive_up) {
433         switch (sd->state) {
434         case sd_down:
435         case sd_crashed:
436             sd->state = sd_reborn;                          /* back up again with no loss */
437             break;
438
439         default:
440             break;
441         }
442     } else {                                                /* down or worse */
443         switch (sd->state) {
444         case sd_up:
445         case sd_reborn:
446         case sd_reviving:
447         case sd_empty:
448             sd->state = sd_crashed;                         /* lost our drive */
449             break;
450
451         default:
452             break;
453         }
454     }
455     if (sd->state != oldstate)                              /* state has changed, */
456         log(LOG_INFO,                                       /* say so */
457             "vinum: %s is %s\n",
458             sd->name,
459             sd_state(sd->state));
460     if (sd->plexno >= 0)                                    /* we're part of a plex, */
461         update_plex_state(sd->plexno);                      /* update its state */
462 }
463
464 /*
465  * Force a plex and all its subdisks
466  * into an 'up' state.  This is a helper
467  * for update_plex_state.
468  */
469 void
470 forceup(int plexno)
471 {
472     struct plex *plex;
473     int sdno;
474
475     plex = &PLEX[plexno];                                   /* point to the plex */
476     plex->state = plex_up;                                  /* and bring it up */
477
478     /* change the subdisks to up state */
479     for (sdno = 0; sdno < plex->subdisks; sdno++) {
480         SD[plex->sdnos[sdno]].state = sd_up;
481         log(LOG_INFO,                                       /* tell them about it */
482             "vinum: %s is up\n",
483             SD[plex->sdnos[sdno]].name);
484     }
485 }
486
487 /* Set the state of a plex based on its environment */
488 void
489 update_plex_state(int plexno)
490 {
491     struct plex *plex;                                      /* point to our plex */
492     enum plexstate oldstate;
493     enum sdstates statemap;                                 /* get a map of the subdisk states */
494     enum volplexstate vps;                                  /* how do we compare with the other plexes? */
495
496     plex = &PLEX[plexno];                                   /* point to our plex */
497     oldstate = plex->state;
498     statemap = sdstatemap(plex);                            /* get a map of the subdisk states */
499     vps = vpstate(plex);                                    /* how do we compare with the other plexes? */
500
501     if (statemap & sd_initstate)                            /* something initializing? */
502         plex->state = plex_initializing;                    /* yup, that makes the plex the same */
503     else if (statemap == sd_upstate)
504         /*
505          * All the subdisks are up.  This also means that
506          * they are consistent, so we can just bring
507          * the plex up
508          */
509         plex->state = plex_up;
510     else if (isparity(plex)                                 /* RAID-4 or RAID-5 plex */
511     &&(plex->sddowncount == 1))                             /* and exactly one subdisk down */
512         plex->state = plex_degraded;                        /* limping a bit */
513     else if (((statemap & ~sd_downstate) == sd_emptystate)  /* all subdisks empty */
514     ||((statemap & ~sd_downstate)
515             == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) {
516         if ((vps & volplex_otherup) == 0) {                 /* no other plex is up */
517             struct volume *vol = &VOL[plex->volno];         /* possible volume to which it points */
518
519             /*
520              * If we're a striped or concat plex
521              * associated with a volume, none of whose
522              * plexes are up, and we're new and untested,
523              * and the volume has the setupstate bit set,
524              * we can pretend to be in a consistent state.
525              *
526              * We need to do this in one swell foop: on
527              * the next call we will no longer be just
528              * empty.
529              *
530              * This code assumes that all the other plexes
531              * are also capable of coming up (i.e. all the
532              * sds are up), but that's OK: we'll come back
533              * to this function for the remaining plexes
534              * in the volume.
535              */
536             if ((plex->state == plex_init)
537                 && (plex->volno >= 0)
538                 && (vol->flags & VF_CONFIG_SETUPSTATE)) {
539                 for (plexno = 0; plexno < vol->plexes; plexno++)
540                     forceup(VOL[plex->volno].plex[plexno]);
541             } else if ((statemap == sd_initializedstate)    /* if it's initialized (not empty) */
542 ||(plex->organization == plex_concat)                       /* and we're not RAID-4 or RAID-5 */
543             ||(plex->organization == plex_striped))
544                 forceup(plexno);                            /* we'll do it */
545             /*
546              * This leaves a case where things don't get
547              * done: the plex is RAID-4 or RAID-5, and
548              * the subdisks are all empty.  They need to
549              * be initialized first.
550              */
551         } else {
552             if (statemap == sd_upstate)                     /* all subdisks up */
553                 plex->state = plex_up;                      /* we can come up too */
554             else
555                 plex->state = plex_faulty;
556         }
557     } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
558         plex->state = plex_flaky;
559     else if (statemap & (sd_upstate | sd_rebornstate))      /* some up or reborn */
560         plex->state = plex_corrupt;                         /* corrupt */
561     else if (statemap & (sd_initstate | sd_emptystate))     /* some subdisks empty or initializing */
562         plex->state = plex_initializing;
563     else                                                    /* nothing at all up */
564         plex->state = plex_faulty;
565
566     if (plex->state != oldstate)                            /* state has changed, */
567         log(LOG_INFO,                                       /* tell them about it */
568             "vinum: %s is %s\n",
569             plex->name,
570             plex_state(plex->state));
571     if (plex->volno >= 0)                                   /* we're part of a volume, */
572         update_volume_state(plex->volno);                   /* update its state */
573 }
574
575 /* Set volume state based on its components */
576 void
577 update_volume_state(int volno)
578 {
579     struct volume *vol;                                     /* our volume */
580     int plexno;
581     enum volumestate oldstate;
582
583     vol = &VOL[volno];                                      /* point to our volume */
584     oldstate = vol->state;
585
586     for (plexno = 0; plexno < vol->plexes; plexno++) {
587         struct plex *plex = &PLEX[vol->plex[plexno]];       /* point to the plex */
588         if (plex->state >= plex_corrupt) {                  /* something accessible, */
589             vol->state = volume_up;
590             break;
591         }
592     }
593     if (plexno == vol->plexes)                              /* didn't find an up plex */
594         vol->state = volume_down;
595
596     if (vol->state != oldstate) {                           /* state changed */
597         log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state));
598         save_config();                                      /* save the updated configuration */
599     }
600 }
601
602 /*
603  * Called from request routines when they find
604  * a subdisk which is not kosher.  Decide whether
605  * it warrants changing the state.  Return
606  * REQUEST_DOWN if we can't use the subdisk,
607  * REQUEST_OK if we can.
608  */
609 /*
610  * A prior version of this function checked the plex
611  * state as well.  At the moment, consider plex states
612  * information for the user only.  We'll ignore them
613  * and use the subdisk state only.  The last version of
614  * this file with the old logic was 2.7. XXX
615  */
616 enum requeststatus
617 checksdstate(struct sd *sd, struct request *rq, vinum_off_t diskaddr, vinum_off_t diskend)
618 {
619     struct plex *plex = &PLEX[sd->plexno];
620     int writeop = (rq->bio->bio_buf->b_cmd != BUF_CMD_READ);        /* note if we're writing */
621
622     switch (sd->state) {
623         /* We shouldn't get called if the subdisk is up */
624     case sd_up:
625         return REQUEST_OK;
626
627     case sd_reviving:
628         /*
629          * Access to a reviving subdisk depends on the
630          * organization of the plex:
631          *
632          * - If it's concatenated, access the subdisk
633          *   up to its current revive point.  If we
634          *   want to write to the subdisk overlapping
635          *   the current revive block, set the
636          *   conflict flag in the request, asking the
637          *   caller to put the request on the wait
638          *   list, which will be attended to by
639          *   revive_block when it's done.
640          * - if it's striped, we can't do it (we could
641          *   do some hairy calculations, but it's
642          *   unlikely to work).
643          * - if it's RAID-4 or RAID-5, we can do it as
644          *   long as only one subdisk is down
645          */
646         if (plex->organization == plex_striped)             /* plex is striped, */
647             return REQUEST_DOWN;
648
649         else if (isparity(plex)) {                          /* RAID-4 or RAID-5 plex */
650             if (plex->sddowncount > 1)                      /* with more than one sd down, */
651                 return REQUEST_DOWN;
652             else
653                 /*
654                  * XXX We shouldn't do this if we can find a
655                  * better way.  Check the other plexes
656                  * first, and return a DOWN if another
657                  * plex will do it better
658                  */
659                 return REQUEST_OK;                          /* OK, we'll find a way */
660         }
661         if (diskaddr > (sd->revived
662                 + sd->plexoffset
663                 + (sd->revive_blocksize >> DEV_BSHIFT)))    /* we're beyond the end */
664             return REQUEST_DOWN;
665         else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
666             if (writeop) {
667                 rq->flags |= XFR_REVIVECONFLICT;            /* note a potential conflict */
668                 rq->sdno = sd->sdno;                        /* and which sd last caused it */
669             } else
670                 return REQUEST_DOWN;
671         }
672         return REQUEST_OK;
673
674     case sd_reborn:
675         if (writeop)
676             return REQUEST_OK;                              /* always write to a reborn disk */
677         else                                                /* don't allow a read */
678             /*
679                * Handle the mapping.  We don't want to reject
680                * a read request to a reborn subdisk if that's
681                * all we have. XXX
682              */
683             return REQUEST_DOWN;
684
685     case sd_down:
686         if (writeop)                                        /* writing to a consistent down disk */
687             set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
688         return REQUEST_DOWN;
689
690     case sd_crashed:
691         if (writeop)                                        /* writing to a consistent down disk */
692             set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
693         return REQUEST_DOWN;
694
695     default:
696         return REQUEST_DOWN;
697     }
698 }
699
700 /* return a state map for the subdisks of a plex */
701 enum sdstates
702 sdstatemap(struct plex *plex)
703 {
704     int sdno;
705     enum sdstates statemap = 0;                             /* note the states we find */
706
707     plex->sddowncount = 0;                                  /* no subdisks down yet */
708     for (sdno = 0; sdno < plex->subdisks; sdno++) {
709         struct sd *sd = &SD[plex->sdnos[sdno]];             /* point to the subdisk */
710
711         switch (sd->state) {
712         case sd_empty:
713             statemap |= sd_emptystate;
714             (plex->sddowncount)++;                          /* another unusable subdisk */
715             break;
716
717         case sd_init:
718             statemap |= sd_initstate;
719             (plex->sddowncount)++;                          /* another unusable subdisk */
720             break;
721
722         case sd_down:
723             statemap |= sd_downstate;
724             (plex->sddowncount)++;                          /* another unusable subdisk */
725             break;
726
727         case sd_crashed:
728             statemap |= sd_crashedstate;
729             (plex->sddowncount)++;                          /* another unusable subdisk */
730             break;
731
732         case sd_obsolete:
733             statemap |= sd_obsoletestate;
734             (plex->sddowncount)++;                          /* another unusable subdisk */
735             break;
736
737         case sd_stale:
738             statemap |= sd_stalestate;
739             (plex->sddowncount)++;                          /* another unusable subdisk */
740             break;
741
742         case sd_reborn:
743             statemap |= sd_rebornstate;
744             break;
745
746         case sd_up:
747             statemap |= sd_upstate;
748             break;
749
750         case sd_initializing:
751             statemap |= sd_initstate;
752             (plex->sddowncount)++;                          /* another unusable subdisk */
753             break;
754
755         case sd_initialized:
756             statemap |= sd_initializedstate;
757             (plex->sddowncount)++;                          /* another unusable subdisk */
758             break;
759
760         case sd_unallocated:
761         case sd_uninit:
762         case sd_reviving:
763         case sd_referenced:
764             statemap |= sd_otherstate;
765             (plex->sddowncount)++;                          /* another unusable subdisk */
766         }
767     }
768     return statemap;
769 }
770
771 /* determine the state of the volume relative to this plex */
772 enum volplexstate
773 vpstate(struct plex *plex)
774 {
775     struct volume *vol;
776     enum volplexstate state = volplex_onlyusdown;           /* state to return */
777     int plexno;
778
779     if (plex->volno < 0) {                                  /* not associated with a volume */
780         if (plex->state > plex_degraded)
781             return volplex_onlyus;                          /* just us */
782         else
783             return volplex_onlyusdown;                      /* assume the worst */
784     }
785     vol = &VOL[plex->volno];                                /* point to our volume */
786     for (plexno = 0; plexno < vol->plexes; plexno++) {
787         if (&PLEX[vol->plex[plexno]] == plex) {             /* us */
788             if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */
789                 state |= volplex_onlyus;                    /* yes */
790         } else {
791             if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */
792                 state |= volplex_otherup;                   /* and when they were up, they were up */
793             else
794                 state |= volplex_alldown;                   /* and when they were down, they were down */
795         }
796     }
797     return state;                                           /* and when they were only halfway up */
798 }                                                           /* they were neither up nor down */
799
/*
 * Return 1 if every bit which is set in b is also
 * set in a, 0 otherwise.
 */
int allset(int a, int b);

int
allset(int a, int b)
{
    int missing = b & ~a;                                   /* bits of b which a lacks */

    return missing == 0;
}
808
809 /* Invalidate the subdisks belonging to a plex */
810 void
811 invalidate_subdisks(struct plex *plex, enum sdstate state)
812 {
813     int sdno;
814
815     for (sdno = 0; sdno < plex->subdisks; sdno++) {         /* for each subdisk */
816         struct sd *sd = &SD[plex->sdnos[sdno]];
817
818         switch (sd->state) {
819         case sd_unallocated:
820         case sd_uninit:
821         case sd_init:
822         case sd_initializing:
823         case sd_initialized:
824         case sd_empty:
825         case sd_obsolete:
826         case sd_stale:
827         case sd_crashed:
828         case sd_down:
829         case sd_referenced:
830             break;
831
832         case sd_reviving:
833         case sd_reborn:
834         case sd_up:
835             set_sd_state(plex->sdnos[sdno], state, setstate_force);
836         }
837     }
838 }
839
840 /*
841  * Start an object, in other words do what we can to get it up.
842  * This is called from vinumioctl (VINUMSTART).
843  * Return error indications via ioctl_reply
844  */
845 void
846 start_object(struct vinum_ioctl_msg *data)
847 {
848     int status;
849     int objindex = data->index;                             /* data gets overwritten */
850     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
851     enum setstateflags flags;
852
853     if (data->force != 0)                                   /* are we going to use force? */
854         flags = setstate_force;                             /* yes */
855     else
856         flags = setstate_none;                              /* no */
857
858     switch (data->type) {
859     case drive_object:
860         status = set_drive_state(objindex, drive_up, flags);
861         if (DRIVE[objindex].state != drive_up)              /* set status on whether we really did it */
862             ioctl_reply->error = EBUSY;
863         else
864             ioctl_reply->error = 0;
865         break;
866
867     case sd_object:
868         if (DRIVE[SD[objindex].driveno].state != drive_up) {
869             ioctl_reply->error = EIO;
870             strcpy(ioctl_reply->msg, "Drive is down");
871             return;
872         }
873         if (data->blocksize)
874             SD[objindex].revive_blocksize = data->blocksize;
875         if ((SD[objindex].state == sd_reviving)             /* reviving, */
876         ||(SD[objindex].state == sd_stale)) {               /* or stale, will revive */
877             SD[objindex].state = sd_reviving;               /* make sure we're reviving */
878             ioctl_reply->error = revive_block(objindex);    /* revive another block */
879             ioctl_reply->msg[0] = '\0';                     /* no comment */
880             return;
881         } else if (SD[objindex].state == sd_initializing) { /* initializing, */
882             if (data->blocksize)
883                 SD[objindex].init_blocksize = data->blocksize;
884             ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */
885             ioctl_reply->msg[0] = '\0';                     /* no comment */
886             return;
887         }
888         status = set_sd_state(objindex, sd_up, flags);      /* set state */
889         if (status != EAGAIN) {                             /* not first revive or initialize, */
890             if (SD[objindex].state != sd_up)                /* set status on whether we really did it */
891                 ioctl_reply->error = EBUSY;
892             else
893                 ioctl_reply->error = 0;
894         } else
895             ioctl_reply->error = status;
896         break;
897
898     case plex_object:
899         status = set_plex_state(objindex, plex_up, flags);
900         if (PLEX[objindex].state != plex_up)                /* set status on whether we really did it */
901             ioctl_reply->error = EBUSY;
902         else
903             ioctl_reply->error = 0;
904         break;
905
906     case volume_object:
907         status = set_volume_state(objindex, volume_up, flags);
908         if (VOL[objindex].state != volume_up)               /* set status on whether we really did it */
909             ioctl_reply->error = EBUSY;
910         else
911             ioctl_reply->error = 0;
912         break;
913
914     default:
915         ioctl_reply->error = EINVAL;
916         strcpy(ioctl_reply->msg, "Invalid object type");
917         return;
918     }
919     /*
920      * There's no point in saying anything here:
921      * the userland program does it better
922      */
923     ioctl_reply->msg[0] = '\0';
924 }
925
926 /*
927  * Stop an object, in other words do what we can to get it down
928  * This is called from vinumioctl (VINUMSTOP).
929  * Return error indications via ioctl_reply.
930  */
931 void
932 stop_object(struct vinum_ioctl_msg *data)
933 {
934     int status = 1;
935     int objindex = data->index;                             /* save the number from change */
936     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
937
938     switch (data->type) {
939     case drive_object:
940         status = set_drive_state(objindex, drive_down, data->force);
941         break;
942
943     case sd_object:
944         status = set_sd_state(objindex, sd_down, data->force);
945         break;
946
947     case plex_object:
948         status = set_plex_state(objindex, plex_down, data->force);
949         break;
950
951     case volume_object:
952         status = set_volume_state(objindex, volume_down, data->force);
953         break;
954
955     default:
956         ioctl_reply->error = EINVAL;
957         strcpy(ioctl_reply->msg, "Invalid object type");
958         return;
959     }
960     ioctl_reply->msg[0] = '\0';
961     if (status == 0)                                        /* couldn't do it */
962         ioctl_reply->error = EBUSY;
963     else
964         ioctl_reply->error = 0;
965 }
966
967 /*
968  * VINUM_SETSTATE ioctl: set an object state.
969  * msg is the message passed by the user.
970  */
971 void
972 setstate(struct vinum_ioctl_msg *msg)
973 {
974     int sdno;
975     struct sd *sd;
976     struct plex *plex;
977     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
978
979     switch (msg->state) {
980     case object_down:
981         stop_object(msg);
982         break;
983
984     case object_initializing:
985         switch (msg->type) {
986         case sd_object:
987             sd = &SD[msg->index];
988             if ((msg->index >= vinum_conf.subdisks_allocated)
989                 || (sd->state <= sd_referenced)) {
990                 ksprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
991                 ioctl_reply->error = EFAULT;
992                 return;
993             }
994             set_sd_state(msg->index, sd_initializing, msg->force);
995             if (sd->state != sd_initializing) {
996                 strcpy(ioctl_reply->msg, "Can't set state");
997                 ioctl_reply->error = EBUSY;
998             } else
999                 ioctl_reply->error = 0;
1000             break;
1001
1002         case plex_object:
1003             plex = &PLEX[msg->index];
1004             if ((msg->index >= vinum_conf.plexes_allocated)
1005                 || (plex->state <= plex_unallocated)) {
1006                 ksprintf(ioctl_reply->msg, "Invalid plex %d", msg->index);
1007                 ioctl_reply->error = EFAULT;
1008                 return;
1009             }
1010             set_plex_state(msg->index, plex_initializing, msg->force);
1011             if (plex->state != plex_initializing) {
1012                 strcpy(ioctl_reply->msg, "Can't set state");
1013                 ioctl_reply->error = EBUSY;
1014             } else {
1015                 ioctl_reply->error = 0;
1016                 for (sdno = 0; sdno < plex->subdisks; sdno++) {
1017                     sd = &SD[plex->sdnos[sdno]];
1018                     set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
1019                     if (sd->state != sd_initializing) {
1020                         strcpy(ioctl_reply->msg, "Can't set state");
1021                         ioctl_reply->error = EBUSY;
1022                         break;
1023                     }
1024                 }
1025             }
1026             break;
1027
1028         default:
1029             strcpy(ioctl_reply->msg, "Invalid object");
1030             ioctl_reply->error = EINVAL;
1031         }
1032         break;
1033
1034     case object_initialized:
1035         if (msg->type == sd_object) {
1036             sd = &SD[msg->index];
1037             if ((msg->index >= vinum_conf.subdisks_allocated)
1038                 || (sd->state <= sd_referenced)) {
1039                 ksprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
1040                 ioctl_reply->error = EFAULT;
1041                 return;
1042             }
1043             set_sd_state(msg->index, sd_initialized, msg->force);
1044             if (sd->state != sd_initializing) {
1045                 strcpy(ioctl_reply->msg, "Can't set state");
1046                 ioctl_reply->error = EBUSY;
1047             } else
1048                 ioctl_reply->error = 0;
1049         } else {
1050             strcpy(ioctl_reply->msg, "Invalid object");
1051             ioctl_reply->error = EINVAL;
1052         }
1053         break;
1054
1055     case object_up:
1056         start_object(msg);
1057     }
1058 }
1059
1060 /*
1061  * Brute force set state function.  Don't look at
1062  * any dependencies, just do it.  This is mainly
1063  * intended for testing and recovery.
1064  */
1065 void
1066 setstate_by_force(struct vinum_ioctl_msg *msg)
1067 {
1068     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
1069
1070     switch (msg->type) {
1071     case drive_object:
1072         DRIVE[msg->index].state = msg->state;
1073         break;
1074
1075     case sd_object:
1076         SD[msg->index].state = msg->state;
1077         break;
1078
1079     case plex_object:
1080         PLEX[msg->index].state = msg->state;
1081         break;
1082
1083     case volume_object:
1084         VOL[msg->index].state = msg->state;
1085         break;
1086
1087     default:
1088         break;
1089     }
1090     ioctl_reply->error = 0;
1091 }
1092 /* Local Variables: */
1093 /* fill-column: 50 */
1094 /* End: */