Merge branch 'vendor/OPENPAM'
[dragonfly.git] / sys / dev / raid / vinum / vinumstate.c
1 /*-
2  * Copyright (c) 1997, 1998, 1999
3  *      Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6  *
7  *  Written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *      This product includes software developed by Nan Yang Computer
23  *      Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  *
40  * $Id: vinumstate.c,v 2.18 2000/05/10 07:30:50 grog Exp grog $
41  * $FreeBSD: src/sys/dev/vinum/vinumstate.c,v 1.28.2.2 2000/06/08 02:00:23 grog Exp $
42  */
43
44 #include "vinumhdr.h"
45 #include "request.h"
46
47 /* Update drive state */
48 /* Return 1 if the state changes, otherwise 0 */
49 int
50 set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
51 {
52     union daemoninfo di;
53     struct drive *drive = &DRIVE[driveno];
54     int oldstate = drive->state;
55     int sdno;
56
57     if (drive->state == drive_unallocated)                  /* no drive to do anything with, */
58         return 0;
59
60     if (newstate == oldstate)                               /* don't change it if it's not different */
61         return 1;                                           /* all OK */
62     if ((newstate == drive_down)                            /* the drive's going down */
63     &&(!(flags & setstate_force))
64         && (drive->opencount != 0))                         /* we can't do it */
65         return 0;                                           /* don't do it */
66     drive->state = newstate;                                /* set the state */
67     if (drive->label.name[0] != '\0')                       /* we have a name, */
68         log(LOG_INFO,
69             "vinum: drive %s is %s\n",
70             drive->label.name,
71             drive_state(drive->state));
72     if (drive->state != oldstate) {                         /* state has changed */
73         for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
74             if ((SD[sdno].state >= sd_referenced)
75                 && (SD[sdno].driveno == driveno))           /* belongs to this drive */
76                 update_sd_state(sdno);                      /* update the state */
77         }
78     }
79     if (newstate == drive_up) {                             /* want to bring it up */
80         if ((drive->flags & VF_OPEN) == 0)                  /* should be open, but we're not */
81             init_drive(drive, 1);                           /* which changes the state again */
82     } else {                                                /* taking it down or worse */
83         di.drive = drive;
84         queue_daemon_request(daemonrq_closedrive, di);      /* get the daemon to close it */
85     }
86     if ((flags & setstate_configuring) == 0)                /* configuring? */
87         save_config();                                      /* no: save the updated configuration now */
88     return 1;
89 }
90
91 /*
92  * Try to set the subdisk state.  Return 1 if state changed to
93  * what we wanted, -1 if it changed to something else, and 0
94  * if no change.
95  *
96  * This routine is called both from the user (up, down states only)
97  * and internally.
98  *
99  * The setstate_force bit in the flags enables the state change even
100  * if it could be dangerous to data consistency.  It shouldn't allow
101  * nonsense.
     *
     * Further return details, as implemented below:
     *
     * - 1 is also returned when the subdisk is already in the
     *   requested state.
     * - EAGAIN is returned when an sd_up request only moved the subdisk
     *   to sd_reviving, or when the subdisk is already sd_reviving and
     *   setstate_force is not set.
     * - If the subdisk has no space allocated (driveoffset < 0), it is
     *   put into sd_down; for any other requested state -1 is returned
     *   straight away, without logging, updating the plex state or
     *   saving the configuration.
     *
     * On every path that reaches the end of the function, the owning
     * plex (if any) is updated and the configuration is saved unless
     * setstate_configuring is set.
102  */
103 int
104 set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
105 {
106     struct sd *sd = &SD[sdno];
107     struct plex *plex;
108     struct volume *vol;
109     int oldstate = sd->state;
110     int status = 1;                                         /* status to return */
111
112     if (newstate == oldstate)                               /* already there, */
113         return 1;
114     else if (sd->state == sd_unallocated)                   /* no subdisk to do anything with, */
115         return 0;                                           /* can't do it */
116
117     if (sd->driveoffset < 0) {                              /* not allocated space */
118         sd->state = sd_down;
119         if (newstate != sd_down) {
120             if (sd->plexno >= 0)
121                 sdstatemap(&PLEX[sd->plexno]);              /* count up subdisks */
122             return -1;
123         }
124     } else {                                                /* space allocated */
125         switch (newstate) {
126         case sd_down:                                       /* take it down? */
127             /*
128              * If we're attached to a plex, and we're
129              * not reborn, we won't go down without
130              * use of force.
131              */
132             if (!(flags & setstate_force)
133                 && (sd->plexno >= 0)
134                 && (sd->state != sd_reborn))
135                 return 0;                                   /* don't do it */
136             break;
137
138         case sd_initialized:
139             if ((sd->state == sd_initializing)              /* we were initializing */
140             ||(flags & setstate_force))                     /* or we forced it */
141                 break;
142             return 0;                                       /* can't do it otherwise */
143
144         case sd_up:
145             if (DRIVE[sd->driveno].state != drive_up)       /* can't bring the sd up if the drive isn't, */
146                 return 0;                                   /* not even by force */
147             if (flags & setstate_force)                     /* forcing it, */
148                 break;                                      /* just do it, and damn the consequences */
149             switch (sd->state) {
150                 /*
151                  * Perform the necessary tests.  To allow
152                  * the state transition, just break out of
153                  * the switch.
154                  */
155             case sd_crashed:
156             case sd_reborn:
157             case sd_down:                                   /* been down, no data lost */
158                 /*
159                  * If we're associated with a plex, and
160                  * the plex isn't up, or we're the only
161                  * subdisk in the plex, we can do it.
162                  */
                /*
                 * NOTE(review): the test below is "subdisks > 1",
                 * which does not match "the only subdisk" in the
                 * comment above -- confirm which one is intended.
                 * If the test fails, the subdisk becomes (or stays)
                 * sd_reborn and -1 is returned.
                 */
163                 if ((sd->plexno >= 0)
164                     && (((PLEX[sd->plexno].state < plex_firstup)
165                             || (PLEX[sd->plexno].subdisks > 1))))
166                     break;                                  /* do it */
167                 if (oldstate != sd_reborn) {
168                     sd->state = sd_reborn;                  /* here it is again */
169                     log(LOG_INFO,
170                         "vinum: %s is %s, not %s\n",
171                         sd->name,
172                         sd_state(sd->state),
173                         sd_state(newstate));
174                 }
175                 status = -1;
176                 break;
177
178             case sd_init:                                   /* brand new */
179                 if (flags & setstate_configuring)           /* we're doing this while configuring */
180                     break;
181                 /* otherwise it's like being empty */
182                 /* FALLTHROUGH */
183
184             case sd_empty:
185             case sd_initialized:
186                 /*
187                  * If we're not part of a plex, or the
188                  * plex is not part of a volume with other
189                  * plexes which are up, we can come up
190                  * without being inconsistent.
191                  *
192                  * If we're part of a parity plex, we'll
193                  * come up if the caller uses force.  This
194                  * is the way we bring them up after
195                  * initialization.
196                  */
197                 if ((sd->plexno < 0)
198                     || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0)
199                     || (isparity((&PLEX[sd->plexno]))
200                         && (flags & setstate_force)))
201                     break;
202
203                 /* Otherwise it's just out of date */
204                 /* FALLTHROUGH */
205
206             case sd_stale:                                  /* out of date info, need reviving */
207             case sd_obsolete:
208                 /*
209
210                  * 1.  If the subdisk is not part of a
211                  *     plex, bring it up, don't revive.
212                  *
213                  * 2.  If the subdisk is part of a
214                  *     one-plex volume or an unattached
215                  *     plex, and it's not RAID-4 or
216                  *     RAID-5, we *can't revive*.  The
217                  *     subdisk doesn't change its state.
218                  *
219                  * 3.  If the subdisk is part of a
220                  *     one-plex volume or an unattached
221                  *     plex, and it's RAID-4 or RAID-5,
222                  *     but more than one subdisk is down,
223                  *     we *still can't revive*.  The
224                  *     subdisk doesn't change its state.
225                  *
226                  * 4.  If the subdisk is part of a
227                  *     multi-plex volume, we'll change to
228                  *     reviving and let the revive
229                  *     routines find out whether it will
230                  *     work or not.  If they don't, the
231                  *     revive stops with an error message,
232                  *     but the state doesn't change
233                  *     (FWIW).
234                  */
235                 if (sd->plexno < 0)                         /* no plex associated, */
236                     break;                                  /* bring it up */
237                 plex = &PLEX[sd->plexno];
238                 if (plex->volno >= 0)                       /* have a volume */
239                     vol = &VOL[plex->volno];
240                 else
241                     vol = NULL;
242                 /*
243                  * We can't do it if:
244                  *
245                  * 1: we don't have a volume
246                  * 2: we're the only plex in the volume
247                  * 3: we're a RAID-4 or RAID-5 plex, and
248                  *    more than one subdisk is down.
249                  */
250                 if (((vol == NULL)
251                         || (vol->plexes == 1))
252                     && ((!isparity(plex))
253                         || (plex->sddowncount > 1))) {
                    /*
                     * NOTE(review): none of the case labels leading
                     * here is sd_initializing and sd->state has not
                     * been modified on this path, so the test below
                     * looks unreachable -- confirm.
                     */
254                     if (sd->state == sd_initializing)       /* it's finished initializing  */
255                         sd->state = sd_initialized;
256                     else
257                         return 0;                           /* can't do it */
258                 } else {
259                     sd->state = sd_reviving;                /* put in reviving state */
260                     sd->revived = 0;                        /* nothing done yet */
261                     status = EAGAIN;                        /* need to repeat */
262                 }
263                 break;
264
265             case sd_reviving:
266                 if (flags & setstate_force)                 /* insist, */
267                     break;
268                 return EAGAIN;                              /* no, try again */
269
270             default:                                        /* can't do it */
271                 /*
272                  * There's no way to bring subdisks up directly from
273                  * other states.  First they need to be initialized
274                  * or revived.
275                  */
276                 return 0;
277             }
278             break;
279
280         default:                                            /* other ones, only internal with force */
281             if ((flags & setstate_force) == 0)              /* no force?  What's this? */
282                 return 0;                                   /* don't do it */
283         }
284     }
    /*
     * status is still 1 only if nothing above diverted the request;
     * -1 and EAGAIN mean sd->state has already been set to something
     * other than newstate, so we only report it.
     */
285     if (status == 1) {                                      /* we can do it, */
286         sd->state = newstate;
287         if (flags & setstate_force)
288             log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state));
289         else
290             log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
291     } else                                                  /* we don't get here with status 0 */
292         log(LOG_INFO,
293             "vinum: %s is %s, not %s\n",
294             sd->name,
295             sd_state(sd->state),
296             sd_state(newstate));
297     if (sd->plexno >= 0)                                    /* we belong to a plex */
298         update_plex_state(sd->plexno);                      /* update plex state */
299     if ((flags & setstate_configuring) == 0)                /* save config now */
300         save_config();
301     return status;
302 }
303
304 /*
305  * Set the state of a plex dependent on its subdisks.
306  * This time round, we'll let plex state just reflect
307  * aggregate subdisk state, so this becomes an order of
308  * magnitude less complicated.  In particular, ignore
309  * the requested state.
310  */
311 int
312 set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
313 {
314     struct plex *plex;                                      /* point to our plex */
315     enum plexstate oldstate;
316     enum volplexstate vps;                                  /* how do we compare with the other plexes? */
317
318     plex = &PLEX[plexno];                                   /* point to our plex */
319     oldstate = plex->state;
320
321     /* If the plex isn't allocated, we can't do it. */
322     if (plex->state == plex_unallocated)
323         return 0;
324
325     /*
326      * If it's already in the the state we want,
327      * and it's not up, just return.  If it's up,
328      * we still need to do some housekeeping.
329      */
330     if ((state == oldstate)
331         && (state != plex_up))
332         return 1;
333     vps = vpstate(plex);                                    /* how do we compare with the other plexes? */
334     switch (state) {
335         /*
336          * We can't bring the plex up, even by force,
337          * unless it's ready.  update_plex_state
338          * checks that.
339          */
340     case plex_up:                                           /* bring the plex up */
341         update_plex_state(plex->plexno);                    /* it'll come up if it can */
342         break;
343
344     case plex_down:                                         /* want to take it down */
345         /*
346          * If we're the only one, or the only one
347          * which is up, we need force to do it.
348          */
349         if (((vps == volplex_onlyus)
350                 || (vps == volplex_onlyusup))
351             && (!(flags & setstate_force)))
352             return 0;                                       /* can't do it */
353         plex->state = state;                                /* do it */
354         invalidate_subdisks(plex, sd_down);                 /* and down all up subdisks */
355         break;
356
357         /*
358          * This is only requested internally.
359          * Trust ourselves
360          */
361     case plex_faulty:
362         plex->state = state;                                /* do it */
363         invalidate_subdisks(plex, sd_crashed);              /* and crash all up subdisks */
364         break;
365
366     case plex_initializing:
367         /* XXX consider what safeguards we need here */
368         if ((flags & setstate_force) == 0)
369             return 0;
370         plex->state = state;                                /* do it */
371         break;
372
373         /* What's this? */
374     default:
375         return 0;
376     }
377     if (plex->state != oldstate)                            /* we've changed, */
378         log(LOG_INFO,                                       /* tell them about it */
379             "vinum: %s is %s\n",
380             plex->name,
381             plex_state(plex->state));
382     /*
383      * Now see what we have left, and whether
384      * we're taking the volume down
385      */
386     if (plex->volno >= 0)                                   /* we have a volume */
387         update_volume_state(plex->volno);                   /* update its state */
388     if ((flags & setstate_configuring) == 0)                /* save config now */
389         save_config();                                      /* yes: save the updated configuration */
390     return 1;
391 }
392
393 /* Update the state of a plex dependent on its plexes. */
394 int
395 set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
396 {
397     struct volume *vol = &VOL[volno];                       /* point to our volume */
398
399     if (vol->state == volume_unallocated)                   /* no volume to do anything with, */
400         return 0;
401     if (vol->state == state)                                /* we're there already */
402         return 1;
403
404     if (state == volume_up)                                 /* want to come up */
405         update_volume_state(volno);
406     else if (state == volume_down) {                        /* want to go down */
407         if (((vol->flags & VF_OPEN) == 0)                   /* not open */
408         ||((flags & setstate_force) != 0)) {                /* or we're forcing */
409             vol->state = volume_down;
410             log(LOG_INFO,
411                 "vinum: volume %s is %s\n",
412                 vol->name,
413                 volume_state(vol->state));
414             if ((flags & setstate_configuring) == 0)        /* save config now */
415                 save_config();                              /* yes: save the updated configuration */
416             return 1;
417         }
418     }
419     return 0;                                               /* no change */
420 }
421
422 /* Set the state of a subdisk based on its environment */
423 void
424 update_sd_state(int sdno)
425 {
426     struct sd *sd;
427     struct drive *drive;
428     enum sdstate oldstate;
429
430     sd = &SD[sdno];
431     oldstate = sd->state;
432     drive = &DRIVE[sd->driveno];
433
434     if (drive->state == drive_up) {
435         switch (sd->state) {
436         case sd_down:
437         case sd_crashed:
438             sd->state = sd_reborn;                          /* back up again with no loss */
439             break;
440
441         default:
442             break;
443         }
444     } else {                                                /* down or worse */
445         switch (sd->state) {
446         case sd_up:
447         case sd_reborn:
448         case sd_reviving:
449         case sd_empty:
450             sd->state = sd_crashed;                         /* lost our drive */
451             break;
452
453         default:
454             break;
455         }
456     }
457     if (sd->state != oldstate)                              /* state has changed, */
458         log(LOG_INFO,                                       /* say so */
459             "vinum: %s is %s\n",
460             sd->name,
461             sd_state(sd->state));
462     if (sd->plexno >= 0)                                    /* we're part of a plex, */
463         update_plex_state(sd->plexno);                      /* update its state */
464 }
465
466 /*
467  * Force a plex and all its subdisks
468  * into an 'up' state.  This is a helper
469  * for update_plex_state.
470  */
471 void
472 forceup(int plexno)
473 {
474     struct plex *plex;
475     int sdno;
476
477     plex = &PLEX[plexno];                                   /* point to the plex */
478     plex->state = plex_up;                                  /* and bring it up */
479
480     /* change the subdisks to up state */
481     for (sdno = 0; sdno < plex->subdisks; sdno++) {
482         SD[plex->sdnos[sdno]].state = sd_up;
483         log(LOG_INFO,                                       /* tell them about it */
484             "vinum: %s is up\n",
485             SD[plex->sdnos[sdno]].name);
486     }
487 }
488
489 /* Set the state of a plex based on its environment */
490 void
491 update_plex_state(int plexno)
492 {
493     struct plex *plex;                                      /* point to our plex */
494     enum plexstate oldstate;
495     enum sdstates statemap;                                 /* get a map of the subdisk states */
496     enum volplexstate vps;                                  /* how do we compare with the other plexes? */
497
498     plex = &PLEX[plexno];                                   /* point to our plex */
499     oldstate = plex->state;
500     statemap = sdstatemap(plex);                            /* get a map of the subdisk states */
501     vps = vpstate(plex);                                    /* how do we compare with the other plexes? */
502
503     if (statemap & sd_initstate)                            /* something initializing? */
504         plex->state = plex_initializing;                    /* yup, that makes the plex the same */
505     else if (statemap == sd_upstate)
506         /*
507          * All the subdisks are up.  This also means that
508          * they are consistent, so we can just bring
509          * the plex up
510          */
511         plex->state = plex_up;
512     else if (isparity(plex)                                 /* RAID-4 or RAID-5 plex */
513     &&(plex->sddowncount == 1))                             /* and exactly one subdisk down */
514         plex->state = plex_degraded;                        /* limping a bit */
515     else if (((statemap & ~sd_downstate) == sd_emptystate)  /* all subdisks empty */
516     ||((statemap & ~sd_downstate)
517             == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) {
518         if ((vps & volplex_otherup) == 0) {                 /* no other plex is up */
519             struct volume *vol = &VOL[plex->volno];         /* possible volume to which it points */
520
521             /*
522              * If we're a striped or concat plex
523              * associated with a volume, none of whose
524              * plexes are up, and we're new and untested,
525              * and the volume has the setupstate bit set,
526              * we can pretend to be in a consistent state.
527              *
528              * We need to do this in one swell foop: on
529              * the next call we will no longer be just
530              * empty.
531              *
532              * This code assumes that all the other plexes
533              * are also capable of coming up (i.e. all the
534              * sds are up), but that's OK: we'll come back
535              * to this function for the remaining plexes
536              * in the volume.
537              */
538             if ((plex->state == plex_init)
539                 && (plex->volno >= 0)
540                 && (vol->flags & VF_CONFIG_SETUPSTATE)) {
541                 for (plexno = 0; plexno < vol->plexes; plexno++)
542                     forceup(VOL[plex->volno].plex[plexno]);
543             } else if ((statemap == sd_initializedstate)    /* if it's initialized (not empty) */
544 ||(plex->organization == plex_concat)                       /* and we're not RAID-4 or RAID-5 */
545             ||(plex->organization == plex_striped))
546                 forceup(plexno);                            /* we'll do it */
547             /*
548              * This leaves a case where things don't get
549              * done: the plex is RAID-4 or RAID-5, and
550              * the subdisks are all empty.  They need to
551              * be initialized first.
552              */
553         } else {
554             if (statemap == sd_upstate)                     /* all subdisks up */
555                 plex->state = plex_up;                      /* we can come up too */
556             else
557                 plex->state = plex_faulty;
558         }
559     } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
560         plex->state = plex_flaky;
561     else if (statemap & (sd_upstate | sd_rebornstate))      /* some up or reborn */
562         plex->state = plex_corrupt;                         /* corrupt */
563     else if (statemap & (sd_initstate | sd_emptystate))     /* some subdisks empty or initializing */
564         plex->state = plex_initializing;
565     else                                                    /* nothing at all up */
566         plex->state = plex_faulty;
567
568     if (plex->state != oldstate)                            /* state has changed, */
569         log(LOG_INFO,                                       /* tell them about it */
570             "vinum: %s is %s\n",
571             plex->name,
572             plex_state(plex->state));
573     if (plex->volno >= 0)                                   /* we're part of a volume, */
574         update_volume_state(plex->volno);                   /* update its state */
575 }
576
577 /* Set volume state based on its components */
578 void
579 update_volume_state(int volno)
580 {
581     struct volume *vol;                                     /* our volume */
582     int plexno;
583     enum volumestate oldstate;
584
585     vol = &VOL[volno];                                      /* point to our volume */
586     oldstate = vol->state;
587
588     for (plexno = 0; plexno < vol->plexes; plexno++) {
589         struct plex *plex = &PLEX[vol->plex[plexno]];       /* point to the plex */
590         if (plex->state >= plex_corrupt) {                  /* something accessible, */
591             vol->state = volume_up;
592             break;
593         }
594     }
595     if (plexno == vol->plexes)                              /* didn't find an up plex */
596         vol->state = volume_down;
597
598     if (vol->state != oldstate) {                           /* state changed */
599         log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state));
600         save_config();                                      /* save the updated configuration */
601     }
602 }
603
604 /*
605  * Called from request routines when they find
606  * a subdisk which is not kosher.  Decide whether
607  * it warrants changing the state.  Return
608  * REQUEST_DOWN if we can't use the subdisk,
609  * REQUEST_OK if we can.
610  */
611 /*
612  * A prior version of this function checked the plex
613  * state as well.  At the moment, consider plex states
614  * information for the user only.  We'll ignore them
615  * and use the subdisk state only.  The last version of
616  * this file with the old logic was 2.7. XXX
617  */
618 enum requeststatus
619 checksdstate(struct sd *sd, struct request *rq, vinum_off_t diskaddr, vinum_off_t diskend)
620 {
621     struct plex *plex = &PLEX[sd->plexno];
622     int writeop = (rq->bio->bio_buf->b_cmd != BUF_CMD_READ);        /* note if we're writing */
623
624     switch (sd->state) {
625         /* We shouldn't get called if the subdisk is up */
626     case sd_up:
627         return REQUEST_OK;
628
629     case sd_reviving:
630         /*
631          * Access to a reviving subdisk depends on the
632          * organization of the plex:
633          *
634          * - If it's concatenated, access the subdisk
635          *   up to its current revive point.  If we
636          *   want to write to the subdisk overlapping
637          *   the current revive block, set the
638          *   conflict flag in the request, asking the
639          *   caller to put the request on the wait
640          *   list, which will be attended to by
641          *   revive_block when it's done.
642          * - if it's striped, we can't do it (we could
643          *   do some hairy calculations, but it's
644          *   unlikely to work).
645          * - if it's RAID-4 or RAID-5, we can do it as
646          *   long as only one subdisk is down
647          */
648         if (plex->organization == plex_striped)             /* plex is striped, */
649             return REQUEST_DOWN;
650
651         else if (isparity(plex)) {                          /* RAID-4 or RAID-5 plex */
652             if (plex->sddowncount > 1)                      /* with more than one sd down, */
653                 return REQUEST_DOWN;
654             else
655                 /*
656                  * XXX We shouldn't do this if we can find a
657                  * better way.  Check the other plexes
658                  * first, and return a DOWN if another
659                  * plex will do it better
660                  */
661                 return REQUEST_OK;                          /* OK, we'll find a way */
662         }
663         if (diskaddr > (sd->revived
664                 + sd->plexoffset
665                 + (sd->revive_blocksize >> DEV_BSHIFT)))    /* we're beyond the end */
666             return REQUEST_DOWN;
667         else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
668             if (writeop) {
669                 rq->flags |= XFR_REVIVECONFLICT;            /* note a potential conflict */
670                 rq->sdno = sd->sdno;                        /* and which sd last caused it */
671             } else
672                 return REQUEST_DOWN;
673         }
674         return REQUEST_OK;
675
676     case sd_reborn:
677         if (writeop)
678             return REQUEST_OK;                              /* always write to a reborn disk */
679         else                                                /* don't allow a read */
680             /*
681                * Handle the mapping.  We don't want to reject
682                * a read request to a reborn subdisk if that's
683                * all we have. XXX
684              */
685             return REQUEST_DOWN;
686
687     case sd_down:
688         if (writeop)                                        /* writing to a consistent down disk */
689             set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
690         return REQUEST_DOWN;
691
692     case sd_crashed:
693         if (writeop)                                        /* writing to a consistent down disk */
694             set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
695         return REQUEST_DOWN;
696
697     default:
698         return REQUEST_DOWN;
699     }
700 }
701
702 /* return a state map for the subdisks of a plex */
703 enum sdstates
704 sdstatemap(struct plex *plex)
705 {
706     int sdno;
707     enum sdstates statemap = 0;                             /* note the states we find */
708
709     plex->sddowncount = 0;                                  /* no subdisks down yet */
710     for (sdno = 0; sdno < plex->subdisks; sdno++) {
711         struct sd *sd = &SD[plex->sdnos[sdno]];             /* point to the subdisk */
712
713         switch (sd->state) {
714         case sd_empty:
715             statemap |= sd_emptystate;
716             (plex->sddowncount)++;                          /* another unusable subdisk */
717             break;
718
719         case sd_init:
720             statemap |= sd_initstate;
721             (plex->sddowncount)++;                          /* another unusable subdisk */
722             break;
723
724         case sd_down:
725             statemap |= sd_downstate;
726             (plex->sddowncount)++;                          /* another unusable subdisk */
727             break;
728
729         case sd_crashed:
730             statemap |= sd_crashedstate;
731             (plex->sddowncount)++;                          /* another unusable subdisk */
732             break;
733
734         case sd_obsolete:
735             statemap |= sd_obsoletestate;
736             (plex->sddowncount)++;                          /* another unusable subdisk */
737             break;
738
739         case sd_stale:
740             statemap |= sd_stalestate;
741             (plex->sddowncount)++;                          /* another unusable subdisk */
742             break;
743
744         case sd_reborn:
745             statemap |= sd_rebornstate;
746             break;
747
748         case sd_up:
749             statemap |= sd_upstate;
750             break;
751
752         case sd_initializing:
753             statemap |= sd_initstate;
754             (plex->sddowncount)++;                          /* another unusable subdisk */
755             break;
756
757         case sd_initialized:
758             statemap |= sd_initializedstate;
759             (plex->sddowncount)++;                          /* another unusable subdisk */
760             break;
761
762         case sd_unallocated:
763         case sd_uninit:
764         case sd_reviving:
765         case sd_referenced:
766             statemap |= sd_otherstate;
767             (plex->sddowncount)++;                          /* another unusable subdisk */
768         }
769     }
770     return statemap;
771 }
772
773 /* determine the state of the volume relative to this plex */
774 enum volplexstate
775 vpstate(struct plex *plex)
776 {
777     struct volume *vol;
778     enum volplexstate state = volplex_onlyusdown;           /* state to return */
779     int plexno;
780
781     if (plex->volno < 0) {                                  /* not associated with a volume */
782         if (plex->state > plex_degraded)
783             return volplex_onlyus;                          /* just us */
784         else
785             return volplex_onlyusdown;                      /* assume the worst */
786     }
787     vol = &VOL[plex->volno];                                /* point to our volume */
788     for (plexno = 0; plexno < vol->plexes; plexno++) {
789         if (&PLEX[vol->plex[plexno]] == plex) {             /* us */
790             if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */
791                 state |= volplex_onlyus;                    /* yes */
792         } else {
793             if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */
794                 state |= volplex_otherup;                   /* and when they were up, they were up */
795             else
796                 state |= volplex_alldown;                   /* and when they were down, they were down */
797         }
798     }
799     return state;                                           /* and when they were only halfway up */
800 }                                                           /* they were neither up nor down */
801
/*
 * Return 1 if every bit that is set in b is also set in a,
 * otherwise 0.  An empty mask (b == 0) is always satisfied.
 */
int allset(int a, int b);

int
allset(int a, int b)
{
    int missing = b & ~a;                                   /* bits of b that a lacks */

    return missing == 0;
}
810
811 /* Invalidate the subdisks belonging to a plex */
812 void
813 invalidate_subdisks(struct plex *plex, enum sdstate state)
814 {
815     int sdno;
816
817     for (sdno = 0; sdno < plex->subdisks; sdno++) {         /* for each subdisk */
818         struct sd *sd = &SD[plex->sdnos[sdno]];
819
820         switch (sd->state) {
821         case sd_unallocated:
822         case sd_uninit:
823         case sd_init:
824         case sd_initializing:
825         case sd_initialized:
826         case sd_empty:
827         case sd_obsolete:
828         case sd_stale:
829         case sd_crashed:
830         case sd_down:
831         case sd_referenced:
832             break;
833
834         case sd_reviving:
835         case sd_reborn:
836         case sd_up:
837             set_sd_state(plex->sdnos[sdno], state, setstate_force);
838         }
839     }
840 }
841
842 /*
843  * Start an object, in other words do what we can to get it up.
844  * This is called from vinumioctl (VINUMSTART).
845  * Return error indications via ioctl_reply
846  */
847 void
848 start_object(struct vinum_ioctl_msg *data)
849 {
850     int status;
851     int objindex = data->index;                             /* data gets overwritten */
852     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
853     enum setstateflags flags;
854
855     if (data->force != 0)                                   /* are we going to use force? */
856         flags = setstate_force;                             /* yes */
857     else
858         flags = setstate_none;                              /* no */
859
860     switch (data->type) {
861     case drive_object:
862         status = set_drive_state(objindex, drive_up, flags);
863         if (DRIVE[objindex].state != drive_up)              /* set status on whether we really did it */
864             ioctl_reply->error = EBUSY;
865         else
866             ioctl_reply->error = 0;
867         break;
868
869     case sd_object:
870         if (DRIVE[SD[objindex].driveno].state != drive_up) {
871             ioctl_reply->error = EIO;
872             strcpy(ioctl_reply->msg, "Drive is down");
873             return;
874         }
875         if (data->blocksize)
876             SD[objindex].revive_blocksize = data->blocksize;
877         if ((SD[objindex].state == sd_reviving)             /* reviving, */
878         ||(SD[objindex].state == sd_stale)) {               /* or stale, will revive */
879             SD[objindex].state = sd_reviving;               /* make sure we're reviving */
880             ioctl_reply->error = revive_block(objindex);    /* revive another block */
881             ioctl_reply->msg[0] = '\0';                     /* no comment */
882             return;
883         } else if (SD[objindex].state == sd_initializing) { /* initializing, */
884             if (data->blocksize)
885                 SD[objindex].init_blocksize = data->blocksize;
886             ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */
887             ioctl_reply->msg[0] = '\0';                     /* no comment */
888             return;
889         }
890         status = set_sd_state(objindex, sd_up, flags);      /* set state */
891         if (status != EAGAIN) {                             /* not first revive or initialize, */
892             if (SD[objindex].state != sd_up)                /* set status on whether we really did it */
893                 ioctl_reply->error = EBUSY;
894             else
895                 ioctl_reply->error = 0;
896         } else
897             ioctl_reply->error = status;
898         break;
899
900     case plex_object:
901         status = set_plex_state(objindex, plex_up, flags);
902         if (PLEX[objindex].state != plex_up)                /* set status on whether we really did it */
903             ioctl_reply->error = EBUSY;
904         else
905             ioctl_reply->error = 0;
906         break;
907
908     case volume_object:
909         status = set_volume_state(objindex, volume_up, flags);
910         if (VOL[objindex].state != volume_up)               /* set status on whether we really did it */
911             ioctl_reply->error = EBUSY;
912         else
913             ioctl_reply->error = 0;
914         break;
915
916     default:
917         ioctl_reply->error = EINVAL;
918         strcpy(ioctl_reply->msg, "Invalid object type");
919         return;
920     }
921     /*
922      * There's no point in saying anything here:
923      * the userland program does it better
924      */
925     ioctl_reply->msg[0] = '\0';
926 }
927
928 /*
929  * Stop an object, in other words do what we can to get it down
930  * This is called from vinumioctl (VINUMSTOP).
931  * Return error indications via ioctl_reply.
932  */
933 void
934 stop_object(struct vinum_ioctl_msg *data)
935 {
936     int status = 1;
937     int objindex = data->index;                             /* save the number from change */
938     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
939
940     switch (data->type) {
941     case drive_object:
942         status = set_drive_state(objindex, drive_down, data->force);
943         break;
944
945     case sd_object:
946         status = set_sd_state(objindex, sd_down, data->force);
947         break;
948
949     case plex_object:
950         status = set_plex_state(objindex, plex_down, data->force);
951         break;
952
953     case volume_object:
954         status = set_volume_state(objindex, volume_down, data->force);
955         break;
956
957     default:
958         ioctl_reply->error = EINVAL;
959         strcpy(ioctl_reply->msg, "Invalid object type");
960         return;
961     }
962     ioctl_reply->msg[0] = '\0';
963     if (status == 0)                                        /* couldn't do it */
964         ioctl_reply->error = EBUSY;
965     else
966         ioctl_reply->error = 0;
967 }
968
969 /*
970  * VINUM_SETSTATE ioctl: set an object state.
971  * msg is the message passed by the user.
972  */
973 void
974 setstate(struct vinum_ioctl_msg *msg)
975 {
976     int sdno;
977     struct sd *sd;
978     struct plex *plex;
979     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
980
981     switch (msg->state) {
982     case object_down:
983         stop_object(msg);
984         break;
985
986     case object_initializing:
987         switch (msg->type) {
988         case sd_object:
989             sd = &SD[msg->index];
990             if ((msg->index >= vinum_conf.subdisks_allocated)
991                 || (sd->state <= sd_referenced)) {
992                 ksprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
993                 ioctl_reply->error = EFAULT;
994                 return;
995             }
996             set_sd_state(msg->index, sd_initializing, msg->force);
997             if (sd->state != sd_initializing) {
998                 strcpy(ioctl_reply->msg, "Can't set state");
999                 ioctl_reply->error = EBUSY;
1000             } else
1001                 ioctl_reply->error = 0;
1002             break;
1003
1004         case plex_object:
1005             plex = &PLEX[msg->index];
1006             if ((msg->index >= vinum_conf.plexes_allocated)
1007                 || (plex->state <= plex_unallocated)) {
1008                 ksprintf(ioctl_reply->msg, "Invalid plex %d", msg->index);
1009                 ioctl_reply->error = EFAULT;
1010                 return;
1011             }
1012             set_plex_state(msg->index, plex_initializing, msg->force);
1013             if (plex->state != plex_initializing) {
1014                 strcpy(ioctl_reply->msg, "Can't set state");
1015                 ioctl_reply->error = EBUSY;
1016             } else {
1017                 ioctl_reply->error = 0;
1018                 for (sdno = 0; sdno < plex->subdisks; sdno++) {
1019                     sd = &SD[plex->sdnos[sdno]];
1020                     set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
1021                     if (sd->state != sd_initializing) {
1022                         strcpy(ioctl_reply->msg, "Can't set state");
1023                         ioctl_reply->error = EBUSY;
1024                         break;
1025                     }
1026                 }
1027             }
1028             break;
1029
1030         default:
1031             strcpy(ioctl_reply->msg, "Invalid object");
1032             ioctl_reply->error = EINVAL;
1033         }
1034         break;
1035
1036     case object_initialized:
1037         if (msg->type == sd_object) {
1038             sd = &SD[msg->index];
1039             if ((msg->index >= vinum_conf.subdisks_allocated)
1040                 || (sd->state <= sd_referenced)) {
1041                 ksprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
1042                 ioctl_reply->error = EFAULT;
1043                 return;
1044             }
1045             set_sd_state(msg->index, sd_initialized, msg->force);
1046             if (sd->state != sd_initializing) {
1047                 strcpy(ioctl_reply->msg, "Can't set state");
1048                 ioctl_reply->error = EBUSY;
1049             } else
1050                 ioctl_reply->error = 0;
1051         } else {
1052             strcpy(ioctl_reply->msg, "Invalid object");
1053             ioctl_reply->error = EINVAL;
1054         }
1055         break;
1056
1057     case object_up:
1058         start_object(msg);
1059     }
1060 }
1061
1062 /*
1063  * Brute force set state function.  Don't look at
1064  * any dependencies, just do it.  This is mainly
1065  * intended for testing and recovery.
1066  */
1067 void
1068 setstate_by_force(struct vinum_ioctl_msg *msg)
1069 {
1070     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
1071
1072     switch (msg->type) {
1073     case drive_object:
1074         DRIVE[msg->index].state = msg->state;
1075         break;
1076
1077     case sd_object:
1078         SD[msg->index].state = msg->state;
1079         break;
1080
1081     case plex_object:
1082         PLEX[msg->index].state = msg->state;
1083         break;
1084
1085     case volume_object:
1086         VOL[msg->index].state = msg->state;
1087         break;
1088
1089     default:
1090         break;
1091     }
1092     ioctl_reply->error = 0;
1093 }
1094 /* Local Variables: */
1095 /* fill-column: 50 */
1096 /* End: */