Change the kernel dev_t, representing a pointer to a specinfo structure, to cdev_t.
[dragonfly.git] / sys / kern / kern_shutdown.c
index 06cdd7f..cea484e 100644 (file)
@@ -37,7 +37,7 @@
  *
  *     @(#)kern_shutdown.c     8.3 (Berkeley) 1/21/94
  * $FreeBSD: src/sys/kern/kern_shutdown.c,v 1.72.2.12 2002/02/21 19:15:10 dillon Exp $
- * $DragonFly: src/sys/kern/kern_shutdown.c,v 1.19 2005/04/19 17:54:42 dillon Exp $
+ * $DragonFly: src/sys/kern/kern_shutdown.c,v 1.36 2006/09/10 01:26:39 dillon Exp $
  */
 
 #include "opt_ddb.h"
@@ -64,6 +64,7 @@
 #include <sys/sysproto.h>
 #include <sys/device.h>
 #include <sys/cons.h>
+#include <sys/thread2.h>
 #include <sys/buf2.h>
 
 #include <machine/pcb.h>
@@ -128,15 +129,21 @@ watchdog_tickle_fn wdog_tickler = NULL;
 const char *panicstr;
 
 int dumping;                           /* system is dumping */
+#ifdef SMP
+u_int panic_cpu_interlock;             /* panic interlock */
+globaldata_t panic_cpu_gd;             /* which cpu took the panic */
+#endif
 
 static void boot (int) __dead2;
 static void dumpsys (void);
-static int setdumpdev (dev_t dev);
+static int setdumpdev (cdev_t dev);
 static void poweroff_wait (void *, int);
 static void print_uptime (void);
 static void shutdown_halt (void *junk, int howto);
 static void shutdown_panic (void *junk, int howto);
 static void shutdown_reset (void *junk, int howto);
+static int shutdown_busycount1(struct buf *bp, void *info);
+static int shutdown_busycount2(struct buf *bp, void *info);
 
 /* register various local shutdown events */
 static void 
@@ -156,7 +163,7 @@ SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL)
  * The system call that results in a reboot
  */
 int
-reboot(struct reboot_args *uap)
+sys_reboot(struct reboot_args *uap)
 {
        struct thread *td = curthread;
        int error;
@@ -180,7 +187,7 @@ shutdown_nice(int howto)
        
        /* Send a signal to init(8) and have it shutdown the world */
        if (initproc != NULL) {
-               psignal(initproc, SIGINT);
+               ksignal(initproc, SIGINT);
        } else {
                /* No init(8) running, so simply reboot */
                boot(RB_NOSYNC);
@@ -188,6 +195,7 @@ shutdown_nice(int howto)
        return;
 }
 static int     waittime = -1;
+static struct thread *dumpthread;
 static struct pcb dumppcb;
 
 static void
@@ -225,14 +233,29 @@ print_uptime()
 static void
 boot(int howto)
 {
+       /*
+        * Get rid of any user scheduler baggage and then give
+        * us a high priority.
+        */
+       if (curthread->td_release)
+               curthread->td_release(curthread);
+       lwkt_setpri_self(TDPRI_MAX);
 
        /* collect extra flags that shutdown_nice might have set */
        howto |= shutdown_howto;
 
 #ifdef SMP
+       /*
+        * We really want to shutdown on the BSP.  Subsystems such as ACPI
+        * can't power-down the box otherwise.
+        */
        if (smp_active_mask > 1) {
                printf("boot() called on cpu#%d\n", mycpu->gd_cpuid);
        }
+       if (panicstr == NULL && mycpu->gd_cpuid != 0) {
+               printf("Switching to cpu #0 for shutdown\n");
+               lwkt_setcpu_self(globaldata_find(0));
+       }
 #endif
        /*
         * Do any callouts that should be done BEFORE syncing the filesystems.
@@ -243,13 +266,12 @@ boot(int howto)
         * Now sync filesystems
         */
        if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
-               struct buf *bp;
                int iter, nbusy, pbusy;
 
                waittime = 0;
                printf("\nsyncing disks... ");
 
-               sync(NULL);     /* YYY was sync(&proc0, NULL). why proc0 ? */
+               sys_sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
 
                /*
                 * With soft updates, some buffers that are
@@ -257,17 +279,7 @@ boot(int howto)
                 * buffers are written.
                 */
                for (iter = pbusy = 0; iter < 20; iter++) {
-                       nbusy = 0;
-                       for (bp = &buf[nbuf]; --bp >= buf; ) {
-                               if ((bp->b_flags & B_INVAL) == 0 &&
-                                   BUF_REFCNT(bp) > 0) {
-                                       nbusy++;
-                               } else if ((bp->b_flags & (B_DELWRI | B_INVAL))
-                                               == B_DELWRI) {
-                                       /* bawrite(bp);*/
-                                       nbusy++;
-                               }
-                       }
+                       nbusy = scan_all_buffers(shutdown_busycount1, NULL);
                        if (nbusy == 0)
                                break;
                        printf("%d ", nbusy);
@@ -281,32 +293,15 @@ boot(int howto)
                         */
                        if (iter > 5 && bioops.io_sync)
                                (*bioops.io_sync)(NULL);
-                       sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
-                       DELAY(50000 * iter);
+                       sys_sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
+                       tsleep(boot, 0, "shutdn", hz * iter / 20 + 1);
                }
                printf("\n");
                /*
                 * Count only busy local buffers to prevent forcing 
                 * a fsck if we're just a client of a wedged NFS server
                 */
-               nbusy = 0;
-               for (bp = &buf[nbuf]; --bp >= buf; ) {
-                       if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) ||
-                           ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
-                               if (bp->b_dev == NODEV) {
-                                       mountlist_remove(bp->b_vp->v_mount);
-                                       continue;
-                               }
-                               nbusy++;
-#if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
-                               printf(
-                           "%p %d: dev:%s, flags:%08lx, blkno:%ld, lblkno:%ld\n",
-                                   bp, nbusy, devtoname(bp->b_dev),
-                                   bp->b_flags, (long)bp->b_blkno,
-                                   (long)bp->b_lblkno);
-#endif
-                       }
-               }
+               nbusy = scan_all_buffers(shutdown_busycount2, NULL);
                if (nbusy) {
                        /*
                         * Failed to sync all blocks. Indicate this and don't
@@ -316,7 +311,7 @@ boot(int howto)
 #ifdef DDB
                        Debugger("busy buffer problem");
 #endif /* DDB */
-                       DELAY(5000000); /* 5 seconds */
+                       tsleep(boot, 0, "shutdn", hz * 5 + 1);
                } else {
                        printf("done\n");
                        /*
@@ -325,7 +320,7 @@ boot(int howto)
                        if (panicstr == 0)
                                vfs_unmountall();
                }
-               DELAY(100000);          /* wait for console output to finish */
+               tsleep(boot, 0, "shutdn", hz / 10 + 1);
        }
 
        print_uptime();
@@ -335,7 +330,7 @@ boot(int howto)
         * been completed.
         */
        EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
-       splhigh();
+       crit_enter();
        if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold)
                dumpsys();
 
@@ -346,6 +341,41 @@ boot(int howto)
        /* NOTREACHED */
 }
 
+/* scan_all_buffers() callback: returns 1 if bp is not B_INVAL and either
+ * has a nonzero BUF_REFCNT or is marked B_DELWRI, otherwise returns 0. */
+static int
+shutdown_busycount1(struct buf *bp, void *info)
+{
+       if ((bp->b_flags & B_INVAL) != 0)
+               return (0);
+       return (BUF_REFCNT(bp) > 0 || (bp->b_flags & B_DELWRI) == B_DELWRI);
+}
+
+/*
+ * scan_all_buffers() callback: returns 1 only for a busy or delayed-write
+ * buffer whose vnode has active write tracking, otherwise returns 0.
+ */
+static int
+shutdown_busycount2(struct buf *bp, void *info)
+{
+       if (((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp)) ||
+           ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
+               /* Only count buffers undergoing write I/O on the vnode. */
+               if (bp->b_vp == NULL ||
+                   bp->b_vp->v_track_write.bk_active == 0)
+                       return (0);
+#if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
+               /* Offsets are cast so the arguments match %lld. */
+               printf("%p dev:?, flags:%08x, loffset:%lld, doffset:%lld\n",
+                   bp, bp->b_flags,
+                   (long long)bp->b_loffset,
+                   (long long)bp->b_bio2.bio_offset);
+#endif
+               return (1);
+       }
+       return (0);
+}
+
 /*
  * If the shutdown was a clean halt, behave accordingly.
  */
@@ -431,12 +461,12 @@ SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0,
 
 static int
 setdumpdev(dev)
-       dev_t dev;
+       cdev_t dev;
 {
        int psize;
        long newdumplo;
 
-       if (dev == NODEV) {
+       if (dev == NOCDEV) {
                dumpdev = dev;
                return (0);
        }
@@ -462,17 +492,17 @@ dump_conf(dummy)
        void *dummy;
 {
        char *path;
-       dev_t dev;
+       cdev_t dev;
 
-       path = malloc(MNAMELEN, M_TEMP, M_WAITOK);
+       path = kmalloc(MNAMELEN, M_TEMP, M_WAITOK);
        if (TUNABLE_STR_FETCH("dumpdev", path, MNAMELEN) != 0) {
-               dev = getdiskbyname(path);
-               if (dev != NODEV)
+               dev = kgetdiskbyname(path);
+               if (dev != NOCDEV)
                        dumpdev = dev;
        }
-       free(path, M_TEMP);
+       kfree(path, M_TEMP);
        if (setdumpdev(dumpdev) != 0)
-               dumpdev = NODEV;
+               dumpdev = NOCDEV;
 }
 
 SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL)
@@ -491,7 +521,7 @@ sysctl_kern_dumpdev(SYSCTL_HANDLER_ARGS)
 }
 
 SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW,
-       0, sizeof dumpdev, sysctl_kern_dumpdev, "T,dev_t", "");
+       0, sizeof dumpdev, sysctl_kern_dumpdev, "T,cdev_t", "");
 
 /*
  * Doadump comes here after turning off memory management and
@@ -504,13 +534,14 @@ dumpsys(void)
        int     error;
 
        savectx(&dumppcb);
+       dumpthread = curthread;
        if (dumping++) {
                printf("Dump already in progress, bailing...\n");
                return;
        }
        if (!dodump)
                return;
-       if (dumpdev == NODEV)
+       if (dumpdev == NOCDEV)
                return;
        dumpsize = Maxmem;
        printf("\ndumping to dev %s, offset %ld\n", devtoname(dumpdev), dumplo);
@@ -586,6 +617,40 @@ panic(const char *fmt, ...)
        __va_list ap;
        static char buf[256];
 
+#ifdef SMP
+       /*
+        * If a panic occurs on multiple cpus before the first is able to
+        * halt the other cpus, only one cpu is allowed to take the panic.
+        * Attempt to be verbose about this situation but if the printf() 
+        * itself panics don't let us overrun the kernel stack.
+        *
+        * Be very nasty about descheduling our thread at the lowest
+        * level possible in an attempt to freeze the thread without
+        * inducing further panics.
+        *
+        * Bumping gd_trap_nesting_level will also bypass assertions in
+        * lwkt_switch() and allow us to switch away even if we are a
+        * FAST interrupt or IPI.
+        */
+       if (atomic_poll_acquire_int(&panic_cpu_interlock)) {
+               panic_cpu_gd = mycpu;
+       } else if (panic_cpu_gd != mycpu) {
+               crit_enter();
+               ++mycpu->gd_trap_nesting_level;
+               if (mycpu->gd_trap_nesting_level < 25) {
+                       printf("SECONDARY PANIC ON CPU %d THREAD %p\n",
+                               mycpu->gd_cpuid, curthread);
+               }
+               curthread->td_release = NULL;   /* be a grinch */
+               for (;;) {
+                       lwkt_deschedule_self(curthread);
+                       lwkt_switch();
+               }
+               /* NOT REACHED */
+               /* --mycpu->gd_trap_nesting_level */
+               /* crit_exit() */
+       }
+#endif
        bootopt = RB_AUTOBOOT | RB_DUMP;
        if (sync_on_panic == 0)
                bootopt |= RB_NOSYNC;