2 * Copyright (c) 1986, 1988, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/kern/kern_shutdown.c,v 1.72.2.12 2002/02/21 19:15:10 dillon Exp $
40 * $DragonFly: src/sys/kern/kern_shutdown.c,v 1.62 2008/01/05 13:23:48 corecode Exp $
44 #include "opt_ddb_trace.h"
45 #include "opt_hw_wdog.h"
46 #include "opt_panic.h"
47 #include "opt_show_busybufs.h"
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/eventhandler.h>
53 #include <sys/diskslice.h>
54 #include <sys/reboot.h>
57 #include <sys/fcntl.h> /* FREAD */
58 #include <sys/stat.h> /* S_IFCHR */
59 #include <sys/vnode.h>
60 #include <sys/kernel.h>
61 #include <sys/kthread.h>
62 #include <sys/malloc.h>
63 #include <sys/mount.h>
64 #include <sys/queue.h>
65 #include <sys/sysctl.h>
66 #include <sys/vkernel.h>
68 #include <sys/sysproto.h>
69 #include <sys/device.h>
72 #include <sys/kern_syscall.h>
73 #include <vm/vm_map.h>
76 #include <sys/thread2.h>
79 #include <machine/pcb.h>
80 #include <machine/clock.h>
81 #include <machine/md_var.h>
82 #include <machine/smp.h> /* smp_active_mask, cpuid */
83 #include <machine/vmparam.h>
85 #include <sys/signalvar.h>
88 #include <dev/misc/gpio/gpio.h>
/*
 * Fallback value for the countdown, in seconds, that shutdown_panic()
 * prints and waits out before the automatic reboot.  Only takes effect
 * when the build has not already defined PANIC_REBOOT_WAIT_TIME.
 * shutdown_panic() also special-cases the values 0 and -1.
 * NOTE(review): the matching #endif is not present in this listing.
 */
90 #ifndef PANIC_REBOOT_WAIT_TIME
91 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
95 * Note that stdarg.h and the ANSI style va_start macro are used for both
96 * ANSI and traditional C compilers. We use the machine version to stay
97 * within the confines of the kernel header files.
99 #include <machine/stdarg.h>
/*
 * Panic-time behaviour knobs, each exported read/write through sysctl:
 *   debug.debugger_on_panic - consulted by panic() before entering the debugger
 *   debug.trace_on_panic    - consulted by panic() before printing a trace
 *   kern.sync_on_panic      - when 0, panic() adds RB_NOSYNC to its boot flags
 * Also declares the kern.shutdown sysctl node used by the tunables further
 * down in this file.
 *
 * The paired definitions of debugger_on_panic and trace_on_panic are
 * compile-time alternatives.
 * NOTE(review): the #else/#endif lines, and the condition that selects
 * between the two trace_on_panic values, are not present in this listing.
 */
103 #ifdef DDB_UNATTENDED
104 int debugger_on_panic = 0;
106 int debugger_on_panic = 1;
108 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
109 &debugger_on_panic, 0, "Run debugger on kernel panic");
112 int trace_on_panic = 1;
114 int trace_on_panic = 0;
116 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW,
117 &trace_on_panic, 0, "Print stack trace on kernel panic");
120 static int sync_on_panic = 0;
121 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW,
122 &sync_on_panic, 0, "Do a sync before rebooting from a panic");
124 SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
/*
 * Global hook for a hardware watchdog tickle routine; initialised to NULL.
 * NOTE(review): nothing visible in this listing assigns or calls it.
 * Presumably a watchdog driver installs its handler here - confirm.
 */
128 * If there is a hardware watchdog, point this at the function needed to
130 * It's needed when the kernel needs to do some lengthy operations.
131 * e.g. in wd.c when dumping core.. It's most annoying to have
132 * your precious core-dump only half written because the wdog kicked in.
134 watchdog_tickle_fn wdog_tickler = NULL;
/*
 * Non-static panic, dump and boot state:
 *   panicstr            - compared against NULL by boot() to tell a panic
 *                         shutdown from an orderly one
 *   panic_cpu_interlock - taken with atomic_poll_acquire_int() by the first
 *                         cpu to enter panic()
 *   panic_cpu_gd        - set by panic() to the globaldata of the cpu that
 *                         won the interlock
 *   bootverbose         - also exported read/write as debug.bootverbose
 *   cold                - starts at 1; boot() skips the sync and the dump
 *                         while it is still non-zero
 */
138 * Variable panicstr contains argument to first call to panic; used as flag
139 * to indicate that the kernel has already called panic.
141 const char *panicstr;
143 int dumping; /* system is dumping */
145 u_int panic_cpu_interlock; /* panic interlock */
146 globaldata_t panic_cpu_gd; /* which cpu took the panic */
149 int bootverbose = 0; /* note: assignment to force non-bss */
150 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW,
151 &bootverbose, 0, "Verbose kernel messages");
153 int cold = 1; /* note: assignment to force non-bss */
154 int dumplo; /* OBSOLETE - savecore compat */
/*
 * Forward declarations of the file-local (static) helpers.
 * boot() is marked __dead2.
 * The (void *, int) helpers are the ones shutdown_conf() registers on the
 * shutdown_final event; shutdown_busycount1/2 are passed to
 * scan_all_buffers() as callbacks.
 */
157 static void boot (int) __dead2;
158 static void dumpsys (void);
159 static int setdumpdev (cdev_t dev);
160 static void poweroff_wait (void *, int);
161 static void print_uptime (void);
162 static void shutdown_halt (void *junk, int howto);
163 static void shutdown_panic (void *junk, int howto);
164 static void shutdown_reset (void *junk, int howto);
165 static int shutdown_busycount1(struct buf *bp, void *info);
166 static int shutdown_busycount2(struct buf *bp, void *info);
167 static void shutdown_cleanup_proc(struct proc *p);
/*
 * shutdown_conf(unused): hooks this file's handlers onto the shutdown_final
 * event, which boot() invokes as its last step:
 *   poweroff_wait   at SHUTDOWN_PRI_FIRST
 *   shutdown_halt   at SHUTDOWN_PRI_LAST + 100
 *   shutdown_panic  at SHUTDOWN_PRI_LAST + 100
 *   shutdown_reset  at SHUTDOWN_PRI_LAST + 200
 * The argument is not used.  The function itself is run through the SYSINIT
 * entry that follows it.
 * NOTE(review): presumably lower priority values run earlier, so the reset
 * handler runs last - confirm against the eventhandler implementation.
 * NOTE(review): the return-type line and braces are missing from this listing.
 */
169 /* register various local shutdown events */
171 shutdown_conf(void *unused)
173 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST);
174 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100);
175 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100);
176 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200);
179 SYSINIT(shutdown_conf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, shutdown_conf, NULL)
/*
 * sys_reboot(uap): entry point for the reboot system call.
 * The visible part checks that the current thread holds PRIV_REBOOT via
 * priv_check() and keeps the result in error.
 * NOTE(review): the error return and the statement that acts on uap are
 * not present in this listing.
 */
184 * The system call that results in a reboot
187 sys_reboot(struct reboot_args *uap)
189 struct thread *td = curthread;
192 if ((error = priv_check(td, PRIV_REBOOT)))
200 * Called by events that want to shut down, e.g. <CTL><ALT><DEL> on a PC
/*
 * shutdown_nice(howto): request an orderly shutdown.
 * Saves howto in shutdown_howto, which boot() later ORs into its own flags.
 * If an init process exists it is sent SIGINT so that it drives the shutdown.
 * NOTE(review): the else branch for the no-init case has its body missing
 * from this listing; only its comment survives.
 */
202 static int shutdown_howto = 0;
205 shutdown_nice(int howto)
207 shutdown_howto = howto;
209 /* Send a signal to init(8) and have it shutdown the world */
210 if (initproc != NULL) {
211 ksignal(initproc, SIGINT);
213 /* No init(8) running, so simply reboot */
/*
 * File-local shutdown and dump state.
 *   waittime   - starts at -1; boot() only runs its sync pass while this is
 *                still negative (the statement that changes it is not
 *                visible in this listing)
 *   dumpthread - set to curthread by dumpsys()
 *   dumppcb    - NOTE(review): no use of this is visible in the listing
 */
218 static int waittime = -1;
219 static struct thread *dumpthread;
220 static struct pcb dumppcb;
/*
 * Fragment of print_uptime(): prints ts.tv_sec as days, hours, minutes and
 * seconds ("%ldd", "%ldh", "%ldm", "%lds\n").  A unit is printed when the
 * flag f is set or the remaining value reaches that unit.
 * NOTE(review): the function header, the code that fills in ts, and any
 * statements that reduce ts.tv_sec or set f between units fall on lines
 * absent from this listing.  As listed, each quotient is taken from the
 * same ts.tv_sec - confirm against the complete file.
 */
231 if (ts.tv_sec >= 86400) {
232 kprintf("%ldd", ts.tv_sec / 86400);
236 if (f || ts.tv_sec >= 3600) {
237 kprintf("%ldh", ts.tv_sec / 3600);
241 if (f || ts.tv_sec >= 60) {
242 kprintf("%ldm", ts.tv_sec / 60);
246 kprintf("%lds\n", ts.tv_sec);
250 * Go through the rigmarole of shutting down..
251 * this used to be in machdep.c but I'll be damned if I could see
252 * anything machine dependent in it.
/*
 * Body of boot(howto), the common shutdown path (declared __dead2).
 * As far as this listing shows, it proceeds in this order:
 *   1. Calls the td_release hook of the current thread, if set, then raises
 *      the thread to TDPRI_MAX.
 *   2. ORs in the flags saved by shutdown_nice() (shutdown_howto).
 *   3. When not panicking and not already on cpu 0, migrates to cpu 0.
 *   4. Invokes the shutdown_pre_sync event handlers.
 *   5. When not panicking, runs shutdown_cleanup_proc() on curproc, proc0
 *      and init.  Init is sent SIGSTOP first, followed by a short sleep,
 *      when it is not the caller.  Then calls vfs_cache_setroot(NULL, NULL).
 *   6. Unless cold, RB_NOSYNC is set, or waittime >= 0: calls sys_sync() and
 *      polls scan_all_buffers(shutdown_busycount1) for up to 20 iterations,
 *      sleeping longer on each pass, then recounts with shutdown_busycount2
 *      and reports buffers it gave up on.
 *   7. Tests for a dump without halt (RB_DUMP set, RB_HALT clear) on a
 *      system that is no longer cold.
 *   8. Invokes shutdown_post_sync, then shutdown_final, then spins forever
 *      in case the reset handler did not take effect.
 *
 * NOTE(review): the function header, braces, several conditions and short
 * statements are missing from this listing.  That includes the unmount call
 * and the dump call.  Read it next to the complete file.
 */
258 * Get rid of any user scheduler baggage and then give
259 * us a high priority.
261 if (curthread->td_release)
262 curthread->td_release(curthread);
263 lwkt_setpri_self(TDPRI_MAX);
265 /* collect extra flags that shutdown_nice might have set */
266 howto |= shutdown_howto;
270 * We really want to shutdown on the BSP. Subsystems such as ACPI
271 * can't power-down the box otherwise.
273 if (smp_active_mask > 1) {
274 kprintf("boot() called on cpu#%d\n", mycpu->gd_cpuid);
276 if (panicstr == NULL && mycpu->gd_cpuid != 0) {
277 kprintf("Switching to cpu #0 for shutdown\n");
278 lwkt_setcpu_self(globaldata_find(0));
282 * Do any callouts that should be done BEFORE syncing the filesystems.
284 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
287 * Try to get rid of any remaining FS references. The calling
288 * process, proc0, and init may still hold references. The
289 * VFS cache subsystem may still hold a root reference to root.
291 * XXX this needs work. We really need to SIGSTOP all remaining
292 * processes in order to avoid blowups due to proc0's filesystem
293 * references going away. For now just make sure that the init
294 * process is stopped.
296 if (panicstr == NULL) {
297 shutdown_cleanup_proc(curproc);
298 shutdown_cleanup_proc(&proc0);
300 if (initproc != curproc) {
301 ksignal(initproc, SIGSTOP);
302 tsleep(boot, 0, "shutdn", hz / 20);
304 shutdown_cleanup_proc(initproc);
306 vfs_cache_setroot(NULL, NULL);
310 * Now sync filesystems
312 if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
313 int iter, nbusy, pbusy;
316 kprintf("\nsyncing disks... ");
318 sys_sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
321 * With soft updates, some buffers that are
322 * written will be remarked as dirty until other
323 * buffers are written.
325 for (iter = pbusy = 0; iter < 20; iter++) {
326 nbusy = scan_all_buffers(shutdown_busycount1, NULL);
329 kprintf("%d ", nbusy);
335 * Process soft update work queue if buffers don't sync
336 * after 6 iterations by permitting the syncer to run.
341 sys_sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
342 tsleep(boot, 0, "shutdn", hz * iter / 20 + 1);
346 * Count only busy local buffers to prevent forcing
347 * a fsck if we're just a client of a wedged NFS server
349 nbusy = scan_all_buffers(shutdown_busycount2, NULL);
352 * Failed to sync all blocks. Indicate this and don't
353 * unmount filesystems (thus forcing an fsck on reboot).
355 kprintf("giving up on %d buffers\n", nbusy);
357 Debugger("busy buffer problem");
359 tsleep(boot, 0, "shutdn", hz * 5 + 1);
363 * Unmount filesystems
365 if (panicstr == NULL)
368 tsleep(boot, 0, "shutdn", hz / 10 + 1);
374 * Dump before doing post_sync shutdown ops
377 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold)
381 * Ok, now do things that assume all filesystem activity has
382 * been completed. This will also call the device shutdown
385 EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
387 /* Now that we're going to really halt the system... */
388 EVENTHANDLER_INVOKE(shutdown_final, howto);
390 for(;;) ; /* safety against shutdown_reset not working */
/*
 * shutdown_busycount1(bp, info): callback handed to scan_all_buffers() by
 * the sync loop in boot().  It tests two conditions on the buffer:
 *   - not B_INVAL and BUF_REFCNT(bp) > 0
 *   - B_DELWRI set with B_INVAL clear
 * info is not referenced in the visible lines.
 * NOTE(review): the return statements are missing from this listing.
 * Presumably a matching buffer counts as busy - confirm.
 */
395 shutdown_busycount1(struct buf *bp, void *info)
397 if ((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp) > 0)
399 if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)
/*
 * shutdown_busycount2(bp, info): callback handed to scan_all_buffers() for
 * the final busy-buffer count in boot().
 * It applies the same two flag tests as shutdown_busycount1, combined into
 * one condition.  It then singles out buffers that have no vnode, or whose
 * vnode has no active write tracking (bio_track_active() == 0).
 * With SHOW_BUSYBUFS or DIAGNOSTIC defined it formats the buffer pointer,
 * flags, logical offset and bio2 offset for printing.
 * NOTE(review): the return statements and the kprintf call line are missing
 * from this listing, so how each case is counted cannot be read from here.
 */
405 shutdown_busycount2(struct buf *bp, void *info)
407 if (((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp)) ||
408 ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
410 * Only count buffers undergoing write I/O
411 * on the related vnode.
413 if (bp->b_vp == NULL ||
414 bio_track_active(&bp->b_vp->v_track_write) == 0) {
417 #if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
419 "%p dev:?, flags:%08x, loffset:%lld, doffset:%lld\n",
421 bp->b_flags, bp->b_loffset,
422 bp->b_bio2.bio_offset);
/*
 * shutdown_halt(junk, howto): shutdown_final handler for a clean halt.
 * Acts only when RB_HALT is set in howto.  It announces that the system has
 * halted and prompts for a key press to reboot.
 * junk is not referenced in the visible lines.
 * NOTE(review): the _KERNEL_VIRTUAL branch body and the switch that handles
 * the console read (including the -1 case) are mostly missing from this
 * listing.
 */
430 * If the shutdown was a clean halt, behave accordingly.
433 shutdown_halt(void *junk, int howto)
435 if (howto & RB_HALT) {
437 kprintf("The operating system has halted.\n");
438 #ifdef _KERNEL_VIRTUAL
441 kprintf("Please press any key to reboot.\n\n");
443 case -1: /* No console, just die */
455 * Check to see if the system panicked, pause and then reboot
456 * according to the specified delay.
/*
 * shutdown_panic(junk, howto): shutdown_final handler for the panic path.
 * Acts only when RB_DUMP is set in howto, which panic() always sets.
 * Behaviour depends on PANIC_REBOOT_WAIT_TIME:
 *   0          - falls to the "reboot NOW" branch with no wait
 *   -1         - skips the countdown and prints the press-a-key prompt
 *   otherwise  - prints the countdown message, then polls the console with
 *                cncheckc() every 1/10th of a second for that many seconds
 * junk is not referenced in the visible lines.
 * NOTE(review): the loop condition, the break/return statements and the
 * blocking console read are missing from this listing.
 */
459 shutdown_panic(void *junk, int howto)
463 if (howto & RB_DUMP) {
464 if (PANIC_REBOOT_WAIT_TIME != 0) {
465 if (PANIC_REBOOT_WAIT_TIME != -1) {
466 kprintf("Automatic reboot in %d seconds - "
467 "press a key on the console to abort\n",
468 PANIC_REBOOT_WAIT_TIME);
469 for (loop = PANIC_REBOOT_WAIT_TIME * 10;
471 DELAY(1000 * 100); /* 1/10th second */
472 /* Did user type a key? */
473 if (cncheckc() != -1)
479 } else { /* zero time specified - reboot NOW */
482 kprintf("--> Press a key on the console to reboot,\n");
483 kprintf("--> or switch off the system now.\n");
/*
 * shutdown_reset(junk, howto): shutdown_final handler registered at the
 * highest priority value, SHUTDOWN_PRI_LAST + 200.
 * Prints "Rebooting..." and delays one second so the message can be read.
 * Neither argument is referenced in the visible lines.
 * NOTE(review): the statement that performs the reset (between the
 * commented-out cpu_boot() call and NOTREACHED) is missing from this listing.
 */
489 * Everything done, now reset
492 shutdown_reset(void *junk, int howto)
494 kprintf("Rebooting...\n");
495 DELAY(1000000); /* wait 1 sec for kprintf's to complete and be read */
496 /* cpu_boot(howto); */ /* doesn't do anything at the moment */
498 /* NOTREACHED */ /* assuming reset worked */
/*
 * shutdown_cleanup_proc(p): release what process p still holds on the
 * filesystem and its user address space.  boot() calls it for curproc,
 * proc0 and init.
 * If p has a file descriptor table, it drops the namecache references for
 * the current (fd_ncdir), root (fd_nrdir) and jail (fd_njdir) directories.
 * It then removes user pages and map entries up to VM_MAX_USER_ADDRESS via
 * pmap_remove_pages() and vm_map_remove().
 * NOTE(review): the guards around each cache_drop(), the vnode handling,
 * the lookup of vm and the lower address bound are missing from this
 * listing.
 */
502 * Try to remove FS references in the specified process. This function
503 * is used during shutdown
507 shutdown_cleanup_proc(struct proc *p)
509 struct filedesc *fdp;
514 if ((fdp = p->p_fd) != NULL) {
517 cache_drop(&fdp->fd_ncdir);
522 cache_drop(&fdp->fd_nrdir);
527 cache_drop(&fdp->fd_njdir);
540 pmap_remove_pages(vmspace_pmap(vm),
542 VM_MAX_USER_ADDRESS);
543 vm_map_remove(&vm->vm_map,
545 VM_MAX_USER_ADDRESS);
/*
 * Crash-dump constants and switches.
 *   dumpmag  - constant 0x8fca0101, tagged __used__ so the compiler keeps it
 *              even though nothing in this file reads it
 *   dumpsize - initialised to 0
 *   dodump   - exported read/write as machdep.do_dump; defaults to 1
 * NOTE(review): the code that consults dodump and fills in dumpsize is not
 * visible in this listing.
 */
550 * Magic number for savecore
552 * exported (symorder) and used at least by savecore(8)
554 * Mark it as used so that gcc doesn't optimize it away.
556 __attribute__((__used__))
557 static u_long const dumpmag = 0x8fca0101UL;
559 static int dumpsize = 0; /* also for savecore */
561 static int dodump = 1;
563 SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0,
564 "Try to perform coredump on kernel panic");
/*
 * setdumpdev(dev): validate dev as the crash-dump device and work out where
 * on it the dump starts.
 * Visible steps:
 *   - zero the partinfo buffer
 *   - set doopen when the device sysref count is exactly 1, then open the
 *     device read-only as a character device with the proc0 credentials
 *   - query the partition geometry with the DIOCGPART ioctl, then close
 *   - reject an ioctl error, zero media_blocks or zero media_blksize
 *   - newdumplo = media_blocks minus Maxmem pages expressed in DEV_BSIZE
 *     blocks, which places the dump at the end of the media
 *   - reject a start that falls below reserved_blocks
 *   - store the result in dumplo64
 * NOTE(review): the return type line, the return statements, the handling
 * of a NULL dev, the tests on doopen and the assignment of dumpdev are
 * missing from this listing.
 */
567 setdumpdev(cdev_t dev)
569 struct partinfo pinfo;
578 bzero(&pinfo, sizeof(pinfo));
581 * We have to open the device before we can perform ioctls on it,
582 * or the slice/label data may not be present. Device opens are
583 * usually tracked by specfs, but the dump device can be set in
584 * early boot and may not be open so this is somewhat of a hack.
586 doopen = (dev->si_sysref.refcnt == 1);
588 error = dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred);
592 error = dev_dioctl(dev, DIOCGPART, (void *)&pinfo, 0,
593 proc0.p_ucred, NULL);
595 dev_dclose(dev, FREAD, S_IFCHR);
596 if (error || pinfo.media_blocks == 0 || pinfo.media_blksize == 0)
599 newdumplo = pinfo.media_blocks -
600 ((u_int64_t)Maxmem * PAGE_SIZE / DEV_BSIZE);
601 if ((int64_t)newdumplo < (int64_t)pinfo.reserved_blocks)
604 dumplo64 = newdumplo;
/*
 * dump_conf(dummy): boot-time selection of the dump device, run through the
 * SYSINIT entry that follows it (SI_SUB_DUMP_CONF, SI_ORDER_FIRST).
 * Allocates an MNAMELEN-byte buffer, reads the "dumpdev" tunable into it
 * and, when the tunable is set, resolves the name with kgetdiskbyname().
 * It then passes dumpdev to setdumpdev() and checks for failure.
 * dummy is not referenced in the visible lines.
 * NOTE(review): the assignment of dev to dumpdev, the failure handling and
 * the release of the path buffer are missing from this listing.
 */
610 static void dump_conf (void *dummy);
612 dump_conf(void *dummy)
617 path = kmalloc(MNAMELEN, M_TEMP, M_WAITOK);
618 if (TUNABLE_STR_FETCH("dumpdev", path, MNAMELEN) != 0) {
619 dev = kgetdiskbyname(path);
624 if (setdumpdev(dumpdev) != 0)
628 SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL)
/*
 * sysctl_kern_dumpdev: handler behind the read/write opaque sysctl
 * kern.dumpdev (type string "T,udev_t").
 * Reads: converts the current dumpdev with dev2udev() and hands that value
 * to sysctl_handle_opaque().
 * Writes: when the copy succeeded and the request carries new data
 * (req->newptr != NULL), converts the new value back with udev2dev() and
 * applies it through setdumpdev().
 * NOTE(review): the local declarations and the final return are missing
 * from this listing.
 */
631 sysctl_kern_dumpdev(SYSCTL_HANDLER_ARGS)
636 ndumpdev = dev2udev(dumpdev);
637 error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req);
638 if (error == 0 && req->newptr != NULL)
639 error = setdumpdev(udev2dev(ndumpdev, 0));
643 SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW,
644 0, sizeof dumpdev, sysctl_kern_dumpdev, "T,udev_t", "");
/*
 * Fragment of dumpsys(): write the crash dump to dumpdev.
 * Records the dumping thread in dumpthread and has a bail-out message for a
 * dump that is already in progress.  It announces the target device and the
 * dumplo64 block number, then calls dev_ddump(dumpdev).
 * The result is reported as "succeeded" or as "failed, reason: " followed by
 * one of the fixed explanations below, or the raw error number.
 * NOTE(review): the function header, the re-entry test, the early-exit
 * checks and the switch/case labels that map error codes to these messages
 * are missing from this listing.
 */
647 * Doadump comes here after turning off memory management and
648 * getting on the dump stack, either when called above, or by
649 * the auto-restart code.
657 dumpthread = curthread;
659 kprintf("Dump already in progress, bailing...\n");
667 kprintf("\ndumping to dev %s, blockno %lld\n",
669 (long long)dumplo64);
671 error = dev_ddump(dumpdev);
673 kprintf("succeeded\n");
676 kprintf("failed, reason: ");
680 kprintf("device doesn't support a dump routine\n");
684 kprintf("device bad\n");
688 kprintf("device not ready\n");
692 kprintf("area improper\n");
696 kprintf("i/o error\n");
700 kprintf("aborted from console\n");
704 kprintf("unknown, error = %d\n", error);
/*
 * dumpstatus(addr, count): progress and abort check for a running dump.
 * Acts only when addr is a multiple of 1 MiB.  It prints count in whole
 * mebibytes and polls the console, comparing the character against 0x03
 * (CTRL-C).  It also prints the "[CTRL-C to abort]" hint.
 * NOTE(review): the return type, the return values and the conditions
 * around the hint are missing from this listing.
 */
710 dumpstatus(vm_offset_t addr, off_t count)
714 if (addr % (1024 * 1024) == 0) {
719 kprintf("%ld ", (long)(count / (1024 * 1024)));
722 if ((c = cncheckc()) == 0x03)
725 kprintf("[CTRL-C to abort] ");
/*
 * panic(fmt, ...): report an unrecoverable error and start the shutdown.
 * Visible steps:
 *   1. Cpu arbitration.  The first cpu to acquire panic_cpu_interlock
 *      records itself in panic_cpu_gd.  Any other cpu bumps its
 *      gd_trap_nesting_level, prints a SECONDARY PANIC line while that
 *      level is below 25, clears td_release on its thread and deschedules
 *      itself.
 *   2. Boot flags start as RB_AUTOBOOT | RB_DUMP.  RB_NOSYNC is added when
 *      sync_on_panic is 0, and again under a second condition whose test is
 *      not in this listing.
 *   3. The message is formatted into a static 256-byte buffer with
 *      kvsnprintf() and printed as "panic: ...", followed by mp_lock and
 *      the cpu id in two separate prints.
 *   4. Optional hooks: error LED when NGPIO > 0 and ERROR_LED_ON_PANIC is
 *      defined; a block guarded by WDOG_DISABLE_ON_PANIC and
 *      WATCHDOG_ENABLE whose body is not visible.
 *   5. A stack trace when this is a new panic and trace_on_panic is set;
 *      the debugger when debugger_on_panic is set.
 * NOTE(review): the va_list handling, the assignment of panicstr and
 * newpanic, the trace/debugger calls and the final call into boot() are
 * missing from this listing.
 */
731 * Panic is called on unresolvable fatal errors. It prints "panic: mesg",
732 * and then reboots. If we are called twice, then we avoid trying to sync
733 * the disks as this often leads to recursive panics.
736 panic(const char *fmt, ...)
738 int bootopt, newpanic;
740 static char buf[256];
744 * If a panic occurs on multiple cpus before the first is able to
745 * halt the other cpus, only one cpu is allowed to take the panic.
746 * Attempt to be verbose about this situation but if the kprintf()
747 * itself panics don't let us overrun the kernel stack.
749 * Be very nasty about descheduling our thread at the lowest
750 * level possible in an attempt to freeze the thread without
751 * inducing further panics.
753 * Bumping gd_trap_nesting_level will also bypass assertions in
754 * lwkt_switch() and allow us to switch away even if we are a
755 * FAST interrupt or IPI.
757 if (atomic_poll_acquire_int(&panic_cpu_interlock)) {
758 panic_cpu_gd = mycpu;
759 } else if (panic_cpu_gd != mycpu) {
761 ++mycpu->gd_trap_nesting_level;
762 if (mycpu->gd_trap_nesting_level < 25) {
763 kprintf("SECONDARY PANIC ON CPU %d THREAD %p\n",
764 mycpu->gd_cpuid, curthread);
766 curthread->td_release = NULL; /* be a grinch */
768 lwkt_deschedule_self(curthread);
772 /* --mycpu->gd_trap_nesting_level */
776 bootopt = RB_AUTOBOOT | RB_DUMP;
777 if (sync_on_panic == 0)
778 bootopt |= RB_NOSYNC;
781 bootopt |= RB_NOSYNC;
788 kvsnprintf(buf, sizeof(buf), fmt, ap);
792 kprintf("panic: %s\n", buf);
794 /* two separate prints in case of an unmapped page and trap */
795 kprintf("mp_lock = %08x; ", mp_lock);
796 kprintf("cpuid = %d\n", mycpu->gd_cpuid);
799 #if (NGPIO > 0) && defined(ERROR_LED_ON_PANIC)
800 led_switch("error", 1);
803 #if defined(WDOG_DISABLE_ON_PANIC) && defined(WATCHDOG_ENABLE)
808 if (newpanic && trace_on_panic)
810 if (debugger_on_panic)
/*
 * poweroff_wait(junk, howto): shutdown_final handler registered at
 * SHUTDOWN_PRI_FIRST.
 * The guard tests for a request without RB_POWEROFF or a non-positive
 * poweroff_delay.  Otherwise it busy-waits with DELAY() for
 * poweroff_delay * 1000.
 * poweroff_delay defaults to POWEROFF_DELAY (5000 unless the build overrides
 * it) and is exported read/write as kern.shutdown.poweroff_delay.
 * NOTE(review): elsewhere in this file DELAY(1000 * 100) is annotated as
 * 1/10th second, so DELAY takes microseconds and poweroff_delay is
 * presumably in milliseconds - confirm.
 * NOTE(review): the statement under the guard (presumably an early return)
 * and the #endif are missing from this listing.
 */
817 * Support for poweroff delay.
819 #ifndef POWEROFF_DELAY
820 # define POWEROFF_DELAY 5000
822 static int poweroff_delay = POWEROFF_DELAY;
824 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
825 &poweroff_delay, 0, "");
828 poweroff_wait(void *junk, int howto)
830 if(!(howto & RB_POWEROFF) || poweroff_delay <= 0)
832 DELAY(poweroff_delay * 1000);
/*
 * shutdown_kproc(arg, howto): shutdown event handler that parks one system
 * thread.  arg is the struct thread pointer of the thread to stop.
 * Prints a "Waiting (max N seconds)" message that names the process
 * (p_comm) when the thread has one, or the thread (td_comm) otherwise.
 * It then calls suspend_kproc() with a timeout of kproc_shutdown_wait
 * seconds converted to ticks (multiplied by hz).
 * An EWOULDBLOCK result is reported as "timed out"; anything else is
 * reported as "stopped".
 * kproc_shutdown_wait defaults to 60 and is exported read/write as
 * kern.shutdown.kproc_shutdown_wait.
 * howto is not referenced in the visible lines.
 * NOTE(review): the local declarations, the else keywords and any early
 * exit (for example while panicking) are missing from this listing.
 */
836 * Some system processes (e.g. syncer) need to be stopped at appropriate
837 * points in their main loops prior to a system shutdown, so that they
838 * won't interfere with the shutdown process (e.g. by holding a disk buf
839 * to cause sync to fail). For each of these system processes, register
840 * shutdown_kproc() as a handler for one of shutdown events.
842 static int kproc_shutdown_wait = 60;
843 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
844 &kproc_shutdown_wait, 0, "");
847 shutdown_kproc(void *arg, int howto)
856 td = (struct thread *)arg;
857 if ((p = td->td_proc) != NULL) {
858 kprintf("Waiting (max %d seconds) for system process `%s' to stop...",
859 kproc_shutdown_wait, p->p_comm);
861 kprintf("Waiting (max %d seconds) for system thread %s to stop...",
862 kproc_shutdown_wait, td->td_comm);
864 error = suspend_kproc(td, kproc_shutdown_wait * hz);
866 if (error == EWOULDBLOCK)
867 kprintf("timed out\n");
869 kprintf("stopped\n");