kernel: Make SMP support default (and non-optional).
[dragonfly.git] / sys / kern / kern_shutdown.c
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 1986, 1988, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/kern/kern_shutdown.c,v 1.72.2.12 2002/02/21 19:15:10 dillon Exp $
40 */
41
42#include "opt_ddb.h"
b1e04573 43#include "opt_ddb_trace.h"
984263bc
MD
44#include "opt_panic.h"
45#include "opt_show_busybufs.h"
dc2ee94e 46#include "use_gpio.h"
984263bc
MD
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/eventhandler.h>
51#include <sys/buf.h>
b24cd69c 52#include <sys/disk.h>
a6c0f342 53#include <sys/diskslice.h>
984263bc
MD
54#include <sys/reboot.h>
55#include <sys/proc.h>
895c1f85 56#include <sys/priv.h>
faa575e7
MD
57#include <sys/fcntl.h> /* FREAD */
58#include <sys/stat.h> /* S_IFCHR */
984263bc
MD
59#include <sys/vnode.h>
60#include <sys/kernel.h>
b24cd69c 61#include <sys/kerneldump.h>
984263bc
MD
62#include <sys/kthread.h>
63#include <sys/malloc.h>
64#include <sys/mount.h>
65#include <sys/queue.h>
66#include <sys/sysctl.h>
f6a22062 67#include <sys/vkernel.h>
984263bc
MD
68#include <sys/conf.h>
69#include <sys/sysproto.h>
335dda38 70#include <sys/device.h>
984263bc 71#include <sys/cons.h>
f6a22062
MD
72#include <sys/shm.h>
73#include <sys/kern_syscall.h>
74#include <vm/vm_map.h>
75#include <vm/pmap.h>
76
9ec81f83 77#include <sys/thread2.h>
3020e3be 78#include <sys/buf2.h>
684a93c4 79#include <sys/mplock2.h>
984263bc 80
5ea440eb 81#include <machine/cpu.h>
984263bc
MD
82#include <machine/clock.h>
83#include <machine/md_var.h>
0f7a3396 84#include <machine/smp.h> /* smp_active_mask, cpuid */
c439ad8f 85#include <machine/vmparam.h>
b24cd69c 86#include <machine/thread.h>
984263bc
MD
87
88#include <sys/signalvar.h>
89
3db39609
AH
90#include <sys/wdog.h>
91#include <dev/misc/gpio/gpio.h>
92
984263bc
MD
93#ifndef PANIC_REBOOT_WAIT_TIME
94#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
95#endif
96
97/*
98 * Note that stdarg.h and the ANSI style va_start macro is used for both
e2565a42
MD
99 * ANSI and traditional C compilers. We use the machine version to stay
100 * within the confines of the kernel header files.
984263bc
MD
101 */
102#include <machine/stdarg.h>
103
104#ifdef DDB
1e5fb84b 105#include <ddb/ddb.h>
984263bc
MD
106#ifdef DDB_UNATTENDED
107int debugger_on_panic = 0;
108#else
109int debugger_on_panic = 1;
110#endif
111SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
112 &debugger_on_panic, 0, "Run debugger on kernel panic");
b1e04573 113
b1e04573
JH
114#ifdef DDB_TRACE
115int trace_on_panic = 1;
116#else
117int trace_on_panic = 0;
118#endif
119SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW,
120 &trace_on_panic, 0, "Print stack trace on kernel panic");
984263bc
MD
121#endif
122
fd43fc00 123static int sync_on_panic = 0;
baf88b6b
MD
124SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW,
125 &sync_on_panic, 0, "Do a sync before rebooting from a panic");
126
984263bc
MD
127SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
128
984263bc
MD
129/*
130 * Variable panicstr contains argument to first call to panic; used as flag
131 * to indicate that the kernel has already called panic.
132 */
133const char *panicstr;
134
135int dumping; /* system is dumping */
b24cd69c
AH
136static struct dumperinfo dumper; /* selected dumper */
137
ddcc5e10 138globaldata_t panic_cpu_gd; /* which cpu took the panic */
3933a3ab
MD
139struct lwkt_tokref panic_tokens[LWKT_MAXTOKENS];
140int panic_tokens_count;
984263bc 141
a8a86718 142int bootverbose = 0; /* note: assignment to force non-bss */
ffe71962
SS
143SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW,
144 &bootverbose, 0, "Verbose kernel messages");
145
a8a86718 146int cold = 1; /* note: assignment to force non-bss */
e0fc5693
MD
147int dumplo; /* OBSOLETE - savecore compat */
148u_int64_t dumplo64;
a8a86718 149
402ed7e1 150static void boot (int) __dead2;
b13267a5 151static int setdumpdev (cdev_t dev);
402ed7e1
RG
152static void poweroff_wait (void *, int);
153static void print_uptime (void);
154static void shutdown_halt (void *junk, int howto);
155static void shutdown_panic (void *junk, int howto);
156static void shutdown_reset (void *junk, int howto);
41cbf46c
MD
157static int shutdown_busycount1(struct buf *bp, void *info);
158static int shutdown_busycount2(struct buf *bp, void *info);
f6a22062 159static void shutdown_cleanup_proc(struct proc *p);
984263bc
MD
160
161/* register various local shutdown events */
162static void
163shutdown_conf(void *unused)
164{
165 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST);
166 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100);
167 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100);
168 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200);
169}
170
ba39e2e0 171SYSINIT(shutdown_conf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, shutdown_conf, NULL)
984263bc
MD
172
173/* ARGSUSED */
174
175/*
176 * The system call that results in a reboot
3919ced0
MD
177 *
178 * MPALMOSTSAFE
984263bc
MD
179 */
180int
753fd850 181sys_reboot(struct reboot_args *uap)
984263bc 182{
dadab5e9 183 struct thread *td = curthread;
984263bc
MD
184 int error;
185
3b1d99e9 186 if ((error = priv_check(td, PRIV_REBOOT)))
984263bc
MD
187 return (error);
188
3919ced0 189 get_mplock();
984263bc 190 boot(uap->opt);
3919ced0 191 rel_mplock();
984263bc
MD
192 return (0);
193}
194
195/*
196 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC
197 */
198static int shutdown_howto = 0;
199
200void
201shutdown_nice(int howto)
202{
203 shutdown_howto = howto;
204
205 /* Send a signal to init(8) and have it shutdown the world */
206 if (initproc != NULL) {
84204577 207 ksignal(initproc, SIGINT);
984263bc
MD
208 } else {
209 /* No init(8) running, so simply reboot */
210 boot(RB_NOSYNC);
211 }
212 return;
213}
214static int waittime = -1;
b24cd69c
AH
215struct pcb dumppcb;
216struct thread *dumpthread;
984263bc
MD
217
218static void
c972a82f 219print_uptime(void)
984263bc
MD
220{
221 int f;
222 struct timespec ts;
223
224 getnanouptime(&ts);
6ea70f76 225 kprintf("Uptime: ");
984263bc
MD
226 f = 0;
227 if (ts.tv_sec >= 86400) {
6ea70f76 228 kprintf("%ldd", ts.tv_sec / 86400);
984263bc
MD
229 ts.tv_sec %= 86400;
230 f = 1;
231 }
232 if (f || ts.tv_sec >= 3600) {
6ea70f76 233 kprintf("%ldh", ts.tv_sec / 3600);
984263bc
MD
234 ts.tv_sec %= 3600;
235 f = 1;
236 }
237 if (f || ts.tv_sec >= 60) {
6ea70f76 238 kprintf("%ldm", ts.tv_sec / 60);
984263bc
MD
239 ts.tv_sec %= 60;
240 f = 1;
241 }
6ea70f76 242 kprintf("%lds\n", ts.tv_sec);
984263bc
MD
243}
244
245/*
246 * Go through the rigmarole of shutting down..
247 * this used to be in machdep.c but I'll be dammned if I could see
248 * anything machine dependant in it.
249 */
250static void
41c20dac 251boot(int howto)
984263bc 252{
3ad8cc28
MD
253 /*
254 * Get rid of any user scheduler baggage and then give
255 * us a high priority.
256 */
257 if (curthread->td_release)
258 curthread->td_release(curthread);
259 lwkt_setpri_self(TDPRI_MAX);
984263bc
MD
260
261 /* collect extra flags that shutdown_nice might have set */
262 howto |= shutdown_howto;
263
7710b6e4
MD
264 /*
265 * We really want to shutdown on the BSP. Subsystems such as ACPI
266 * can't power-down the box otherwise.
267 */
0f7a3396 268 if (smp_active_mask > 1) {
6ea70f76 269 kprintf("boot() called on cpu#%d\n", mycpu->gd_cpuid);
984263bc 270 }
7710b6e4 271 if (panicstr == NULL && mycpu->gd_cpuid != 0) {
6ea70f76 272 kprintf("Switching to cpu #0 for shutdown\n");
7710b6e4
MD
273 lwkt_setcpu_self(globaldata_find(0));
274 }
984263bc
MD
275 /*
276 * Do any callouts that should be done BEFORE syncing the filesystems.
277 */
278 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
279
f6a22062
MD
280 /*
281 * Try to get rid of any remaining FS references. The calling
282 * process, proc0, and init may still hold references. The
283 * VFS cache subsystem may still hold a root reference to root.
ee92d6d6
MD
284 *
285 * XXX this needs work. We really need to SIGSTOP all remaining
286 * processes in order to avoid blowups due to proc0's filesystem
287 * references going away. For now just make sure that the init
288 * process is stopped.
f6a22062
MD
289 */
290 if (panicstr == NULL) {
291 shutdown_cleanup_proc(curproc);
292 shutdown_cleanup_proc(&proc0);
ee92d6d6
MD
293 if (initproc) {
294 if (initproc != curproc) {
295 ksignal(initproc, SIGSTOP);
296 tsleep(boot, 0, "shutdn", hz / 20);
297 }
f6a22062 298 shutdown_cleanup_proc(initproc);
ee92d6d6 299 }
f6a22062
MD
300 vfs_cache_setroot(NULL, NULL);
301 }
302
984263bc
MD
303 /*
304 * Now sync filesystems
305 */
306 if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
984263bc
MD
307 int iter, nbusy, pbusy;
308
309 waittime = 0;
6ea70f76 310 kprintf("\nsyncing disks... ");
984263bc 311
753fd850 312 sys_sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
984263bc
MD
313
314 /*
315 * With soft updates, some buffers that are
316 * written will be remarked as dirty until other
317 * buffers are written.
318 */
319 for (iter = pbusy = 0; iter < 20; iter++) {
41cbf46c 320 nbusy = scan_all_buffers(shutdown_busycount1, NULL);
984263bc
MD
321 if (nbusy == 0)
322 break;
6ea70f76 323 kprintf("%d ", nbusy);
984263bc
MD
324 if (nbusy < pbusy)
325 iter = 0;
326 pbusy = nbusy;
e200c40d
DR
327 /*
328 * XXX:
329 * Process soft update work queue if buffers don't sync
330 * after 6 iterations by permitting the syncer to run.
331 */
408357d8
MD
332 if (iter > 5)
333 bio_ops_sync(NULL);
334
753fd850 335 sys_sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
3ad8cc28 336 tsleep(boot, 0, "shutdn", hz * iter / 20 + 1);
984263bc 337 }
6ea70f76 338 kprintf("\n");
984263bc
MD
339 /*
340 * Count only busy local buffers to prevent forcing
341 * a fsck if we're just a client of a wedged NFS server
342 */
41cbf46c 343 nbusy = scan_all_buffers(shutdown_busycount2, NULL);
984263bc
MD
344 if (nbusy) {
345 /*
346 * Failed to sync all blocks. Indicate this and don't
347 * unmount filesystems (thus forcing an fsck on reboot).
348 */
6ea70f76 349 kprintf("giving up on %d buffers\n", nbusy);
a88429e8 350#ifdef DDB
4ac83a78
MD
351 if (debugger_on_panic)
352 Debugger("busy buffer problem");
a88429e8 353#endif /* DDB */
3ad8cc28 354 tsleep(boot, 0, "shutdn", hz * 5 + 1);
984263bc 355 } else {
6ea70f76 356 kprintf("done\n");
984263bc
MD
357 /*
358 * Unmount filesystems
359 */
f6a22062 360 if (panicstr == NULL)
984263bc
MD
361 vfs_unmountall();
362 }
3ad8cc28 363 tsleep(boot, 0, "shutdn", hz / 10 + 1);
984263bc
MD
364 }
365
366 print_uptime();
367
368 /*
87e2fa7e 369 * Dump before doing post_sync shutdown ops
984263bc 370 */
9ec81f83 371 crit_enter();
505230a8
AH
372 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold) {
373 dumpsys();
b24cd69c 374 }
984263bc 375
87e2fa7e
MD
376 /*
377 * Ok, now do things that assume all filesystem activity has
378 * been completed. This will also call the device shutdown
379 * methods.
380 */
381 EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
382
984263bc
MD
383 /* Now that we're going to really halt the system... */
384 EVENTHANDLER_INVOKE(shutdown_final, howto);
385
386 for(;;) ; /* safety against shutdown_reset not working */
387 /* NOTREACHED */
388}
389
4ac83a78
MD
390/*
391 * Pass 1 - Figure out if there are any busy or dirty buffers still present.
392 *
393 * We ignore TMPFS mounts in this pass.
394 */
41cbf46c
MD
395static int
396shutdown_busycount1(struct buf *bp, void *info)
397{
4ac83a78
MD
398 struct vnode *vp;
399
400 if ((vp = bp->b_vp) != NULL && vp->v_tag == VT_TMPFS)
401 return (0);
41cbf46c
MD
402 if ((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp) > 0)
403 return(1);
404 if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)
405 return (1);
406 return (0);
407}
408
4ac83a78
MD
409/*
410 * Pass 2 - only run after pass 1 has completed or has given up
411 *
412 * We ignore TMPFS, NFS, MFS, and SMBFS mounts in this pass.
413 */
41cbf46c
MD
414static int
415shutdown_busycount2(struct buf *bp, void *info)
416{
4ac83a78
MD
417 struct vnode *vp;
418
419 /*
420 * Ignore tmpfs and nfs mounts
421 */
422 if ((vp = bp->b_vp) != NULL) {
423 if (vp->v_tag == VT_TMPFS)
424 return (0);
425 if (vp->v_tag == VT_NFS)
426 return (0);
427 if (vp->v_tag == VT_MFS)
428 return (0);
429 if (vp->v_tag == VT_SMBFS)
430 return (0);
431 }
432
433 /*
434 * Only count buffers stuck on I/O, ignore everything else
435 */
41cbf46c
MD
436 if (((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp)) ||
437 ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
438 /*
439 * Only count buffers undergoing write I/O
440 * on the related vnode.
441 */
442 if (bp->b_vp == NULL ||
a9a20f98 443 bio_track_active(&bp->b_vp->v_track_write) == 0) {
41cbf46c
MD
444 return (0);
445 }
446#if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
6ea70f76 447 kprintf(
0e6594a8 448 "%p dev:?, flags:%08x, loffset:%jd, doffset:%jd\n",
2dc6c129 449 bp,
0e6594a8
SW
450 bp->b_flags, (intmax_t)bp->b_loffset,
451 (intmax_t)bp->b_bio2.bio_offset);
41cbf46c
MD
452#endif
453 return(1);
454 }
455 return(0);
456}
457
984263bc
MD
458/*
459 * If the shutdown was a clean halt, behave accordingly.
460 */
461static void
462shutdown_halt(void *junk, int howto)
463{
464 if (howto & RB_HALT) {
6ea70f76
SW
465 kprintf("\n");
466 kprintf("The operating system has halted.\n");
40d0276c
MD
467#ifdef _KERNEL_VIRTUAL
468 cpu_halt();
469#else
6ea70f76 470 kprintf("Please press any key to reboot.\n\n");
984263bc
MD
471 switch (cngetc()) {
472 case -1: /* No console, just die */
473 cpu_halt();
474 /* NOTREACHED */
475 default:
476 howto &= ~RB_HALT;
477 break;
478 }
40d0276c 479#endif
984263bc
MD
480 }
481}
482
483/*
484 * Check to see if the system paniced, pause and then reboot
485 * according to the specified delay.
486 */
487static void
488shutdown_panic(void *junk, int howto)
489{
490 int loop;
491
492 if (howto & RB_DUMP) {
493 if (PANIC_REBOOT_WAIT_TIME != 0) {
494 if (PANIC_REBOOT_WAIT_TIME != -1) {
6ea70f76 495 kprintf("Automatic reboot in %d seconds - "
984263bc
MD
496 "press a key on the console to abort\n",
497 PANIC_REBOOT_WAIT_TIME);
498 for (loop = PANIC_REBOOT_WAIT_TIME * 10;
499 loop > 0; --loop) {
500 DELAY(1000 * 100); /* 1/10th second */
501 /* Did user type a key? */
502 if (cncheckc() != -1)
503 break;
504 }
505 if (!loop)
506 return;
507 }
508 } else { /* zero time specified - reboot NOW */
509 return;
510 }
6ea70f76
SW
511 kprintf("--> Press a key on the console to reboot,\n");
512 kprintf("--> or switch off the system now.\n");
984263bc
MD
513 cngetc();
514 }
515}
516
/*
 * shutdown_final handler registered with the highest priority value:
 * everything is done, so announce the reboot and reset the cpu.
 * Neither argument is used.
 */
static void
shutdown_reset(void *junk, int howto)
{
	kprintf("Rebooting...\n");

	/* wait 1 sec for kprintf's to complete and be read */
	DELAY(1000000);

	/* cpu_boot(howto); */	/* doesn't do anything at the moment */
	cpu_reset();
	/* NOTREACHED */	/* assuming reset worked */
}
529
f6a22062
MD
530/*
531 * Try to remove FS references in the specified process. This function
532 * is used during shutdown
533 */
534static
535void
536shutdown_cleanup_proc(struct proc *p)
537{
538 struct filedesc *fdp;
539 struct vmspace *vm;
540
541 if (p == NULL)
542 return;
543 if ((fdp = p->p_fd) != NULL) {
544 kern_closefrom(0);
545 if (fdp->fd_cdir) {
28623bf9 546 cache_drop(&fdp->fd_ncdir);
f6a22062
MD
547 vrele(fdp->fd_cdir);
548 fdp->fd_cdir = NULL;
f6a22062
MD
549 }
550 if (fdp->fd_rdir) {
28623bf9 551 cache_drop(&fdp->fd_nrdir);
f6a22062
MD
552 vrele(fdp->fd_rdir);
553 fdp->fd_rdir = NULL;
f6a22062
MD
554 }
555 if (fdp->fd_jdir) {
28623bf9 556 cache_drop(&fdp->fd_njdir);
f6a22062
MD
557 vrele(fdp->fd_jdir);
558 fdp->fd_jdir = NULL;
f6a22062
MD
559 }
560 }
4a22e893
MD
561 if (p->p_vkernel)
562 vkernel_exit(p);
f6a22062
MD
563 if (p->p_textvp) {
564 vrele(p->p_textvp);
565 p->p_textvp = NULL;
566 }
567 vm = p->p_vmspace;
568 if (vm != NULL) {
569 pmap_remove_pages(vmspace_pmap(vm),
88181b08
MD
570 VM_MIN_USER_ADDRESS,
571 VM_MAX_USER_ADDRESS);
f6a22062 572 vm_map_remove(&vm->vm_map,
88181b08
MD
573 VM_MIN_USER_ADDRESS,
574 VM_MAX_USER_ADDRESS);
f6a22062
MD
575 }
576}
577
984263bc
MD
578/*
579 * Magic number for savecore
580 *
581 * exported (symorder) and used at least by savecore(8)
582 *
04a77147 583 * Mark it as used so that gcc doesn't optimize it away.
984263bc 584 */
04a77147 585__attribute__((__used__))
b24cd69c 586 static u_long const dumpmag = 0x8fca0101UL;
984263bc 587
b24cd69c
AH
588__attribute__((__used__))
589 static int dumpsize = 0; /* also for savecore */
984263bc
MD
590
591static int dodump = 1;
592
593SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0,
594 "Try to perform coredump on kernel panic");
595
b24cd69c
AH
596void
597mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver,
598 uint64_t dumplen, uint32_t blksz)
599{
600 bzero(kdh, sizeof(*kdh));
601 strncpy(kdh->magic, magic, sizeof(kdh->magic));
602 strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
603 kdh->version = htod32(KERNELDUMPVERSION);
604 kdh->architectureversion = htod32(archver);
605 kdh->dumplength = htod64(dumplen);
606 kdh->dumptime = htod64(time_second);
607 kdh->blocksize = htod32(blksz);
608 strncpy(kdh->hostname, hostname, sizeof(kdh->hostname));
609 strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
610 if (panicstr != NULL)
611 strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
612 kdh->parity = kerneldump_parity(kdh);
613}
614
984263bc 615static int
c972a82f 616setdumpdev(cdev_t dev)
984263bc 617{
a6c0f342 618 int error;
faa575e7 619 int doopen;
984263bc 620
028066b1 621 if (dev == NULL) {
b24cd69c 622 disk_dumpconf(NULL, 0/*off*/);
984263bc
MD
623 return (0);
624 }
faa575e7
MD
625
626 /*
627 * We have to open the device before we can perform ioctls on it,
628 * or the slice/label data may not be present. Device opens are
629 * usually tracked by specfs, but the dump device can be set in
630 * early boot and may not be open so this is somewhat of a hack.
631 */
632 doopen = (dev->si_sysref.refcnt == 1);
633 if (doopen) {
634 error = dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred);
635 if (error)
636 return (error);
637 }
b24cd69c 638 error = disk_dumpconf(dev, 1/*on*/);
984263bc 639
b24cd69c
AH
640 return error;
641}
984263bc
MD
642
643/* ARGSUSED */
402ed7e1 644static void dump_conf (void *dummy);
984263bc 645static void
c972a82f 646dump_conf(void *dummy)
984263bc
MD
647{
648 char *path;
b13267a5 649 cdev_t dev;
c7c136fe 650 int _dummy;
984263bc 651
efda3bd0 652 path = kmalloc(MNAMELEN, M_TEMP, M_WAITOK);
984263bc 653 if (TUNABLE_STR_FETCH("dumpdev", path, MNAMELEN) != 0) {
c7c136fe
AH
654 /*
655 * Make sure all disk devices created so far have also been
656 * probed, and also make sure that the newly created device
657 * nodes for probed disks are ready, too.
658 *
659 * XXX - Delay an additional 2 seconds to help drivers which
660 * pickup devices asynchronously and are not caught by
661 * CAM's initial probe.
662 */
cc152643 663 sync_devs();
c7c136fe
AH
664 tsleep(&_dummy, 0, "syncer", hz*2);
665
bc01a404 666 dev = kgetdiskbyname(path);
028066b1 667 if (dev != NULL)
984263bc
MD
668 dumpdev = dev;
669 }
efda3bd0 670 kfree(path, M_TEMP);
984263bc 671 if (setdumpdev(dumpdev) != 0)
028066b1 672 dumpdev = NULL;
984263bc
MD
673}
674
675SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL)
676
677static int
678sysctl_kern_dumpdev(SYSCTL_HANDLER_ARGS)
679{
680 int error;
681 udev_t ndumpdev;
682
683 ndumpdev = dev2udev(dumpdev);
684 error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req);
685 if (error == 0 && req->newptr != NULL)
686 error = setdumpdev(udev2dev(ndumpdev, 0));
687 return (error);
688}
689
690SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW,
936c90c4 691 0, sizeof dumpdev, sysctl_kern_dumpdev, "T,udev_t", "");
984263bc 692
984263bc
MD
693/*
694 * Panic is called on unresolvable fatal errors. It prints "panic: mesg",
695 * and then reboots. If we are called twice, then we avoid trying to sync
696 * the disks as this often leads to recursive panics.
697 */
698void
699panic(const char *fmt, ...)
700{
b1e04573 701 int bootopt, newpanic;
5fddbda2 702 globaldata_t gd = mycpu;
3933a3ab 703 thread_t td = gd->gd_curthread;
e2565a42 704 __va_list ap;
984263bc
MD
705 static char buf[256];
706
ddcc5e10
MD
707 /*
708 * If a panic occurs on multiple cpus before the first is able to
709 * halt the other cpus, only one cpu is allowed to take the panic.
6ea70f76 710 * Attempt to be verbose about this situation but if the kprintf()
ddcc5e10
MD
711 * itself panics don't let us overrun the kernel stack.
712 *
713 * Be very nasty about descheduling our thread at the lowest
714 * level possible in an attempt to freeze the thread without
715 * inducing further panics.
716 *
717 * Bumping gd_trap_nesting_level will also bypass assertions in
718 * lwkt_switch() and allow us to switch away even if we are a
719 * FAST interrupt or IPI.
5fddbda2
MD
720 *
721 * The setting of panic_cpu_gd also determines how kprintf()
722 * spin-locks itself. DDB can set panic_cpu_gd as well.
ddcc5e10 723 */
5fddbda2
MD
724 for (;;) {
725 globaldata_t xgd = panic_cpu_gd;
726
727 /*
728 * Someone else got the panic cpu
729 */
730 if (xgd && xgd != gd) {
731 crit_enter();
732 ++mycpu->gd_trap_nesting_level;
733 if (mycpu->gd_trap_nesting_level < 25) {
734 kprintf("SECONDARY PANIC ON CPU %d THREAD %p\n",
3933a3ab 735 mycpu->gd_cpuid, td);
5fddbda2 736 }
3933a3ab 737 td->td_release = NULL; /* be a grinch */
5fddbda2 738 for (;;) {
3933a3ab 739 lwkt_deschedule_self(td);
5fddbda2
MD
740 lwkt_switch();
741 }
742 /* NOT REACHED */
743 /* --mycpu->gd_trap_nesting_level */
744 /* crit_exit() */
ddcc5e10 745 }
5fddbda2
MD
746
747 /*
748 * Reentrant panic
749 */
750 if (xgd && xgd == gd)
751 break;
752
753 /*
754 * We got it
755 */
756 if (atomic_cmpset_ptr(&panic_cpu_gd, NULL, gd))
757 break;
ddcc5e10 758 }
3933a3ab
MD
759 /*
760 * Try to get the system into a working state. Save information
761 * we are about to destroy.
762 */
55b6aa35 763 kvcreinitspin();
3933a3ab
MD
764 if (panicstr == NULL) {
765 bcopy(td->td_toks_array, panic_tokens, sizeof(panic_tokens));
766 panic_tokens_count = td->td_toks_stop - &td->td_toks_base;
767 }
768 lwkt_relalltokens(td);
769 td->td_toks_stop = &td->td_toks_base;
770
5fddbda2
MD
771 /*
772 * Setup
773 */
984263bc 774 bootopt = RB_AUTOBOOT | RB_DUMP;
baf88b6b
MD
775 if (sync_on_panic == 0)
776 bootopt |= RB_NOSYNC;
b1e04573 777 newpanic = 0;
5fddbda2 778 if (panicstr) {
984263bc 779 bootopt |= RB_NOSYNC;
5fddbda2 780 } else {
984263bc 781 panicstr = fmt;
b1e04573
JH
782 newpanic = 1;
783 }
984263bc 784
5fddbda2
MD
785 /*
786 * Format the panic string.
787 */
e2565a42 788 __va_start(ap, fmt);
379210cb 789 kvsnprintf(buf, sizeof(buf), fmt, ap);
984263bc
MD
790 if (panicstr == fmt)
791 panicstr = buf;
e2565a42 792 __va_end(ap);
6ea70f76 793 kprintf("panic: %s\n", buf);
6a8aa90e 794 /* two separate prints in case of an unmapped page and trap */
d1a8af1f 795 kprintf("cpuid = %d\n", mycpu->gd_cpuid);
984263bc 796
3db39609
AH
797#if (NGPIO > 0) && defined(ERROR_LED_ON_PANIC)
798 led_switch("error", 1);
799#endif
800
cf03d6cd 801#if defined(WDOG_DISABLE_ON_PANIC)
3db39609
AH
802 wdog_disable();
803#endif
804
3735beb9
MD
805 /*
806 * Enter the debugger or fall through & dump. Entering the
807 * debugger will stop cpus. If not entering the debugger stop
808 * cpus here.
809 */
984263bc 810#if defined(DDB)
b1e04573 811 if (newpanic && trace_on_panic)
7ce2998e 812 print_backtrace(-1);
984263bc 813 if (debugger_on_panic)
3cc1a5a9 814 Debugger("panic");
3735beb9 815 else
984263bc 816#endif
3735beb9
MD
817 if (newpanic)
818 stop_cpus(mycpu->gd_other_cpus);
984263bc
MD
819 boot(bootopt);
820}
821
822/*
823 * Support for poweroff delay.
824 */
825#ifndef POWEROFF_DELAY
826# define POWEROFF_DELAY 5000
827#endif
828static int poweroff_delay = POWEROFF_DELAY;
829
830SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
831 &poweroff_delay, 0, "");
832
833static void
834poweroff_wait(void *junk, int howto)
835{
836 if(!(howto & RB_POWEROFF) || poweroff_delay <= 0)
837 return;
838 DELAY(poweroff_delay * 1000);
839}
840
841/*
842 * Some system processes (e.g. syncer) need to be stopped at appropriate
843 * points in their main loops prior to a system shutdown, so that they
844 * won't interfere with the shutdown process (e.g. by holding a disk buf
845 * to cause sync to fail). For each of these system processes, register
846 * shutdown_kproc() as a handler for one of shutdown events.
847 */
848static int kproc_shutdown_wait = 60;
849SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
850 &kproc_shutdown_wait, 0, "");
851
852void
853shutdown_kproc(void *arg, int howto)
854{
bc6dffab 855 struct thread *td;
984263bc
MD
856 struct proc *p;
857 int error;
858
859 if (panicstr)
860 return;
861
bc6dffab
MD
862 td = (struct thread *)arg;
863 if ((p = td->td_proc) != NULL) {
6ea70f76 864 kprintf("Waiting (max %d seconds) for system process `%s' to stop...",
bc6dffab
MD
865 kproc_shutdown_wait, p->p_comm);
866 } else {
6ea70f76 867 kprintf("Waiting (max %d seconds) for system thread %s to stop...",
0cfcada1 868 kproc_shutdown_wait, td->td_comm);
bc6dffab
MD
869 }
870 error = suspend_kproc(td, kproc_shutdown_wait * hz);
984263bc
MD
871
872 if (error == EWOULDBLOCK)
6ea70f76 873 kprintf("timed out\n");
984263bc 874 else
6ea70f76 875 kprintf("stopped\n");
984263bc 876}
b24cd69c
AH
877
878/* Registration of dumpers */
879int
880set_dumper(struct dumperinfo *di)
881{
882 if (di == NULL) {
883 bzero(&dumper, sizeof(dumper));
884 return 0;
885 }
886
887 if (dumper.dumper != NULL)
888 return (EBUSY);
889
890 dumper = *di;
891 return 0;
892}
6abe3bd0 893
6abe3bd0 894void
b815579b 895dumpsys(void)
6abe3bd0 896{
505230a8
AH
897#if defined (_KERNEL_VIRTUAL)
898 /* VKERNELs don't support dumps */
6abe3bd0 899 kprintf("VKERNEL doesn't support dumps\n");
505230a8 900 return;
6abe3bd0 901#endif
505230a8
AH
902 /*
903 * If there is a dumper registered and we aren't dumping already, call
904 * the machine dependent dumpsys (md_dumpsys) to do the hard work.
905 *
906 * XXX: while right now the md_dumpsys() of x86 and x86_64 could be
907 * factored out completely into here, I rather keep them machine
908 * dependent in case we ever add a platform which does not share
909 * the same dumpsys() code, such as arm.
910 */
911 if (dumper.dumper != NULL && !dumping) {
912 dumping++;
913 md_dumpsys(&dumper);
914 }
915}
5ea440eb
AH
916
917int dump_stop_usertds = 0;
918
/*
 * IPI callback used by dump_reactivate_cpus(): requests a user
 * reschedule on the cpu it runs on.  The argument is unused.
 */
static void
need_user_resched_remote(void *dummy)
{
	need_user_resched();
}
925
926void
927dump_reactivate_cpus(void)
928{
929 globaldata_t gd;
930 int cpu, seq;
931
932 dump_stop_usertds = 1;
933
934 need_user_resched();
935
936 for (cpu = 0; cpu < ncpus; cpu++) {
937 gd = globaldata_find(cpu);
938 seq = lwkt_send_ipiq(gd, need_user_resched_remote, NULL);
939 lwkt_wait_ipiq(gd, seq);
940 }
941
942 restart_cpus(stopped_cpus);
943}