dump - Bring in FreeBSD's dumping (new dumps & minidumps)
author Alex Hornung <ahornung@gmail.com>
Sun, 6 Dec 2009 19:48:53 +0000 (19:48 +0000)
committer Alex Hornung <ahornung@gmail.com>
Sun, 6 Dec 2009 20:06:39 +0000 (20:06 +0000)
* Bring in FreeBSD's dumps and minidumps, which use an ELF header instead of
  a raw dump.

* Adapt to our needs by, for example, saving the dumppcb and dumpthread.

Obtained-from: FreeBSD

sys/platform/pc32/conf/files
sys/platform/pc32/i386/dump_machdep.c [new file with mode: 0644]
sys/platform/pc32/i386/minidump_machdep.c [new file with mode: 0644]
sys/platform/pc32/include/minidump.h [new file with mode: 0644]
sys/platform/pc64/conf/files
sys/platform/pc64/include/minidump.h [new file with mode: 0644]
sys/platform/pc64/x86_64/dump_machdep.c [new file with mode: 0644]
sys/platform/pc64/x86_64/minidump_machdep.c [new file with mode: 0644]

index 466375f..04ec244 100644 (file)
@@ -146,6 +146,7 @@ platform/pc32/i386/bioscall.s               standard
 platform/pc32/i386/busdma_machdep.c    standard
 platform/pc32/i386/db_interface.c      optional        ddb
 platform/pc32/i386/db_trace.c          optional        ddb
+platform/pc32/i386/dump_machdep.c      standard
 platform/pc32/i386/elan-mmcr.c         optional        cpu_elan
 platform/pc32/i386/geode.c             optional        cpu_geode
 platform/pc32/i386/cs5536.c            optional        cpu_geode
@@ -156,6 +157,7 @@ platform/pc32/i386/i686_mem.c               standard
 platform/pc32/i386/identcpu.c          standard
 platform/pc32/i386/initcpu.c           standard
 platform/pc32/i386/k6_mem.c            standard
+platform/pc32/i386/minidump_machdep.c  standard
 platform/pc32/i386/tls.c               standard
 # locore.s needs to be handled in Makefile to put it first.  Otherwise it's
 # now normal.
diff --git a/sys/platform/pc32/i386/dump_machdep.c b/sys/platform/pc32/i386/dump_machdep.c
new file mode 100644 (file)
index 0000000..2c84c33
--- /dev/null
@@ -0,0 +1,367 @@
+/*-
+ * Copyright (c) 2002 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/sysctl.h>
+#include <sys/device.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/elf.h>
+#include <machine/md_var.h>
+#include <machine/thread.h>
+#include <sys/thread2.h>
+
+CTASSERT(sizeof(struct kerneldumpheader) == 512);
+
+int do_minidump = 1;
+TUNABLE_INT("debug.minidump", &do_minidump);
+SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0,
+    "Enable mini crash dumps");
+
+
+/*
+ * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
+ * is to protect us from metadata and to protect metadata from us.
+ */
+#define        SIZEOF_METADATA         (64*1024)
+
+#define        MD_ALIGN(x)     (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
+#define        DEV_ALIGN(x)    (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
+
+struct md_pa {
+       vm_paddr_t md_start;    /* first physical address of the chunk */
+       vm_paddr_t md_size;     /* chunk length in bytes; 0 marks the end of the map */
+};
+
+typedef int callback_t(struct md_pa *, int, void *);
+
+static struct kerneldumpheader kdh;
+static off_t dumplo, fileofs;
+
+/* Handle buffered writes. */
+static char buffer[DEV_BSIZE];
+static size_t fragsz;
+
+/* 20 phys_avail entry pairs correspond to 10 md_pa's */
+static struct md_pa dump_map[10];
+
+/*
+ * Rebuild dump_map from the (start, end) pairs in dump_avail.  Copying
+ * stops at the first all-zero pair or when dump_map is full; entries that
+ * are not filled stay zeroed.
+ */
+static void
+md_pa_init(void)
+{
+       int slot, pair;
+
+       bzero(dump_map, sizeof(dump_map));
+       slot = 0;
+       while (slot < sizeof(dump_map) / sizeof(dump_map[0])) {
+               pair = slot * 2;
+               if (dump_avail[pair] == 0 && dump_avail[pair + 1] == 0)
+                       break;
+               dump_map[slot].md_start = dump_avail[pair];
+               dump_map[slot].md_size = dump_avail[pair + 1] -
+                   dump_avail[pair];
+               slot++;
+       }
+}
+
+/* Return the first entry of dump_map, where chunk iteration begins. */
+static struct md_pa *
+md_pa_first(void)
+{
+       return (dump_map);
+}
+
+/*
+ * Advance to the chunk that follows mdp in dump_map.
+ *
+ * Returns NULL when mdp was the last usable chunk: either the following
+ * entry has a zero size, or mdp was the final slot of dump_map.  The bounds
+ * check matters because md_pa_init() can fill every slot, in which case
+ * there is no zero-sized terminator to stop on.
+ */
+static struct md_pa *
+md_pa_next(struct md_pa *mdp)
+{
+       struct md_pa *end;
+
+       end = dump_map + sizeof(dump_map) / sizeof(dump_map[0]);
+       mdp++;
+       if (mdp >= end || mdp->md_size == 0)
+               mdp = NULL;
+       return (mdp);
+}
+
+/*
+ * Append sz bytes from ptr to the header staging buffer.  Each time the
+ * buffer holds a full DEV_BSIZE block it is written at dumplo, dumplo
+ * moves forward one block and the buffer starts over.  A trailing partial
+ * block stays buffered until buf_flush() is called.
+ *
+ * Returns 0 on success or the error reported by dev_ddump().
+ */
+static int
+buf_write(struct dumperinfo *di, char *ptr, size_t sz)
+{
+       size_t room;
+       int rc;
+
+       for (; sz != 0; ptr += room, sz -= room) {
+               room = DEV_BSIZE - fragsz;
+               if (room > sz)
+                       room = sz;
+               bcopy(ptr, buffer + fragsz, room);
+               fragsz += room;
+               if (fragsz != DEV_BSIZE)
+                       continue;
+
+               rc = dev_ddump(di->priv, buffer, 0, dumplo, DEV_BSIZE);
+               if (rc != 0)
+                       return (rc);
+               dumplo += DEV_BSIZE;
+               fragsz = 0;
+       }
+
+       return (0);
+}
+
+/*
+ * Write out whatever buf_write() left in the staging buffer.  A full
+ * DEV_BSIZE block is written regardless of how many bytes are pending, and
+ * dumplo advances by one block even when the write fails.
+ *
+ * Returns 0 if nothing was pending, otherwise the dev_ddump() result.
+ */
+static int
+buf_flush(struct dumperinfo *di)
+{
+       int rc;
+
+       rc = 0;
+       if (fragsz != 0) {
+               rc = dev_ddump(di->priv, buffer, 0, dumplo, DEV_BSIZE);
+               dumplo += DEV_BSIZE;
+               fragsz = 0;
+       }
+       return (rc);
+}
+
+/* Convert a page count to units of 256 pages, rounding up. */
+#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
+
+/*
+ * foreach_chunk() callback: write one physical memory chunk to the dump
+ * device.
+ *
+ * The chunk is written in pieces of at most MAXDUMPPGS pages.  Each piece
+ * is mapped with pmap_kenter_temporary() and handed to dev_ddump() at
+ * dumplo, which is advanced past it.  The console is polled after every
+ * piece so the operator can abort with CTRL-C.
+ *
+ * mdp   - chunk to dump (md_start/md_size)
+ * seqnr - chunk number, used only in the progress message
+ * arg   - the struct dumperinfo for the dump device
+ *
+ * Returns 0 on success, ECANCELED on CTRL-C, or the dev_ddump() error.
+ */
+static int
+cb_dumpdata(struct md_pa *mdp, int seqnr, void *arg)
+{
+       struct dumperinfo *di = (struct dumperinfo*)arg;
+       vm_paddr_t a, pa;
+       void *va;
+       uint64_t pgs;
+       size_t counter, sz, chunk;
+       int i, c, error;
+
+       error = 0;      /* catch case in which chunk size is 0 */
+       counter = 0;    /* bytes written since the last progress update */
+       va = NULL;
+       pgs = mdp->md_size / PAGE_SIZE;
+       pa = mdp->md_start;
+
+       /* pgs is uint64_t; cast so the arguments match the %lld conversions. */
+       kprintf("  chunk %d: %lldMB (%lld pages)", seqnr,
+           (long long)PG2MB(pgs), (long long)pgs);
+
+       while (pgs) {
+               chunk = pgs;
+               if (chunk > MAXDUMPPGS)
+                       chunk = MAXDUMPPGS;
+               sz = chunk << PAGE_SHIFT;
+               counter += sz;
+               /* Print what is left of this chunk once per 16MB written. */
+               if (counter >> 24) {
+                       kprintf(" %lld", (long long)PG2MB(pgs));
+                       counter &= (1<<24) - 1;
+               }
+               /*
+                * Map the pages of this piece into consecutive temporary
+                * slots.  The address returned by the last call is the one
+                * written below.
+                * NOTE(review): this presumably relies on
+                * pmap_kenter_temporary() returning the base of the
+                * temporary window regardless of the slot - confirm.
+                */
+               for (i = 0; i < chunk; i++) {
+                       a = pa + i * PAGE_SIZE;
+                       va = pmap_kenter_temporary(trunc_page(a), i);
+               }
+               error = dev_ddump(di->priv, va, 0, dumplo, sz);
+               if (error)
+                       break;
+               dumplo += sz;
+               pgs -= chunk;
+               pa += sz;
+
+               /* Check for user abort. */
+               c = cncheckc();
+               if (c == 0x03)
+                       return (ECANCELED);
+               if (c != -1)
+                       kprintf(" (CTRL-C to abort) ");
+       }
+       kprintf(" ... %s\n", (error) ? "fail" : "ok");
+       return (error);
+}
+
+/*
+ * foreach_chunk() callback: emit the ELF program header for one chunk.
+ * The header is a readable PT_LOAD segment whose virtual and physical
+ * addresses both equal the chunk's start and whose file offset is the
+ * running fileofs, which is then advanced by the segment's file size.
+ *
+ * Returns the result of buf_write().
+ */
+static int
+cb_dumphdr(struct md_pa *mdp, int seqnr, void *arg)
+{
+       struct dumperinfo *di = (struct dumperinfo*)arg;
+       Elf_Phdr ph;
+       uint64_t bytes = mdp->md_size;
+       int rc;
+
+       bzero(&ph, sizeof(ph));
+       ph.p_align = PAGE_SIZE;
+       ph.p_memsz = bytes;
+       ph.p_filesz = bytes;
+       ph.p_paddr = mdp->md_start;
+       ph.p_vaddr = mdp->md_start;
+       ph.p_offset = fileofs;
+       ph.p_flags = PF_R;                      /* XXX */
+       ph.p_type = PT_LOAD;
+
+       rc = buf_write(di, (char*)&ph, sizeof(ph));
+       fileofs += ph.p_filesz;
+       return (rc);
+}
+
+/*
+ * foreach_chunk() callback: add the chunk's size to the uint64_t total
+ * that arg points to.  Always returns 0.
+ */
+static int
+cb_size(struct md_pa *mdp, int seqnr, void *arg)
+{
+       uint64_t *total = arg;
+
+       *total = *total + (uint64_t)mdp->md_size;
+       return (0);
+}
+
+/*
+ * Call cb(chunk, sequence number, arg) for each chunk in dump_map, in
+ * order.  The first nonzero callback result stops the walk and is returned
+ * negated; otherwise the number of chunks visited is returned.
+ */
+static int
+foreach_chunk(callback_t cb, void *arg)
+{
+       struct md_pa *cur;
+       int rc, visited;
+
+       visited = 0;
+       for (cur = md_pa_first(); cur != NULL; cur = md_pa_next(cur)) {
+               rc = (*cb)(cur, visited++, arg);
+               if (rc != 0)
+                       return (-rc);
+       }
+       return (visited);
+}
+
+/*
+ * Write a crash dump to the dump device described by di.
+ *
+ * The current context is saved in dumppcb and dumpthread first.  When
+ * do_minidump is set the work is handed to minidumpsys(); otherwise a full
+ * dump is written as an ELF core file: kernel dump header, ELF header, one
+ * program header per dump_map chunk, the chunk contents, and a trailing
+ * copy of the kernel dump header.  The dump is placed at the end of the
+ * device.
+ *
+ * Progress and the final status are printed on the console; nothing is
+ * returned to the caller.
+ */
+void
+dumpsys(struct dumperinfo *di)
+{
+       Elf_Ehdr ehdr;
+       uint64_t dumpsize;
+       off_t hdrgap;
+       size_t hdrsz;
+       int error;
+
+       savectx(&dumppcb);
+       dumpthread = curthread;
+
+       if (do_minidump) {
+               minidumpsys(di);
+               return;
+       }
+
+       bzero(&ehdr, sizeof(ehdr));
+       ehdr.e_ident[EI_MAG0] = ELFMAG0;
+       ehdr.e_ident[EI_MAG1] = ELFMAG1;
+       ehdr.e_ident[EI_MAG2] = ELFMAG2;
+       ehdr.e_ident[EI_MAG3] = ELFMAG3;
+       ehdr.e_ident[EI_CLASS] = ELF_CLASS;
+#if BYTE_ORDER == LITTLE_ENDIAN
+       ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
+#else
+       ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
+#endif
+       ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+       ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE;   /* XXX big picture? */
+       ehdr.e_type = ET_CORE;
+       ehdr.e_machine = EM_386;
+       ehdr.e_phoff = sizeof(ehdr);
+       ehdr.e_flags = 0;
+       ehdr.e_ehsize = sizeof(ehdr);
+       ehdr.e_phentsize = sizeof(Elf_Phdr);
+       ehdr.e_shentsize = sizeof(Elf_Shdr);
+
+       md_pa_init();
+
+       /*
+        * Calculate dump size: the sum of all chunks plus the headers
+        * rounded up to a page, so that segment data starts page aligned
+        * within the file.  hdrgap is the distance between the end of the
+        * block-aligned headers and that page boundary.
+        */
+       dumpsize = 0L;
+       ehdr.e_phnum = foreach_chunk(cb_size, &dumpsize);
+       hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
+       fileofs = MD_ALIGN(hdrsz);
+       dumpsize += fileofs;
+       hdrgap = fileofs - DEV_ALIGN(hdrsz);
+
+       /* Determine dump offset on device. */
+       if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+               error = ENOSPC;
+               goto fail;
+       }
+       dumplo = di->mediaoffset + di->mediasize - dumpsize;
+       dumplo -= sizeof(kdh) * 2;
+
+       mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION,
+           dumpsize, di->blocksize);
+
+       kprintf("Dumping %llu MB (%d chunks)\n",
+           (unsigned long long)(dumpsize >> 20), ehdr.e_phnum);
+
+       /* Dump leader */
+       error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+       if (error)
+               goto fail;
+       dumplo += sizeof(kdh);
+
+       /* Dump ELF header */
+       error = buf_write(di, (char*)&ehdr, sizeof(ehdr));
+       if (error)
+               goto fail;
+
+       /* Dump program headers */
+       error = foreach_chunk(cb_dumphdr, di);
+       if (error < 0)
+               goto fail;
+
+       /* A failed write of the last header block must abort the dump. */
+       error = buf_flush(di);
+       if (error)
+               goto fail;
+
+       /*
+        * All headers are written using blocked I/O, so we know the
+        * current offset is (still) block aligned. Skip the alignment
+        * in the file to have the segment contents aligned at page
+        * boundary. We cannot use MD_ALIGN on dumplo, because we don't
+        * care and may very well be unaligned within the dump device.
+        */
+       dumplo += hdrgap;
+
+       /* Dump memory chunks (updates dumplo) */
+       error = foreach_chunk(cb_dumpdata, di);
+       if (error < 0)
+               goto fail;
+
+       /* Dump trailer */
+       error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+       if (error)
+               goto fail;
+
+       /* Signal completion, signoff and exit stage left. */
+       dev_ddump(di->priv, NULL, 0, 0, 0);
+       kprintf("\nDump complete\n");
+       return;
+
+ fail:
+       /* foreach_chunk() reports errors negated; direct calls do not. */
+       if (error < 0)
+               error = -error;
+
+       if (error == ECANCELED)
+               kprintf("\nDump aborted\n");
+       else if (error == ENOSPC)
+               kprintf("\nDump failed. Partition too small.\n");
+       else
+               kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
+}
diff --git a/sys/platform/pc32/i386/minidump_machdep.c b/sys/platform/pc32/i386/minidump_machdep.c
new file mode 100644 (file)
index 0000000..0d9bd63
--- /dev/null
@@ -0,0 +1,396 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/device.h>
+#include <sys/globaldata.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <sys/msgbuf.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/pmap.h>
+#include <machine/atomic.h>
+#include <machine/elf.h>
+#include <machine/globaldata.h>
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+#include <machine/minidump.h>
+
+CTASSERT(sizeof(struct kerneldumpheader) == 512);
+
+/*
+ * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
+ * is to protect us from metadata and to protect metadata from us.
+ */
+#define        SIZEOF_METADATA         (64*1024)
+
+#define        MD_ALIGN(x)     (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
+#define        DEV_ALIGN(x)    (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
+
+uint32_t *vm_page_dump;
+int vm_page_dump_size;
+
+static struct kerneldumpheader kdh;
+static off_t dumplo;
+
+/* Handle chunked writes. */
+static size_t fragsz;
+static void *dump_va;
+static uint64_t counter, progress;
+
+CTASSERT(sizeof(*vm_page_dump) == 4);
+
+/*
+ * Return 1 if pa falls inside one of the [start, end) ranges listed in
+ * dump_avail, 0 otherwise.  The list ends at the first all-zero pair.
+ */
+static int
+is_dumpable(vm_paddr_t pa)
+{
+       int idx = 0;
+
+       while (dump_avail[idx] != 0 || dump_avail[idx + 1] != 0) {
+               if (dump_avail[idx] <= pa && pa < dump_avail[idx + 1])
+                       return (1);
+               idx += 2;
+       }
+       return (0);
+}
+
+#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
+
+/*
+ * Write the fragsz bytes of physical pages that blk_write() has queued at
+ * dump_va, then reset the queue.  dumplo advances by fragsz whether or not
+ * the write succeeds.
+ *
+ * Returns 0 if nothing was queued, otherwise the dev_ddump() result.
+ */
+static int
+blk_flush(struct dumperinfo *di)
+{
+       int rc = 0;
+
+       if (fragsz != 0) {
+               rc = dev_ddump(di->priv, dump_va, 0, dumplo, fragsz);
+               dumplo += fragsz;
+               fragsz = 0;
+       }
+       return (rc);
+}
+
+/*
+ * Write sz bytes to the dump device, taken either from a kernel virtual
+ * address (ptr) or from a physical address (pa); passing both is an error.
+ *
+ * Virtual data is written immediately, after flushing any queued physical
+ * pages so that the output stays in order.  Physical pages are mapped with
+ * pmap_kenter_temporary() and queued; the queue is written by blk_flush()
+ * once it holds MAXDUMPPGS pages, so callers must call blk_flush() after
+ * the last physical write.  The console is polled after every piece so the
+ * operator can abort with CTRL-C.
+ *
+ * sz must be a multiple of PAGE_SIZE and pa must be page aligned.
+ *
+ * Returns 0 on success, EINVAL for a bad argument, ECANCELED on CTRL-C, or
+ * the error from dev_ddump()/blk_flush().
+ */
+static int
+blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
+{
+       size_t len;
+       int error, i, c;
+
+       error = 0;
+       if ((sz % PAGE_SIZE) != 0) {
+               kprintf("size not page aligned\n");
+               return (EINVAL);
+       }
+       if (ptr != NULL && pa != 0) {
+               kprintf("cant have both va and pa!\n");
+               return (EINVAL);
+       }
+       /*
+        * ptr is always NULL once the check above has passed with pa set,
+        * so it is the physical address whose alignment must be tested.
+        */
+       if (pa != 0 && (pa % PAGE_SIZE) != 0) {
+               kprintf("address not page aligned\n");
+               return (EINVAL);
+       }
+       if (ptr != NULL) {
+               /* If we're doing a virtual dump, flush any pre-existing pa pages */
+               error = blk_flush(di);
+               if (error)
+                       return (error);
+       }
+       while (sz) {
+               /* Never queue more than MAXDUMPPGS pages in total. */
+               len = (MAXDUMPPGS * PAGE_SIZE) - fragsz;
+               if (len > sz)
+                       len = sz;
+               counter += len;
+               progress -= len;
+               /* Print the remaining megabytes once per 16MB processed. */
+               if (counter >> 24) {
+                       kprintf(" %lld",
+                           (long long)PG2MB(progress >> PAGE_SHIFT));
+                       counter &= (1<<24) - 1;
+               }
+               if (ptr) {
+                       error = dev_ddump(di->priv, ptr, 0, dumplo, len);
+                       if (error)
+                               return (error);
+                       dumplo += len;
+                       ptr += len;
+                       sz -= len;
+               } else {
+                       /* Map each page into the slot after the queued ones. */
+                       for (i = 0; i < len; i += PAGE_SIZE)
+                               dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
+                       fragsz += len;
+                       pa += len;
+                       sz -= len;
+                       if (fragsz == (MAXDUMPPGS * PAGE_SIZE)) {
+                               error = blk_flush(di);
+                               if (error)
+                                       return (error);
+                       }
+               }
+
+               /* Check for user abort. */
+               c = cncheckc();
+               if (c == 0x03)
+                       return (ECANCELED);
+               if (c != -1)
+                       kprintf(" (CTRL-C to abort) ");
+       }
+
+       return (0);
+}
+
+/* A fake page table page, to avoid having to handle both 4K and 2M pages */
+static pt_entry_t fakept[NPTEPG];
+
+/*
+ * Write a minidump to the dump device described by di.
+ *
+ * Layout, in write order: kernel dump header, one page holding the
+ * minidumphdr, the message buffer, the vm_page_dump bitmap, one page of
+ * page-table entries per NBPDR of kernel address space from KERNBASE to
+ * kern_end, every physical page whose bit is set in vm_page_dump, and
+ * finally a second copy of the kernel dump header.  The dump is placed at
+ * the end of the device.
+ *
+ * Progress and the final status are printed on the console; nothing is
+ * returned to the caller.
+ */
+void
+minidumpsys(struct dumperinfo *di)
+{
+       uint64_t dumpsize;
+       uint32_t ptesize;
+       vm_offset_t va;
+       vm_offset_t kern_end;
+       int error;
+       uint32_t bits;
+       uint64_t pa;
+       pd_entry_t *pd;
+       pt_entry_t *pt;
+       int i, j, k, bit;
+       struct minidumphdr mdhdr;
+       struct mdglobaldata *md;
+
+       counter = 0;
+       ptesize = 0;
+
+       md = (struct mdglobaldata *)globaldata_find(0);
+
+       /* Extend the walk past the end of the per-cpu globaldata array. */
+       kern_end = kernel_vm_end;
+       if (kern_end < (vm_offset_t)&(md[ncpus]))
+               kern_end = (vm_offset_t)&(md[ncpus]);
+#if 0
+       kern_end = 0xFFFFF000;
+#endif
+
+       /* Walk page table pages, set bits in vm_page_dump */
+       for (va = KERNBASE; va < kern_end; va += NBPDR) {
+               /*
+                * We always write a page, even if it is zero. Each
+                * page written corresponds to 2MB of space
+                */
+               ptesize += PAGE_SIZE;
+               pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE);     /* always mapped! */
+               j = va >> PDRSHIFT;
+               if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
+                       /* This is an entire 2M page. */
+                       pa = pd[j] & PG_FRAME & ~PDRMASK;
+                       for (k = 0; k < NPTEPG; k++) {
+                               if (is_dumpable(pa))
+                                       dump_add_page(pa);
+                               pa += PAGE_SIZE;
+                       }
+                       continue;
+               }
+               if ((pd[j] & PG_V) == PG_V) {
+                       /* set bit for each valid page in this 2MB block */
+                       pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
+                       for (k = 0; k < NPTEPG; k++) {
+                               if ((pt[k] & PG_V) == PG_V) {
+                                       pa = pt[k] & PG_FRAME;
+                                       if (is_dumpable(pa))
+                                               dump_add_page(pa);
+                               }
+                       }
+               } else {
+                       /* nothing, we're going to dump a null page */
+               }
+       }
+
+       /* Calculate dump size. */
+       dumpsize = ptesize;
+       dumpsize += round_page(msgbufp->msg_size);
+       dumpsize += round_page(vm_page_dump_size);
+       for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+               bits = vm_page_dump[i];
+               while (bits) {
+                       bit = bsfl(bits);
+                       pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+                       /* Clear out undumpable pages now if needed */
+                       if (is_dumpable(pa)) {
+                               dumpsize += PAGE_SIZE;
+                       } else {
+                               dump_drop_page(pa);
+                       }
+                       bits &= ~(1ul << bit);
+               }
+       }
+       /* One more page for the minidump header written below. */
+       dumpsize += PAGE_SIZE;
+
+       /* Determine dump offset on device. */
+       if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+               error = ENOSPC;
+               goto fail;
+       }
+       dumplo = di->mediaoffset + di->mediasize - dumpsize;
+       dumplo -= sizeof(kdh) * 2;
+       /* Bytes still to write; blk_write() counts this down. */
+       progress = dumpsize;
+
+       /* Initialize mdhdr */
+       bzero(&mdhdr, sizeof(mdhdr));
+       strcpy(mdhdr.magic, MINIDUMP_MAGIC);
+       mdhdr.version = MINIDUMP_VERSION;
+       mdhdr.msgbufsize = msgbufp->msg_size;
+       mdhdr.bitmapsize = vm_page_dump_size;
+       mdhdr.ptesize = ptesize;
+       mdhdr.kernbase = KERNBASE;
+
+       mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION,
+           dumpsize, di->blocksize);
+
+       kprintf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
+       kprintf("Dumping %llu MB:", (unsigned long long)(dumpsize >> 20));
+
+       /* Dump leader */
+       error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+       if (error)
+               goto fail;
+       dumplo += sizeof(kdh);
+
+       /* Dump my header, using fakept as a zeroed page-sized buffer */
+       bzero(&fakept, sizeof(fakept));
+       bcopy(&mdhdr, &fakept, sizeof(mdhdr));
+       error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+       if (error)
+               goto fail;
+
+       /* Dump msgbuf up front */
+       error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
+       if (error)
+               goto fail;
+
+       /* Dump bitmap */
+       error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
+       if (error)
+               goto fail;
+
+       /* Dump kernel page table pages */
+       for (va = KERNBASE; va < kern_end; va += NBPDR) {
+               /* We always write a page, even if it is zero */
+               pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE);     /* always mapped! */
+               j = va >> PDRSHIFT;
+               if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
+                       /* This is a single 2M block. Generate a fake PTP */
+                       pa = pd[j] & PG_FRAME & ~PDRMASK;
+                       for (k = 0; k < NPTEPG; k++) {
+                               fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
+                       }
+                       error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+                       /* flush, in case we reuse fakept in the same block */
+                       error = blk_flush(di);
+                       if (error)
+                               goto fail;
+                       continue;
+               }
+               if ((pd[j] & PG_V) == PG_V) {
+                       pa = pd[j] & PG_FRAME;
+                       error = blk_write(di, 0, pa, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+               } else {
+                       bzero(fakept, sizeof(fakept));
+                       error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+                       /* flush, in case we reuse fakept in the same block */
+                       error = blk_flush(di);
+                       if (error)
+                               goto fail;
+               }
+       }
+
+       /* Dump memory chunks */
+       /* XXX cluster it up and use blk_dump() */
+       for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+               bits = vm_page_dump[i];
+               while (bits) {
+                       bit = bsfl(bits);
+                       pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+                       error = blk_write(di, 0, pa, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+                       bits &= ~(1ul << bit);
+               }
+       }
+
+       /* Write out any physical pages blk_write() still has queued. */
+       error = blk_flush(di);
+       if (error)
+               goto fail;
+
+       /* Dump trailer */
+       error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+       if (error)
+               goto fail;
+       dumplo += sizeof(kdh);
+
+       /* Signal completion, signoff and exit stage left. */
+       dev_ddump(di->priv, NULL, 0, 0, 0);
+       kprintf("\nDump complete\n");
+       return;
+
+ fail:
+       if (error < 0)
+               error = -error;
+
+       if (error == ECANCELED)
+               kprintf("\nDump aborted\n");
+       else if (error == ENOSPC)
+               kprintf("\nDump failed. Partition too small.\n");
+       else
+               kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
+}
+
+/*
+ * Set the vm_page_dump bit for the page containing pa.  Each 32-bit word
+ * of the bitmap covers 32 consecutive pages.
+ */
+void
+dump_add_page(vm_paddr_t pa)
+{
+       vm_paddr_t pfn = pa >> PAGE_SHIFT;
+       int word = pfn / 32;
+       int shift = pfn % 32;
+
+       atomic_set_int(&vm_page_dump[word], 1ul << shift);
+}
+
+/*
+ * Clear the vm_page_dump bit for the page containing pa.  Each 32-bit word
+ * of the bitmap covers 32 consecutive pages.
+ */
+void
+dump_drop_page(vm_paddr_t pa)
+{
+       vm_paddr_t pfn = pa >> PAGE_SHIFT;
+       int word = pfn / 32;
+       int shift = pfn % 32;
+
+       atomic_clear_int(&vm_page_dump[word], 1ul << shift);
+}
diff --git a/sys/platform/pc32/include/minidump.h b/sys/platform/pc32/include/minidump.h
new file mode 100644 (file)
index 0000000..55508a3
--- /dev/null
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef        _MACHINE_MINIDUMP_H_
+#define        _MACHINE_MINIDUMP_H_ 1
+
+#define        MINIDUMP_MAGIC          "minidump FreeBSD/i386"
+#define        MINIDUMP_VERSION        1
+
+struct minidumphdr {
+       char magic[24];         /* MINIDUMP_MAGIC string */
+       uint32_t version;       /* MINIDUMP_VERSION */
+       uint32_t msgbufsize;    /* message buffer size in bytes */
+       uint32_t bitmapsize;    /* page bitmap size in bytes */
+       uint32_t ptesize;       /* bytes of page table pages in the dump */
+       uint32_t kernbase;      /* KERNBASE */
+       uint32_t paemode;       /* NOTE(review): the i386 writer in this commit never sets it, so it stays 0 */
+};
+
+#endif /* _MACHINE_MINIDUMP_H_ */
index 77f7057..6101a53 100644 (file)
@@ -122,6 +122,8 @@ platform/pc64/x86_64/swtch.s                standard
 platform/pc64/x86_64/npx.c             standard
 platform/pc64/x86_64/db_interface.c    standard
 platform/pc64/x86_64/db_trace.c                standard
+platform/pc64/x86_64/dump_machdep.c    standard
+platform/pc64/x86_64/minidump_machdep.c        standard
 platform/pc64/x86_64/vm_machdep.c      standard
 platform/pc64/x86_64/machdep.c         standard
 platform/pc64/x86_64/userldt.c         standard
diff --git a/sys/platform/pc64/include/minidump.h b/sys/platform/pc64/include/minidump.h
new file mode 100644 (file)
index 0000000..1ea92b7
--- /dev/null
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef        _MACHINE_MINIDUMP_H_
+#define        _MACHINE_MINIDUMP_H_ 1
+
+#define        MINIDUMP_MAGIC          "minidump FreeBSD/amd64"
+#define        MINIDUMP_VERSION        1
+
+struct minidumphdr {
+       char magic[24];         /* holds MINIDUMP_MAGIC (23 bytes with the NUL) */
+       uint32_t version;       /* presumably MINIDUMP_VERSION; amd64 writer not shown here - TODO confirm */
+       uint32_t msgbufsize;    /* presumably message buffer size in bytes - TODO confirm */
+       uint32_t bitmapsize;    /* presumably page bitmap size in bytes - TODO confirm */
+       uint32_t ptesize;       /* presumably bytes of page table pages dumped - TODO confirm */
+       uint64_t kernbase;      /* presumably the kernel base address - TODO confirm */
+       uint64_t dmapbase;      /* presumably start of the direct map - TODO confirm */
+       uint64_t dmapend;       /* presumably end of the direct map - TODO confirm */
+};
+
+#endif /* _MACHINE_MINIDUMP_H_ */
diff --git a/sys/platform/pc64/x86_64/dump_machdep.c b/sys/platform/pc64/x86_64/dump_machdep.c
new file mode 100644 (file)
index 0000000..61003d3
--- /dev/null
@@ -0,0 +1,365 @@
+/*-
+ * Copyright (c) 2002 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/sysctl.h>
+#include <sys/device.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/elf.h>
+#include <machine/md_var.h>
+#include <machine/thread.h>
+#include <sys/thread2.h>
+
+/* The dump header is written to the device raw, sizeof(kdh) bytes at a time. */
+CTASSERT(sizeof(struct kerneldumpheader) == 512);
+
+/* When non-zero, dumpsys() hands the whole job over to minidumpsys(). */
+int do_minidump = 1;
+TUNABLE_INT("debug.minidump", &do_minidump);
+SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0,
+    "Enable mini crash dumps");
+
+/*
+ * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
+ * is to protect us from metadata and to protect metadata from us.
+ */
+#define        SIZEOF_METADATA         (64*1024)
+
+/* Round x up to the next page (MD_ALIGN) or device block (DEV_ALIGN). */
+#define        MD_ALIGN(x)     (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
+#define        DEV_ALIGN(x)    (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
+
+/* One contiguous range of physical memory to be dumped. */
+struct md_pa {
+       vm_paddr_t md_start;    /* first physical address of the range */
+       vm_paddr_t md_size;     /* length of the range in bytes */
+};
+
+/*
+ * Per-chunk callback used with foreach_chunk(): receives the chunk, its
+ * sequence number and an opaque argument; non-zero return aborts the walk.
+ */
+typedef int callback_t(struct md_pa *, int, void *);
+
+static struct kerneldumpheader kdh;
+/* dumplo: current byte offset on the dump device; fileofs: offset in the ELF image. */
+static off_t dumplo, fileofs;
+
+/* Handle buffered writes. */
+static char buffer[DEV_BSIZE];
+static size_t fragsz;           /* bytes currently held in buffer[] */
+
+/*
+ * Each md_pa is built by md_pa_init() from one (start, end) pair of
+ * dump_avail[] entries, so 10 md_pa's cover at most 20 entries.
+ */
+static struct md_pa dump_map[10];
+
+/*
+ * Rebuild dump_map[] from dump_avail[].  dump_avail[] is read as a list
+ * of (start, end) pairs terminated by a pair of zeroes; every pair is
+ * turned into a (start, size) entry until the terminator is seen or
+ * dump_map[] is full.  Unused slots are left zeroed.
+ */
+static void
+md_pa_init(void)
+{
+       const int nslots = sizeof(dump_map) / sizeof(dump_map[0]);
+       int slot, lo, hi;
+
+       bzero(dump_map, sizeof(dump_map));
+       slot = 0;
+       while (slot < nslots) {
+               lo = 2 * slot;
+               hi = lo + 1;
+               /* A (0, 0) pair marks the end of dump_avail[]. */
+               if (dump_avail[lo] == 0 && dump_avail[hi] == 0)
+                       return;
+               dump_map[slot].md_start = dump_avail[lo];
+               dump_map[slot].md_size = dump_avail[hi] - dump_avail[lo];
+               slot++;
+       }
+}
+
+/*
+ * Return the first entry of dump_map[], the starting point for a walk
+ * with md_pa_next().
+ */
+static struct md_pa *
+md_pa_first(void)
+{
+       struct md_pa *head = dump_map;
+
+       return (head);
+}
+
+/*
+ * Return the entry following mdp in dump_map[], or NULL when the walk
+ * is over.  The walk ends at the first entry whose size is zero, or at
+ * the end of the array: when md_pa_init() fills every slot there is no
+ * zeroed terminator, so the array bound must be checked before the
+ * next entry is dereferenced.
+ */
+static struct md_pa *
+md_pa_next(struct md_pa *mdp)
+{
+       struct md_pa *end;
+
+       end = dump_map + sizeof(dump_map) / sizeof(dump_map[0]);
+       mdp++;
+       if (mdp >= end || mdp->md_size == 0)
+               mdp = NULL;
+       return (mdp);
+}
+
+/*
+ * Append sz bytes starting at ptr to the block buffer.  Each time the
+ * buffer holds a full DEV_BSIZE block it is written to the dump device
+ * at dumplo and dumplo is advanced.  A trailing partial block stays in
+ * the buffer (see buf_flush()).
+ *
+ * Returns 0 on success or the error reported by dev_ddump().
+ */
+static int
+buf_write(struct dumperinfo *di, char *ptr, size_t sz)
+{
+       size_t room, n;
+       int rc;
+
+       for (; sz != 0; ptr += n, sz -= n) {
+               /* Copy as much as still fits into the current block. */
+               room = DEV_BSIZE - fragsz;
+               n = (sz < room) ? sz : room;
+               bcopy(ptr, buffer + fragsz, n);
+               fragsz += n;
+               if (fragsz != DEV_BSIZE)
+                       continue;
+
+               /* Block is full: push it out and start a new one. */
+               rc = dev_ddump(di->priv, buffer, 0, dumplo, DEV_BSIZE);
+               if (rc)
+                       return rc;
+               dumplo += DEV_BSIZE;
+               fragsz = 0;
+       }
+
+       return (0);
+}
+
+/*
+ * Write out a partially filled block buffer, if any.  A whole
+ * DEV_BSIZE block is always written and dumplo advances by DEV_BSIZE
+ * whether or not the write succeeded.
+ *
+ * Returns 0 when there was nothing to flush, otherwise the result of
+ * dev_ddump().
+ */
+static int
+buf_flush(struct dumperinfo *di)
+{
+       int rc = 0;
+
+       if (fragsz != 0) {
+               rc = dev_ddump(di->priv, buffer, 0, dumplo, DEV_BSIZE);
+               dumplo += DEV_BSIZE;
+               fragsz = 0;
+       }
+       return (rc);
+}
+
+/*
+ * Round a page count up to whole units of 256 pages (1MB when pages are
+ * 4KB -- TODO confirm PAGE_SIZE).  The argument is parenthesised so that
+ * expressions can be passed safely.
+ */
+#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
+
+/*
+ * foreach_chunk() callback: write one chunk of physical memory to the
+ * dump device.
+ *
+ * The chunk is processed in pieces of at most MAXDUMPPGS pages.  Each
+ * piece is mapped with pmap_kenter_temporary() and written at dumplo,
+ * which is advanced as data goes out.  Remaining megabytes are printed
+ * roughly every 16MB, and the console is polled so that CTRL-C aborts.
+ *
+ * mdp   - chunk to dump (md_start/md_size in bytes)
+ * seqnr - chunk number, used for the progress message only
+ * arg   - the struct dumperinfo of the dump device
+ *
+ * Returns 0 on success, ECANCELED on user abort, or the error returned
+ * by dev_ddump().
+ */
+static int
+cb_dumpdata(struct md_pa *mdp, int seqnr, void *arg)
+{
+       struct dumperinfo *di = (struct dumperinfo*)arg;
+       vm_paddr_t a, pa;
+       void *va;
+       uint64_t pgs;
+       size_t counter, sz, chunk;
+       int i, c, error;
+
+       error = 0;      /* catch case in which chunk size is 0 */
+       counter = 0;    /* bytes written since the last progress print */
+       va = NULL;
+       pgs = mdp->md_size / PAGE_SIZE;
+       pa = mdp->md_start;
+
+       /* Casts make the arguments match the %ld conversions. */
+       kprintf("  chunk %d: %ldMB (%ld pages)", seqnr, (long)PG2MB(pgs),
+           (long)pgs);
+
+       while (pgs) {
+               chunk = pgs;
+               if (chunk > MAXDUMPPGS)
+                       chunk = MAXDUMPPGS;
+               sz = chunk << PAGE_SHIFT;
+               counter += sz;
+               if (counter >> 24) {
+                       kprintf(" %ld", (long)PG2MB(pgs));
+                       counter &= (1<<24) - 1;
+               }
+               /*
+                * Map the pages of this piece into consecutive temporary
+                * slots.  NOTE(review): the write below relies on
+                * pmap_kenter_temporary() returning the base of that
+                * window -- confirm against its implementation.
+                */
+               for (i = 0; (size_t)i < chunk; i++) {
+                       a = pa + i * PAGE_SIZE;
+                       va = pmap_kenter_temporary(trunc_page(a), i);
+               }
+               error = dev_ddump(di->priv, va, 0, dumplo, sz);
+               if (error)
+                       break;
+               dumplo += sz;
+               pgs -= chunk;
+               pa += sz;
+
+               /* Check for user abort. */
+               c = cncheckc();
+               if (c == 0x03)
+                       return (ECANCELED);
+               if (c != -1)
+                       kprintf(" (CTRL-C to abort) ");
+       }
+       kprintf(" ... %s\n", (error) ? "fail" : "ok");
+       return (error);
+}
+
+/*
+ * foreach_chunk() callback: emit the ELF program header describing one
+ * memory chunk.  The header is a read-only PT_LOAD segment whose
+ * virtual and physical addresses are both the chunk's physical start.
+ * It is queued through buf_write(), and fileofs is then moved past the
+ * chunk so that the next header points at the following segment.
+ *
+ * Returns the result of buf_write().
+ */
+static int
+cb_dumphdr(struct md_pa *mdp, int seqnr, void *arg)
+{
+       struct dumperinfo *di = arg;
+       Elf_Phdr ph;
+       uint64_t nbytes = mdp->md_size;
+       int rc;
+
+       bzero(&ph, sizeof(ph));
+       ph.p_type = PT_LOAD;
+       ph.p_flags = PF_R;                      /* XXX */
+       ph.p_align = PAGE_SIZE;
+       ph.p_offset = fileofs;
+       ph.p_vaddr = mdp->md_start;
+       ph.p_paddr = mdp->md_start;
+       ph.p_filesz = nbytes;
+       ph.p_memsz = nbytes;
+
+       rc = buf_write(di, (char*)&ph, sizeof(ph));
+       fileofs += ph.p_filesz;
+       return (rc);
+}
+
+/*
+ * foreach_chunk() callback: add the size of one chunk to the uint64_t
+ * accumulator that arg points to.  Always returns 0.
+ */
+static int
+cb_size(struct md_pa *mdp, int seqnr, void *arg)
+{
+       uint64_t *total = arg;
+
+       *total = *total + (uint64_t)mdp->md_size;
+       return (0);
+}
+
+/*
+ * Invoke cb on every chunk of dump_map[], passing the chunk, its
+ * sequence number (counted from 0) and arg.
+ *
+ * Returns the number of chunks visited, or the negated error code as
+ * soon as a callback returns non-zero.
+ */
+static int
+foreach_chunk(callback_t cb, void *arg)
+{
+       struct md_pa *cur;
+       int nchunks, rc;
+
+       nchunks = 0;
+       for (cur = md_pa_first(); cur != NULL; cur = md_pa_next(cur)) {
+               rc = cb(cur, nchunks, arg);
+               nchunks++;
+               if (rc != 0)
+                       return (-rc);
+       }
+       return (nchunks);
+}
+
+void
+dumpsys(struct dumperinfo *di)
+{
+       Elf_Ehdr ehdr;
+       uint64_t dumpsize;
+       off_t hdrgap;
+       size_t hdrsz;
+       int error;
+
+       savectx(&dumppcb);
+       dumpthread = curthread;
+
+       if (do_minidump) {
+               minidumpsys(di);
+               return;
+       }
+       bzero(&ehdr, sizeof(ehdr));
+       ehdr.e_ident[EI_MAG0] = ELFMAG0;
+       ehdr.e_ident[EI_MAG1] = ELFMAG1;
+       ehdr.e_ident[EI_MAG2] = ELFMAG2;
+       ehdr.e_ident[EI_MAG3] = ELFMAG3;
+       ehdr.e_ident[EI_CLASS] = ELF_CLASS;
+#if BYTE_ORDER == LITTLE_ENDIAN
+       ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
+#else
+       ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
+#endif
+       ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+       ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE;   /* XXX big picture? */
+       ehdr.e_type = ET_CORE;
+       ehdr.e_machine = EM_X86_64;
+       ehdr.e_phoff = sizeof(ehdr);
+       ehdr.e_flags = 0;
+       ehdr.e_ehsize = sizeof(ehdr);
+       ehdr.e_phentsize = sizeof(Elf_Phdr);
+       ehdr.e_shentsize = sizeof(Elf_Shdr);
+
+       md_pa_init();
+
+       /* Calculate dump size. */
+       dumpsize = 0L;
+       ehdr.e_phnum = foreach_chunk(cb_size, &dumpsize);
+       hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
+       fileofs = MD_ALIGN(hdrsz);
+       dumpsize += fileofs;
+       hdrgap = fileofs - DEV_ALIGN(hdrsz);
+
+       /* Determine dump offset on device. */
+       if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+               error = ENOSPC;
+               goto fail;
+       }
+       dumplo = di->mediaoffset + di->mediasize - dumpsize;
+       dumplo -= sizeof(kdh) * 2;
+
+       mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
+           dumpsize, di->blocksize);
+
+       kprintf("Dumping %llu MB (%d chunks)\n", (long long)dumpsize >> 20,
+           ehdr.e_phnum);
+
+       /* Dump leader */
+       error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+       if (error)
+               goto fail;
+       dumplo += sizeof(kdh);
+
+       /* Dump ELF header */
+       error = buf_write(di, (char*)&ehdr, sizeof(ehdr));
+       if (error)
+               goto fail;
+
+       /* Dump program headers */
+       error = foreach_chunk(cb_dumphdr, di);
+       if (error < 0)
+               goto fail;
+       buf_flush(di);
+
+       /*
+        * All headers are written using blocked I/O, so we know the
+        * current offset is (still) block aligned. Skip the alignement
+        * in the file to have the segment contents aligned at page
+        * boundary. We cannot use MD_ALIGN on dumplo, because we don't
+        * care and may very well be unaligned within the dump device.
+        */
+       dumplo += hdrgap;
+
+       /* Dump memory chunks (updates dumplo) */
+       error = foreach_chunk(cb_dumpdata, di);
+       if (error < 0)
+               goto fail;
+
+       /* Dump trailer */
+       error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+       if (error)
+               goto fail;
+
+       /* Signal completion, signoff and exit stage left. */
+       dev_ddump(di->priv, NULL, 0, 0, 0);
+       kprintf("\nDump complete\n");
+       return;
+
+ fail:
+       if (error < 0)
+               error = -error;
+
+       if (error == ECANCELED)
+               kprintf("\nDump aborted\n");
+       else if (error == ENOSPC)
+               kprintf("\nDump failed. Partition too small.\n");
+       else
+               kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
+}
diff --git a/sys/platform/pc64/x86_64/minidump_machdep.c b/sys/platform/pc64/x86_64/minidump_machdep.c
new file mode 100644 (file)
index 0000000..4727d7c
--- /dev/null
@@ -0,0 +1,412 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/device.h>
+#include <sys/globaldata.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <sys/msgbuf.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/pmap.h>
+#include <machine/atomic.h>
+#include <machine/elf.h>
+#include <machine/globaldata.h>
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+#include <machine/minidump.h>
+
+/* The dump header is written to the device raw, sizeof(kdh) bytes at a time. */
+CTASSERT(sizeof(struct kerneldumpheader) == 512);
+
+/*
+ * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
+ * is to protect us from metadata and to protect metadata from us.
+ */
+#define        SIZEOF_METADATA         (64*1024)
+
+/* Round x up to the next page (MD_ALIGN) or device block (DEV_ALIGN). */
+#define        MD_ALIGN(x)     (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
+#define        DEV_ALIGN(x)    (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
+
+/* Physical address of the kernel page directory pointer page walked by minidumpsys(). */
+extern uint64_t KPDPphys;
+
+/*
+ * Bitmap of physical pages to include in the minidump: page frame N is
+ * bit (N & 63) of word (N >> 6), see dump_add_page()/dump_drop_page().
+ * vm_page_dump_size is the size of the bitmap in bytes.
+ */
+uint64_t *vm_page_dump;
+int vm_page_dump_size;
+
+static struct kerneldumpheader kdh;
+static off_t dumplo;            /* current byte offset on the dump device */
+
+/* Handle chunked writes. */
+static size_t fragsz;           /* bytes mapped at dump_va, not yet written */
+static void *dump_va;           /* last value returned by pmap_kenter_temporary() */
+/* counter: bytes since the last progress print; progress: bytes still to go. */
+static size_t counter, progress;
+
+/* The bitmap code assumes 64-bit words (shift by 6, mask with 63). */
+CTASSERT(sizeof(*vm_page_dump) == 8);
+
+/*
+ * Tell whether physical address pa lies inside one of the [start, end)
+ * ranges listed in dump_avail[].  The list is scanned pairwise up to
+ * its (0, 0) terminator.
+ *
+ * Returns 1 if pa is covered by a range, 0 otherwise.
+ */
+static int
+is_dumpable(vm_paddr_t pa)
+{
+       int slot = 0;
+
+       while (dump_avail[slot] != 0 || dump_avail[slot + 1] != 0) {
+               if (dump_avail[slot] <= pa && pa < dump_avail[slot + 1])
+                       return (1);
+               slot += 2;
+       }
+       return (0);
+}
+
+/*
+ * Round a page count up to whole units of 256 pages (1MB when pages are
+ * 4KB -- TODO confirm PAGE_SIZE).
+ */
+#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
+
+/*
+ * Write out the physical pages that blk_write() has mapped at dump_va
+ * but not yet sent to the device.  fragsz bytes are written at dumplo;
+ * dumplo advances and fragsz is cleared whether or not the write
+ * succeeded.
+ *
+ * Returns 0 when nothing was pending, otherwise the result of
+ * dev_ddump().
+ */
+static int
+blk_flush(struct dumperinfo *di)
+{
+       int rc = 0;
+
+       if (fragsz != 0) {
+               rc = dev_ddump(di->priv, dump_va, 0, dumplo, fragsz);
+               dumplo += fragsz;
+               fragsz = 0;
+       }
+       return (rc);
+}
+
+/*
+ * Write sz bytes to the dump device, taken either from kernel virtual
+ * address ptr or from physical address pa (exactly one of the two may
+ * be given; the other must be NULL/0).
+ *
+ * Virtual data is written straight away, after any pending physical
+ * pages have been flushed.  Physical pages are mapped with
+ * pmap_kenter_temporary() and accumulated until MAXDUMPPGS pages are
+ * pending, at which point they are written by blk_flush(); the caller
+ * has to call blk_flush() itself to push out a final partial batch.
+ *
+ * Remaining megabytes are printed roughly every 16MB and the console is
+ * polled so that CTRL-C aborts the dump.
+ *
+ * Returns 0 on success, EINVAL if sz or pa is not page aligned or if
+ * both ptr and pa are given, ECANCELED on user abort, or the error
+ * returned by the device.
+ */
+static int
+blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
+{
+       size_t len;
+       int error, i, c;
+
+       error = 0;
+       if ((sz % PAGE_SIZE) != 0) {
+               kprintf("size not page aligned\n");
+               return (EINVAL);
+       }
+       if (ptr != NULL && pa != 0) {
+               kprintf("cant have both va and pa!\n");
+               return (EINVAL);
+       }
+       /*
+        * Physical addresses are mapped page by page, so pa itself must
+        * be page aligned.  (Testing ptr here would be pointless: it is
+        * necessarily NULL whenever pa is non-zero.)
+        */
+       if ((pa % PAGE_SIZE) != 0) {
+               kprintf("address not page aligned\n");
+               return (EINVAL);
+       }
+       if (ptr != NULL) {
+               /* If we're doing a virtual dump, flush any pre-existing pa pages */
+               error = blk_flush(di);
+               if (error)
+                       return (error);
+       }
+       while (sz) {
+               len = (MAXDUMPPGS * PAGE_SIZE) - fragsz;
+               if (len > sz)
+                       len = sz;
+               counter += len;
+               progress -= len;
+               if (counter >> 24) {
+                       /* The cast matches the %ld conversion. */
+                       kprintf(" %ld", (long)PG2MB(progress >> PAGE_SHIFT));
+                       counter &= (1<<24) - 1;
+               }
+               if (ptr) {
+                       error = dev_ddump(di->priv, ptr, 0, dumplo, len);
+                       if (error)
+                               return (error);
+                       dumplo += len;
+                       ptr += len;
+                       sz -= len;
+               } else {
+                       /* Map the pages behind the ones already pending. */
+                       for (i = 0; (size_t)i < len; i += PAGE_SIZE)
+                               dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
+                       fragsz += len;
+                       pa += len;
+                       sz -= len;
+                       if (fragsz == (MAXDUMPPGS * PAGE_SIZE)) {
+                               error = blk_flush(di);
+                               if (error)
+                                       return (error);
+                       }
+               }
+
+               /* Check for user abort. */
+               c = cncheckc();
+               if (c == 0x03)
+                       return (ECANCELED);
+               if (c != -1)
+                       kprintf(" (CTRL-C to abort) ");
+       }
+
+       return (0);
+}
+
+/* A fake page table page, to avoid having to handle both 4K and 2M pages */
+static pt_entry_t fakept[NPTEPG];
+
+/*
+ * Write a minidump to the dump device described by di.
+ *
+ * A minidump contains only the pages marked in the vm_page_dump bitmap
+ * plus the data needed to interpret them.  The image is placed at the
+ * end of the media and is laid out as follows:
+ *
+ *     kerneldumpheader (leader)
+ *     one page holding struct minidumphdr
+ *     message buffer, rounded up to whole pages
+ *     vm_page_dump bitmap, rounded up to whole pages
+ *     one page table page for every NBPDR step of kernel VA
+ *     every physical page whose bit is set in the bitmap
+ *     kerneldumpheader (trailer)
+ *
+ * The function makes two passes over the kernel page tables: the first
+ * marks the mapped pages in the bitmap and counts page table pages, the
+ * second writes the page table pages themselves.  Progress and the
+ * final status are reported on the console.
+ *
+ * NOTE(review): fragsz is not reset on entry, so state left behind by
+ * an earlier aborted attempt would be carried over -- confirm whether
+ * this function can be entered twice.
+ */
+void
+minidumpsys(struct dumperinfo *di)
+{
+       uint64_t dumpsize;
+       uint32_t ptesize;
+       vm_offset_t va;
+       vm_offset_t kern_end;
+       int error;
+       uint64_t bits;
+       uint64_t *pdp, *pd, *pt, pa;
+       int i, j, k, bit;
+       struct minidumphdr mdhdr;
+       struct mdglobaldata *md;
+
+       counter = 0;
+       /* Walk page table pages, set bits in vm_page_dump */
+       ptesize = 0;
+
+       md = (struct mdglobaldata *)globaldata_find(0);
+
+       /* Extend the range to walk so that it reaches &md[ncpus]. */
+       kern_end = kernel_vm_end;
+       if (kern_end < (vm_offset_t)&(md[ncpus]))
+               kern_end = (vm_offset_t)&(md[ncpus]);
+
+       pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
+       for (va = KERNBASE; va < kern_end; va += NBPDR) {
+               i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
+               /*
+                * We always write a page, even if it is zero. Each
+                * page written corresponds to 2MB of space
+                */
+               ptesize += PAGE_SIZE;
+               if ((pdp[i] & PG_V) == 0)
+                       continue;
+               pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+               j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+               if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
+                       /* This is an entire 2M page. */
+                       pa = pd[j] & PG_PS_FRAME;
+                       for (k = 0; k < NPTEPG; k++) {
+                               if (is_dumpable(pa))
+                                       dump_add_page(pa);
+                               pa += PAGE_SIZE;
+                       }
+                       continue;
+               }
+               if ((pd[j] & PG_V) == PG_V) {
+                       /* set bit for each valid page in this 2MB block */
+                       pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
+                       for (k = 0; k < NPTEPG; k++) {
+                               if ((pt[k] & PG_V) == PG_V) {
+                                       pa = pt[k] & PG_FRAME;
+                                       if (is_dumpable(pa))
+                                               dump_add_page(pa);
+                               }
+                       }
+               } else {
+                       /* nothing, we're going to dump a null page */
+               }
+       }
+
+       /* Calculate dump size. */
+       dumpsize = ptesize;
+       dumpsize += round_page(msgbufp->msg_size);
+       dumpsize += round_page(vm_page_dump_size);
+       /* One page for every set bit that is dumpable. */
+       for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+               bits = vm_page_dump[i];
+               while (bits) {
+                       bit = bsfq(bits);
+                       pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+                       /* Clear out undumpable pages now if needed */
+                       if (is_dumpable(pa)) {
+                               dumpsize += PAGE_SIZE;
+                       } else {
+                               dump_drop_page(pa);
+                       }
+                       bits &= ~(1ul << bit);
+               }
+       }
+       /* Plus the page that carries struct minidumphdr. */
+       dumpsize += PAGE_SIZE;
+
+       /* Determine dump offset on device. */
+       if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+               error = ENOSPC;
+               goto fail;
+       }
+       dumplo = di->mediaoffset + di->mediasize - dumpsize;
+       dumplo -= sizeof(kdh) * 2;
+       progress = dumpsize;
+
+       /* Initialize mdhdr */
+       bzero(&mdhdr, sizeof(mdhdr));
+       strcpy(mdhdr.magic, MINIDUMP_MAGIC);
+       mdhdr.version = MINIDUMP_VERSION;
+       mdhdr.msgbufsize = msgbufp->msg_size;
+       mdhdr.bitmapsize = vm_page_dump_size;
+       mdhdr.ptesize = ptesize;
+       mdhdr.kernbase = KERNBASE;
+       mdhdr.dmapbase = DMAP_MIN_ADDRESS;
+       mdhdr.dmapend = DMAP_MAX_ADDRESS;
+
+       mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
+           dumpsize, di->blocksize);
+
+       kprintf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
+       /* NOTE(review): %llu is paired with a (long long) argument here. */
+       kprintf("Dumping %llu MB:", (long long)dumpsize >> 20);
+
+       /* Dump leader */
+       error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+       if (error)
+               goto fail;
+       dumplo += sizeof(kdh);
+
+       /* Dump my header */
+       /* fakept doubles as a zeroed page-sized buffer for the header. */
+       bzero(&fakept, sizeof(fakept));
+       bcopy(&mdhdr, &fakept, sizeof(mdhdr));
+       error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+       if (error)
+               goto fail;
+
+       /* Dump msgbuf up front */
+       error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
+       if (error)
+               goto fail;
+
+       /* Dump bitmap */
+       error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
+       if (error)
+               goto fail;
+
+       /* Dump kernel page table pages */
+       /* Same walk as the first pass, so exactly ptesize bytes are written. */
+       pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
+       for (va = KERNBASE; va < kern_end; va += NBPDR) {
+               i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
+               /* We always write a page, even if it is zero */
+               if ((pdp[i] & PG_V) == 0) {
+                       bzero(fakept, sizeof(fakept));
+                       error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+                       /* flush, in case we reuse fakept in the same block */
+                       error = blk_flush(di);
+                       if (error)
+                               goto fail;
+                       continue;
+               }
+               pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+               j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+               if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
+                       /* This is a single 2M block. Generate a fake PTP */
+                       pa = pd[j] & PG_PS_FRAME;
+                       for (k = 0; k < NPTEPG; k++) {
+                               fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
+                       }
+                       error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+                       /* flush, in case we reuse fakept in the same block */
+                       error = blk_flush(di);
+                       if (error)
+                               goto fail;
+                       continue;
+               }
+               if ((pd[j] & PG_V) == PG_V) {
+                       /* Real page table page: write it as is. */
+                       pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
+                       error = blk_write(di, (char *)pt, 0, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+               } else {
+                       bzero(fakept, sizeof(fakept));
+                       error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+                       /* flush, in case we reuse fakept in the same block */
+                       error = blk_flush(di);
+                       if (error)
+                               goto fail;
+               }
+       }
+
+       /* Dump memory chunks */
+       /* XXX cluster it up and use blk_dump() */
+       for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+               bits = vm_page_dump[i];
+               while (bits) {
+                       bit = bsfq(bits);
+                       pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+                       error = blk_write(di, 0, pa, PAGE_SIZE);
+                       if (error)
+                               goto fail;
+                       bits &= ~(1ul << bit);
+               }
+       }
+
+       /* Push out the physical pages still pending in blk_write(). */
+       error = blk_flush(di);
+       if (error)
+               goto fail;
+
+       /* Dump trailer */
+       error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+       if (error)
+               goto fail;
+       dumplo += sizeof(kdh);
+
+       /* Signal completion, signoff and exit stage left. */
+       dev_ddump(di->priv, NULL, 0, 0, 0);
+       kprintf("\nDump complete\n");
+       return;
+
+ fail:
+       if (error < 0)
+               error = -error;
+
+       if (error == ECANCELED)
+               kprintf("\nDump aborted\n");
+       else if (error == ENOSPC)
+               kprintf("\nDump failed. Partition too small.\n");
+       else
+               kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
+}
+
+/*
+ * Mark the physical page containing pa for inclusion in the minidump
+ * by atomically setting its bit in the vm_page_dump bitmap.
+ */
+void
+dump_add_page(vm_paddr_t pa)
+{
+       int word, shift;
+
+       pa >>= PAGE_SHIFT;              /* page frame number */
+       word = pa >> 6;                 /* 64 page bits per bitmap word */
+       shift = pa & 63;
+       atomic_set_long(&vm_page_dump[word], 1ul << shift);
+}
+
+/*
+ * Exclude the physical page containing pa from the minidump by
+ * atomically clearing its bit in the vm_page_dump bitmap.
+ */
+void
+dump_drop_page(vm_paddr_t pa)
+{
+       int word, shift;
+
+       pa >>= PAGE_SHIFT;              /* page frame number */
+       word = pa >> 6;                 /* 64 page bits per bitmap word */
+       shift = pa & 63;
+       atomic_clear_long(&vm_page_dump[word], 1ul << shift);
+}