From be66ad118c8d757a140a55c8309a4a772b041432 Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Sun, 6 Dec 2009 19:48:53 +0000 Subject: [PATCH] dump - Bring in FreeBSD's dumping (new dumps & minidumps) * Bring in FreeBSD's dumps and minidumps, which use an ELF header instead of a raw dump. * Adapt to our needs by, for example, saving the dumppcb and dumpthread. Obtained-from: FreeBSD --- sys/platform/pc32/conf/files | 2 + sys/platform/pc32/i386/dump_machdep.c | 367 +++++++++++++++++ sys/platform/pc32/i386/minidump_machdep.c | 396 +++++++++++++++++++ sys/platform/pc32/include/minidump.h | 45 +++ sys/platform/pc64/conf/files | 2 + sys/platform/pc64/include/minidump.h | 46 +++ sys/platform/pc64/x86_64/dump_machdep.c | 365 +++++++++++++++++ sys/platform/pc64/x86_64/minidump_machdep.c | 412 ++++++++++++++++++++ 8 files changed, 1635 insertions(+) create mode 100644 sys/platform/pc32/i386/dump_machdep.c create mode 100644 sys/platform/pc32/i386/minidump_machdep.c create mode 100644 sys/platform/pc32/include/minidump.h create mode 100644 sys/platform/pc64/include/minidump.h create mode 100644 sys/platform/pc64/x86_64/dump_machdep.c create mode 100644 sys/platform/pc64/x86_64/minidump_machdep.c diff --git a/sys/platform/pc32/conf/files b/sys/platform/pc32/conf/files index 466375ffa2..04ec24432b 100644 --- a/sys/platform/pc32/conf/files +++ b/sys/platform/pc32/conf/files @@ -146,6 +146,7 @@ platform/pc32/i386/bioscall.s standard platform/pc32/i386/busdma_machdep.c standard platform/pc32/i386/db_interface.c optional ddb platform/pc32/i386/db_trace.c optional ddb +platform/pc32/i386/dump_machdep.c standard platform/pc32/i386/elan-mmcr.c optional cpu_elan platform/pc32/i386/geode.c optional cpu_geode platform/pc32/i386/cs5536.c optional cpu_geode @@ -156,6 +157,7 @@ platform/pc32/i386/i686_mem.c standard platform/pc32/i386/identcpu.c standard platform/pc32/i386/initcpu.c standard platform/pc32/i386/k6_mem.c standard +platform/pc32/i386/minidump_machdep.c standard 
platform/pc32/i386/tls.c standard # locore.s needs to be handled in Makefile to put it first. Otherwise it's # now normal. diff --git a/sys/platform/pc32/i386/dump_machdep.c b/sys/platform/pc32/i386/dump_machdep.c new file mode 100644 index 0000000000..2c84c336e5 --- /dev/null +++ b/sys/platform/pc32/i386/dump_machdep.c @@ -0,0 +1,367 @@ +/*- + * Copyright (c) 2002 Marcel Moolenaar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CTASSERT(sizeof(struct kerneldumpheader) == 512); + +int do_minidump = 1; +TUNABLE_INT("debug.minidump", &do_minidump); +SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0, + "Enable mini crash dumps"); + + +/* + * Don't touch the first SIZEOF_METADATA bytes on the dump device. This + * is to protect us from metadata and to protect metadata from us. + */ +#define SIZEOF_METADATA (64*1024) + +#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) +#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) + +struct md_pa { + vm_paddr_t md_start; + vm_paddr_t md_size; +}; + +typedef int callback_t(struct md_pa *, int, void *); + +static struct kerneldumpheader kdh; +static off_t dumplo, fileofs; + +/* Handle buffered writes. */ +static char buffer[DEV_BSIZE]; +static size_t fragsz; + +/* 20 phys_avail entry pairs correspond to 10 md_pa's */ +static struct md_pa dump_map[10]; + +static void +md_pa_init(void) +{ + int n, idx; + + bzero(dump_map, sizeof(dump_map)); + for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) { + idx = n * 2; + if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0) + break; + dump_map[n].md_start = dump_avail[idx]; + dump_map[n].md_size = dump_avail[idx + 1] - dump_avail[idx]; + } +} + +static struct md_pa * +md_pa_first(void) +{ + + return (&dump_map[0]); +} + +static struct md_pa * +md_pa_next(struct md_pa *mdp) +{ + + mdp++; + if (mdp->md_size == 0) + mdp = NULL; + return (mdp); +} + +static int +buf_write(struct dumperinfo *di, char *ptr, size_t sz) +{ + size_t len; + int error; + + while (sz) { + len = DEV_BSIZE - fragsz; + if (len > sz) + len = sz; + bcopy(ptr, buffer + fragsz, len); + fragsz += len; + ptr += len; + sz -= len; + if (fragsz == DEV_BSIZE) { + error = dev_ddump(di->priv, buffer, 0, dumplo, + DEV_BSIZE); + if (error) + return 
error; + dumplo += DEV_BSIZE; + fragsz = 0; + } + } + + return (0); +} + +static int +buf_flush(struct dumperinfo *di) +{ + int error; + + if (fragsz == 0) + return (0); + + error = dev_ddump(di->priv, buffer, 0, dumplo, DEV_BSIZE); + dumplo += DEV_BSIZE; + fragsz = 0; + return (error); +} + +#define PG2MB(pgs) ((pgs + (1 << 8) - 1) >> 8) + +static int +cb_dumpdata(struct md_pa *mdp, int seqnr, void *arg) +{ + struct dumperinfo *di = (struct dumperinfo*)arg; + vm_paddr_t a, pa; + void *va; + uint64_t pgs; + size_t counter, sz, chunk; + int i, c, error, twiddle; + + error = 0; /* catch case in which chunk size is 0 */ + counter = 0; /* Update twiddle every 16MB */ + twiddle = 0; + va = 0; + pgs = mdp->md_size / PAGE_SIZE; + pa = mdp->md_start; + + kprintf(" chunk %d: %lldMB (%lld pages)", seqnr, PG2MB(pgs), pgs); + + while (pgs) { + chunk = pgs; + if (chunk > MAXDUMPPGS) + chunk = MAXDUMPPGS; + sz = chunk << PAGE_SHIFT; + counter += sz; + if (counter >> 24) { + kprintf(" %lld", PG2MB(pgs)); + counter &= (1<<24) - 1; + } + for (i = 0; i < chunk; i++) { + a = pa + i * PAGE_SIZE; + va = pmap_kenter_temporary(trunc_page(a), i); + } + error = dev_ddump(di->priv, va, 0, dumplo, sz); + if (error) + break; + dumplo += sz; + pgs -= chunk; + pa += sz; + + /* Check for user abort. */ + c = cncheckc(); + if (c == 0x03) + return (ECANCELED); + if (c != -1) + kprintf(" (CTRL-C to abort) "); + } + kprintf(" ... %s\n", (error) ? 
"fail" : "ok"); + return (error); +} + +static int +cb_dumphdr(struct md_pa *mdp, int seqnr, void *arg) +{ + struct dumperinfo *di = (struct dumperinfo*)arg; + Elf_Phdr phdr; + uint64_t size; + int error; + + size = mdp->md_size; + bzero(&phdr, sizeof(phdr)); + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R; /* XXX */ + phdr.p_offset = fileofs; + phdr.p_vaddr = mdp->md_start; + phdr.p_paddr = mdp->md_start; + phdr.p_filesz = size; + phdr.p_memsz = size; + phdr.p_align = PAGE_SIZE; + + error = buf_write(di, (char*)&phdr, sizeof(phdr)); + fileofs += phdr.p_filesz; + return (error); +} + +static int +cb_size(struct md_pa *mdp, int seqnr, void *arg) +{ + uint64_t *sz = (uint64_t*)arg; + + *sz += (uint64_t)mdp->md_size; + return (0); +} + +static int +foreach_chunk(callback_t cb, void *arg) +{ + struct md_pa *mdp; + int error, seqnr; + + seqnr = 0; + mdp = md_pa_first(); + while (mdp != NULL) { + error = (*cb)(mdp, seqnr++, arg); + if (error) + return (-error); + mdp = md_pa_next(mdp); + } + return (seqnr); +} + +void +dumpsys(struct dumperinfo *di) +{ + Elf_Ehdr ehdr; + uint64_t dumpsize; + off_t hdrgap; + size_t hdrsz; + int error; + + savectx(&dumppcb); + dumpthread = curthread; + + if (do_minidump) { + minidumpsys(di); + return; + } + + bzero(&ehdr, sizeof(ehdr)); + ehdr.e_ident[EI_MAG0] = ELFMAG0; + ehdr.e_ident[EI_MAG1] = ELFMAG1; + ehdr.e_ident[EI_MAG2] = ELFMAG2; + ehdr.e_ident[EI_MAG3] = ELFMAG3; + ehdr.e_ident[EI_CLASS] = ELF_CLASS; +#if BYTE_ORDER == LITTLE_ENDIAN + ehdr.e_ident[EI_DATA] = ELFDATA2LSB; +#else + ehdr.e_ident[EI_DATA] = ELFDATA2MSB; +#endif + ehdr.e_ident[EI_VERSION] = EV_CURRENT; + ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */ + ehdr.e_type = ET_CORE; + ehdr.e_machine = EM_386; + ehdr.e_phoff = sizeof(ehdr); + ehdr.e_flags = 0; + ehdr.e_ehsize = sizeof(ehdr); + ehdr.e_phentsize = sizeof(Elf_Phdr); + ehdr.e_shentsize = sizeof(Elf_Shdr); + + md_pa_init(); + + /* Calculate dump size. 
*/ + dumpsize = 0L; + ehdr.e_phnum = foreach_chunk(cb_size, &dumpsize); + hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize; + fileofs = MD_ALIGN(hdrsz); + dumpsize += fileofs; + hdrgap = fileofs - DEV_ALIGN(hdrsz); + + /* Determine dump offset on device. */ + if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { + error = ENOSPC; + goto fail; + } + dumplo = di->mediaoffset + di->mediasize - dumpsize; + dumplo -= sizeof(kdh) * 2; + + mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, + dumpsize, di->blocksize); + + kprintf("Dumping %llu MB (%d chunks)\n", (long long)dumpsize >> 20, + ehdr.e_phnum); + + /* Dump leader */ + error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh)); + if (error) + goto fail; + dumplo += sizeof(kdh); + + /* Dump ELF header */ + error = buf_write(di, (char*)&ehdr, sizeof(ehdr)); + if (error) + goto fail; + + /* Dump program headers */ + error = foreach_chunk(cb_dumphdr, di); + if (error < 0) + goto fail; + buf_flush(di); + + /* + * All headers are written using blocked I/O, so we know the + * current offset is (still) block aligned. Skip the alignment + * in the file to have the segment contents aligned at page + * boundary. We cannot use MD_ALIGN on dumplo, because we don't + * care and may very well be unaligned within the dump device. + */ + dumplo += hdrgap; + + /* Dump memory chunks (updates dumplo) */ + error = foreach_chunk(cb_dumpdata, di); + if (error < 0) + goto fail; + + /* Dump trailer */ + error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh)); + if (error) + goto fail; + + /* Signal completion, signoff and exit stage left. */ + dev_ddump(di->priv, NULL, 0, 0, 0); + kprintf("\nDump complete\n"); + return; + + fail: + if (error < 0) + error = -error; + + if (error == ECANCELED) + kprintf("\nDump aborted\n"); + else if (error == ENOSPC) + kprintf("\nDump failed. 
Partition too small.\n"); + else + kprintf("\n** DUMP FAILED (ERROR %d) **\n", error); +} diff --git a/sys/platform/pc32/i386/minidump_machdep.c b/sys/platform/pc32/i386/minidump_machdep.c new file mode 100644 index 0000000000..0d9bd63fd3 --- /dev/null +++ b/sys/platform/pc32/i386/minidump_machdep.c @@ -0,0 +1,396 @@ +/*- + * Copyright (c) 2006 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CTASSERT(sizeof(struct kerneldumpheader) == 512); + +/* + * Don't touch the first SIZEOF_METADATA bytes on the dump device. 
This + * is to protect us from metadata and to protect metadata from us. + */ +#define SIZEOF_METADATA (64*1024) + +#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) +#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) + +uint32_t *vm_page_dump; +int vm_page_dump_size; + +static struct kerneldumpheader kdh; +static off_t dumplo; + +/* Handle chunked writes. */ +static size_t fragsz; +static void *dump_va; +static uint64_t counter, progress; + +CTASSERT(sizeof(*vm_page_dump) == 4); + +static int +is_dumpable(vm_paddr_t pa) +{ + int i; + + for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { + if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) + return (1); + } + return (0); +} + +#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) + +static int +blk_flush(struct dumperinfo *di) +{ + int error; + + if (fragsz == 0) + return (0); + + error = dev_ddump(di->priv, dump_va, 0, dumplo, fragsz); + dumplo += fragsz; + fragsz = 0; + return (error); +} + +static int +blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) +{ + size_t len; + int error, i, c; + + error = 0; + if ((sz % PAGE_SIZE) != 0) { + kprintf("size not page aligned\n"); + return (EINVAL); + } + if (ptr != NULL && pa != 0) { + kprintf("cant have both va and pa!\n"); + return (EINVAL); + } + if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { + kprintf("address not page aligned\n"); + return (EINVAL); + } + if (ptr != NULL) { + /* If we're doing a virtual dump, flush any pre-existing pa pages */ + error = blk_flush(di); + if (error) + return (error); + } + while (sz) { + len = (MAXDUMPPGS * PAGE_SIZE) - fragsz; + if (len > sz) + len = sz; + counter += len; + progress -= len; + if (counter >> 24) { + kprintf(" %lld", PG2MB(progress >> PAGE_SHIFT)); + counter &= (1<<24) - 1; + } + if (ptr) { + error = dev_ddump(di->priv, ptr, 0, dumplo, len); + if (error) + return (error); + dumplo += len; + ptr += len; + sz -= len; + } else { + for (i = 0; i < len; i += 
PAGE_SIZE) + dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); + fragsz += len; + pa += len; + sz -= len; + if (fragsz == (MAXDUMPPGS * PAGE_SIZE)) { + error = blk_flush(di); + if (error) + return (error); + } + } + + /* Check for user abort. */ + c = cncheckc(); + if (c == 0x03) + return (ECANCELED); + if (c != -1) + kprintf(" (CTRL-C to abort) "); + } + + return (0); +} + +/* A fake page table page, to avoid having to handle both 4K and 2M pages */ +static pt_entry_t fakept[NPTEPG]; + +void +minidumpsys(struct dumperinfo *di) +{ + uint64_t dumpsize; + uint32_t ptesize; + vm_offset_t va; + vm_offset_t kern_end; + int error; + uint32_t bits; + uint64_t pa; + pd_entry_t *pd; + pt_entry_t *pt; + int i, j, k, bit; + struct minidumphdr mdhdr; + struct mdglobaldata *md; + + counter = 0; + ptesize = 0; + + md = (struct mdglobaldata *)globaldata_find(0); + + kern_end = kernel_vm_end; + if (kern_end < (vm_offset_t)&(md[ncpus])) + kern_end = (vm_offset_t)&(md[ncpus]); +#if 0 + kern_end = 0xFFFFF000; +#endif + + /* Walk page table pages, set bits in vm_page_dump */ + for (va = KERNBASE; va < kern_end; va += NBPDR) { + /* + * We always write a page, even if it is zero. Each + * page written corresponds to 2MB of space + */ + ptesize += PAGE_SIZE; + pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */ + j = va >> PDRSHIFT; + if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { + /* This is an entire 2M page. */ + pa = pd[j] & PG_FRAME & ~PDRMASK; + for (k = 0; k < NPTEPG; k++) { + if (is_dumpable(pa)) + dump_add_page(pa); + pa += PAGE_SIZE; + } + continue; + } + if ((pd[j] & PG_V) == PG_V) { + /* set bit for each valid page in this 2MB block */ + pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0); + for (k = 0; k < NPTEPG; k++) { + if ((pt[k] & PG_V) == PG_V) { + pa = pt[k] & PG_FRAME; + if (is_dumpable(pa)) + dump_add_page(pa); + } + } + } else { + /* nothing, we're going to dump a null page */ + } + } + + /* Calculate dump size. 
*/ + dumpsize = ptesize; + dumpsize += round_page(msgbufp->msg_size); + dumpsize += round_page(vm_page_dump_size); + for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { + bits = vm_page_dump[i]; + while (bits) { + bit = bsfl(bits); + pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; + /* Clear out undumpable pages now if needed */ + if (is_dumpable(pa)) { + dumpsize += PAGE_SIZE; + } else { + dump_drop_page(pa); + } + bits &= ~(1ul << bit); + } + } + dumpsize += PAGE_SIZE; + + /* Determine dump offset on device. */ + if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { + error = ENOSPC; + goto fail; + } + dumplo = di->mediaoffset + di->mediasize - dumpsize; + dumplo -= sizeof(kdh) * 2; + progress = dumpsize; + + /* Initialize mdhdr */ + bzero(&mdhdr, sizeof(mdhdr)); + strcpy(mdhdr.magic, MINIDUMP_MAGIC); + mdhdr.version = MINIDUMP_VERSION; + mdhdr.msgbufsize = msgbufp->msg_size; + mdhdr.bitmapsize = vm_page_dump_size; + mdhdr.ptesize = ptesize; + mdhdr.kernbase = KERNBASE; + + mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, + dumpsize, di->blocksize); + + kprintf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); + kprintf("Dumping %llu MB:", (long long)dumpsize >> 20); + + /* Dump leader */ + error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh)); + if (error) + goto fail; + dumplo += sizeof(kdh); + + /* Dump my header */ + bzero(&fakept, sizeof(fakept)); + bcopy(&mdhdr, &fakept, sizeof(mdhdr)); + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + + /* Dump msgbuf up front */ + error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); + if (error) + goto fail; + + /* Dump bitmap */ + error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); + if (error) + goto fail; + + /* Dump kernel page table pages */ + for (va = KERNBASE; va < kern_end; va += NBPDR) { + /* We always write a page, even if it is zero */ + 
pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */ + j = va >> PDRSHIFT; + if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { + /* This is a single 2M block. Generate a fake PTP */ + pa = pd[j] & PG_FRAME & ~PDRMASK; + for (k = 0; k < NPTEPG; k++) { + fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; + } + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + /* flush, in case we reuse fakept in the same block */ + error = blk_flush(di); + if (error) + goto fail; + continue; + } + if ((pd[j] & PG_V) == PG_V) { + pa = pd[j] & PG_FRAME; + error = blk_write(di, 0, pa, PAGE_SIZE); + if (error) + goto fail; + } else { + bzero(fakept, sizeof(fakept)); + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + /* flush, in case we reuse fakept in the same block */ + error = blk_flush(di); + if (error) + goto fail; + } + } + + /* Dump memory chunks */ + /* XXX cluster it up and use blk_dump() */ + for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { + bits = vm_page_dump[i]; + while (bits) { + bit = bsfl(bits); + pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; + error = blk_write(di, 0, pa, PAGE_SIZE); + if (error) + goto fail; + bits &= ~(1ul << bit); + } + } + + error = blk_flush(di); + if (error) + goto fail; + + /* Dump trailer */ + error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh)); + if (error) + goto fail; + dumplo += sizeof(kdh); + + /* Signal completion, signoff and exit stage left. */ + dev_ddump(di->priv, NULL, 0, 0, 0); + kprintf("\nDump complete\n"); + return; + + fail: + if (error < 0) + error = -error; + + if (error == ECANCELED) + kprintf("\nDump aborted\n"); + else if (error == ENOSPC) + kprintf("\nDump failed. 
Partition too small.\n"); + else + kprintf("\n** DUMP FAILED (ERROR %d) **\n", error); +} + +void +dump_add_page(vm_paddr_t pa) +{ + int idx, bit; + + pa >>= PAGE_SHIFT; + idx = pa >> 5; /* 2^5 = 32 */ + bit = pa & 31; + atomic_set_int(&vm_page_dump[idx], 1ul << bit); +} + +void +dump_drop_page(vm_paddr_t pa) +{ + int idx, bit; + + pa >>= PAGE_SHIFT; + idx = pa >> 5; /* 2^5 = 32 */ + bit = pa & 31; + atomic_clear_int(&vm_page_dump[idx], 1ul << bit); +} diff --git a/sys/platform/pc32/include/minidump.h b/sys/platform/pc32/include/minidump.h new file mode 100644 index 0000000000..55508a370e --- /dev/null +++ b/sys/platform/pc32/include/minidump.h @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2006 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MACHINE_MINIDUMP_H_ +#define _MACHINE_MINIDUMP_H_ 1 + +#define MINIDUMP_MAGIC "minidump FreeBSD/i386" +#define MINIDUMP_VERSION 1 + +struct minidumphdr { + char magic[24]; + uint32_t version; + uint32_t msgbufsize; + uint32_t bitmapsize; + uint32_t ptesize; + uint32_t kernbase; + uint32_t paemode; +}; + +#endif /* _MACHINE_MINIDUMP_H_ */ diff --git a/sys/platform/pc64/conf/files b/sys/platform/pc64/conf/files index 77f7057c88..6101a53310 100644 --- a/sys/platform/pc64/conf/files +++ b/sys/platform/pc64/conf/files @@ -122,6 +122,8 @@ platform/pc64/x86_64/swtch.s standard platform/pc64/x86_64/npx.c standard platform/pc64/x86_64/db_interface.c standard platform/pc64/x86_64/db_trace.c standard +platform/pc64/x86_64/dump_machdep.c standard +platform/pc64/x86_64/minidump_machdep.c standard platform/pc64/x86_64/vm_machdep.c standard platform/pc64/x86_64/machdep.c standard platform/pc64/x86_64/userldt.c standard diff --git a/sys/platform/pc64/include/minidump.h b/sys/platform/pc64/include/minidump.h new file mode 100644 index 0000000000..1ea92b7732 --- /dev/null +++ b/sys/platform/pc64/include/minidump.h @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 2006 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_MINIDUMP_H_ +#define _MACHINE_MINIDUMP_H_ 1 + +#define MINIDUMP_MAGIC "minidump FreeBSD/amd64" +#define MINIDUMP_VERSION 1 + +struct minidumphdr { + char magic[24]; + uint32_t version; + uint32_t msgbufsize; + uint32_t bitmapsize; + uint32_t ptesize; + uint64_t kernbase; + uint64_t dmapbase; + uint64_t dmapend; +}; + +#endif /* _MACHINE_MINIDUMP_H_ */ diff --git a/sys/platform/pc64/x86_64/dump_machdep.c b/sys/platform/pc64/x86_64/dump_machdep.c new file mode 100644 index 0000000000..61003d3972 --- /dev/null +++ b/sys/platform/pc64/x86_64/dump_machdep.c @@ -0,0 +1,365 @@ +/*- + * Copyright (c) 2002 Marcel Moolenaar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CTASSERT(sizeof(struct kerneldumpheader) == 512); + +int do_minidump = 1; +TUNABLE_INT("debug.minidump", &do_minidump); +SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0, + "Enable mini crash dumps"); + +/* + * Don't touch the first SIZEOF_METADATA bytes on the dump device. This + * is to protect us from metadata and to protect metadata from us. + */ +#define SIZEOF_METADATA (64*1024) + +#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) +#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) + +struct md_pa { + vm_paddr_t md_start; + vm_paddr_t md_size; +}; + +typedef int callback_t(struct md_pa *, int, void *); + +static struct kerneldumpheader kdh; +static off_t dumplo, fileofs; + +/* Handle buffered writes. 
*/
+static char buffer[DEV_BSIZE];
+static size_t fragsz;
+
+/* 20 phys_avail entry pairs correspond to 10 md_pa's */
+static struct md_pa dump_map[10];
+
+/*
+ * Fill dump_map[] from the (start, end) pairs in dump_avail[].  Stops at
+ * the first all-zero pair or when dump_map[] is full.  Unused entries stay
+ * zeroed, which md_pa_next() treats as the end of the list.
+ */
+static void
+md_pa_init(void)
+{
+	int n, idx;
+
+	bzero(dump_map, sizeof(dump_map));
+	for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) {
+		idx = n * 2;
+		if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
+			break;
+		dump_map[n].md_start = dump_avail[idx];
+		dump_map[n].md_size = dump_avail[idx + 1] - dump_avail[idx];
+	}
+}
+
+/*
+ * Return the first entry of dump_map[].
+ */
+static struct md_pa *
+md_pa_first(void)
+{
+
+	return (&dump_map[0]);
+}
+
+/*
+ * Return the entry following mdp, or NULL when the list is exhausted.
+ * The list ends either at the first zero-sized entry or at the end of
+ * dump_map[]; the bounds test keeps us from reading past the array when
+ * every slot is in use.
+ */
+static struct md_pa *
+md_pa_next(struct md_pa *mdp)
+{
+	struct md_pa *end;
+
+	end = &dump_map[sizeof(dump_map) / sizeof(dump_map[0])];
+	mdp++;
+	if (mdp >= end || mdp->md_size == 0)
+		mdp = NULL;
+	return (mdp);
+}
+
+/*
+ * Append sz bytes from ptr to the block buffer.  Each time the buffer
+ * reaches DEV_BSIZE bytes it is written at dumplo and dumplo advances.
+ * Returns 0 or the error from dev_ddump().
+ */
+static int
+buf_write(struct dumperinfo *di, char *ptr, size_t sz)
+{
+	size_t len;
+	int error;
+
+	while (sz) {
+		len = DEV_BSIZE - fragsz;
+		if (len > sz)
+			len = sz;
+		bcopy(ptr, buffer + fragsz, len);
+		fragsz += len;
+		ptr += len;
+		sz -= len;
+		if (fragsz == DEV_BSIZE) {
+			error = dev_ddump(di->priv, buffer, 0, dumplo,
+			    DEV_BSIZE);
+			if (error)
+				return error;
+			dumplo += DEV_BSIZE;
+			fragsz = 0;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Write out a partially filled block buffer, if any.  A full DEV_BSIZE
+ * block is always written and dumplo advances by DEV_BSIZE even when the
+ * write fails.  Returns 0 or the error from dev_ddump().
+ */
+static int
+buf_flush(struct dumperinfo *di)
+{
+	int error;
+
+	if (fragsz == 0)
+		return (0);
+
+	error = dev_ddump(di->priv, buffer, 0, dumplo, DEV_BSIZE);
+	dumplo += DEV_BSIZE;
+	fragsz = 0;
+	return (error);
+}
+
+/* Pages to megabytes, rounded up (256 pages per MB). */
+#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
+
+/*
+ * foreach_chunk() callback: write the memory described by mdp to the dump
+ * device, at most MAXDUMPPGS pages per dev_ddump() call.  arg is the
+ * struct dumperinfo.  Returns 0, the dev_ddump() error, or ECANCELED when
+ * the console returns 0x03 (CTRL-C).
+ */
+static int
+cb_dumpdata(struct md_pa *mdp, int seqnr, void *arg)
+{
+	struct dumperinfo *di = (struct dumperinfo*)arg;
+	vm_paddr_t a, pa;
+	void *va;
+	uint64_t pgs;
+	size_t counter, sz, chunk;
+	int i, c, error;
+
+	error = 0;	/* catch case in which chunk size is 0 */
+	counter = 0;	/* Print progress every 16MB */
+	va = 0;
+	pgs = mdp->md_size / PAGE_SIZE;
+	pa = mdp->md_start;
+
+	kprintf("  chunk %d: %ldMB (%ld pages)", seqnr, PG2MB(pgs), pgs);
+
+	while (pgs) {
+		chunk = pgs;
+		if (chunk > MAXDUMPPGS)
+			chunk = MAXDUMPPGS;
+		sz = chunk << PAGE_SHIFT;
+		counter += sz;
+		if (counter >> 24) {
+			kprintf(" %ld", PG2MB(pgs));
+			counter &= (1<<24) - 1;
+		}
+		/* Map the physical pages of this batch, one slot per page. */
+		for (i = 0; i < chunk; i++) {
+			a = pa + i * PAGE_SIZE;
+			va = pmap_kenter_temporary(trunc_page(a), i);
+		}
+		error = dev_ddump(di->priv, va, 0, dumplo, sz);
+		if (error)
+			break;
+		dumplo += sz;
+		pgs -= chunk;
+		pa += sz;
+
+		/* Check for user abort. */
+		c = cncheckc();
+		if (c == 0x03)
+			return (ECANCELED);
+		if (c != -1)
+			kprintf(" (CTRL-C to abort) ");
+	}
+	kprintf(" ... %s\n", (error) ? "fail" : "ok");
+	return (error);
+}
+
+/*
+ * foreach_chunk() callback: emit one PT_LOAD program header for mdp via
+ * buf_write() and advance fileofs past the segment's contents.
+ */
+static int
+cb_dumphdr(struct md_pa *mdp, int seqnr, void *arg)
+{
+	struct dumperinfo *di = (struct dumperinfo*)arg;
+	Elf_Phdr phdr;
+	uint64_t size;
+	int error;
+
+	size = mdp->md_size;
+	bzero(&phdr, sizeof(phdr));
+	phdr.p_type = PT_LOAD;
+	phdr.p_flags = PF_R;			/* XXX */
+	phdr.p_offset = fileofs;
+	phdr.p_vaddr = mdp->md_start;
+	phdr.p_paddr = mdp->md_start;
+	phdr.p_filesz = size;
+	phdr.p_memsz = size;
+	phdr.p_align = PAGE_SIZE;
+
+	error = buf_write(di, (char*)&phdr, sizeof(phdr));
+	fileofs += phdr.p_filesz;
+	return (error);
+}
+
+/*
+ * foreach_chunk() callback: add the size of mdp to the uint64_t that arg
+ * points to.
+ */
+static int
+cb_size(struct md_pa *mdp, int seqnr, void *arg)
+{
+	uint64_t *sz = (uint64_t*)arg;
+
+	*sz += (uint64_t)mdp->md_size;
+	return (0);
+}
+
+/*
+ * Call cb(mdp, seqnr, arg) for every entry in dump_map[].  Returns the
+ * number of chunks visited, or the negated error of the first callback
+ * that fails.
+ */
+static int
+foreach_chunk(callback_t cb, void *arg)
+{
+	struct md_pa *mdp;
+	int error, seqnr;
+
+	seqnr = 0;
+	mdp = md_pa_first();
+	while (mdp != NULL) {
+		error = (*cb)(mdp, seqnr++, arg);
+		if (error)
+			return (-error);
+		mdp = md_pa_next(mdp);
+	}
+	return (seqnr);
+}
+
+/*
+ * Write a full ELF core dump to the dump device described by di: leader
+ * kernel dump header, ELF header, one program header per memory chunk,
+ * the chunks themselves, and a trailer kernel dump header.  Hands off to
+ * minidumpsys() when do_minidump is set.
+ */
+void
+dumpsys(struct dumperinfo *di)
+{
+	Elf_Ehdr ehdr;
+	uint64_t dumpsize;
+	off_t hdrgap;
+	size_t hdrsz;
+	int error;
+
+	savectx(&dumppcb);
+	dumpthread = curthread;
+
+	if (do_minidump) {
+		minidumpsys(di);
+		return;
+	}
+	bzero(&ehdr, sizeof(ehdr));
+	ehdr.e_ident[EI_MAG0] = ELFMAG0;
+	ehdr.e_ident[EI_MAG1] = ELFMAG1;
+	ehdr.e_ident[EI_MAG2] = ELFMAG2;
+	ehdr.e_ident[EI_MAG3] = ELFMAG3;
+	ehdr.e_ident[EI_CLASS] = ELF_CLASS;
+#if BYTE_ORDER == LITTLE_ENDIAN
+	ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
+#else
+	ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
+#endif
+	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+	ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE;	/* XXX big picture? */
+	ehdr.e_type = ET_CORE;
+	ehdr.e_machine = EM_X86_64;
+	ehdr.e_phoff = sizeof(ehdr);
+	ehdr.e_flags = 0;
+	ehdr.e_ehsize = sizeof(ehdr);
+	ehdr.e_phentsize = sizeof(Elf_Phdr);
+	ehdr.e_shentsize = sizeof(Elf_Shdr);
+
+	md_pa_init();
+
+	/* Calculate dump size. */
+	dumpsize = 0L;
+	ehdr.e_phnum = foreach_chunk(cb_size, &dumpsize);
+	hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
+	fileofs = MD_ALIGN(hdrsz);
+	dumpsize += fileofs;
+	hdrgap = fileofs - DEV_ALIGN(hdrsz);
+
+	/* Determine dump offset on device. */
+	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+		error = ENOSPC;
+		goto fail;
+	}
+	dumplo = di->mediaoffset + di->mediasize - dumpsize;
+	dumplo -= sizeof(kdh) * 2;
+
+	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
+	    dumpsize, di->blocksize);
+
+	kprintf("Dumping %llu MB (%d chunks)\n",
+	    (unsigned long long)dumpsize >> 20, ehdr.e_phnum);
+
+	/* Dump leader */
+	error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+	if (error)
+		goto fail;
+	dumplo += sizeof(kdh);
+
+	/* Dump ELF header */
+	error = buf_write(di, (char*)&ehdr, sizeof(ehdr));
+	if (error)
+		goto fail;
+
+	/* Dump program headers */
+	error = foreach_chunk(cb_dumphdr, di);
+	if (error < 0)
+		goto fail;
+	/* Push out the last partial header block; a failure here is fatal. */
+	error = buf_flush(di);
+	if (error)
+		goto fail;
+
+	/*
+	 * All headers are written using blocked I/O, so we know the
+	 * current offset is (still) block aligned. Skip the alignment
+	 * in the file to have the segment contents aligned at page
+	 * boundary. We cannot use MD_ALIGN on dumplo, because we don't
+	 * care and may very well be unaligned within the dump device.
+	 */
+	dumplo += hdrgap;
+
+	/* Dump memory chunks (updates dumplo) */
+	error = foreach_chunk(cb_dumpdata, di);
+	if (error < 0)
+		goto fail;
+
+	/* Dump trailer */
+	error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+	if (error)
+		goto fail;
+
+	/* Signal completion, signoff and exit stage left. */
+	dev_ddump(di->priv, NULL, 0, 0, 0);
+	kprintf("\nDump complete\n");
+	return;
+
+ fail:
+	/* foreach_chunk() reports callback errors negated. */
+	if (error < 0)
+		error = -error;
+
+	if (error == ECANCELED)
+		kprintf("\nDump aborted\n");
+	else if (error == ENOSPC)
+		kprintf("\nDump failed. Partition too small.\n");
+	else
+		kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
+}
diff --git a/sys/platform/pc64/x86_64/minidump_machdep.c b/sys/platform/pc64/x86_64/minidump_machdep.c
new file mode 100644
index 0000000000..4727d7cfca
--- /dev/null
+++ b/sys/platform/pc64/x86_64/minidump_machdep.c
@@ -0,0 +1,412 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CTASSERT(sizeof(struct kerneldumpheader) == 512); + +/* + * Don't touch the first SIZEOF_METADATA bytes on the dump device. This + * is to protect us from metadata and to protect metadata from us. + */ +#define SIZEOF_METADATA (64*1024) + +#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) +#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) + +extern uint64_t KPDPphys; + +uint64_t *vm_page_dump; +int vm_page_dump_size; + +static struct kerneldumpheader kdh; +static off_t dumplo; + +/* Handle chunked writes. 
*/
+static size_t fragsz;
+static void *dump_va;
+static size_t counter, progress;
+
+CTASSERT(sizeof(*vm_page_dump) == 8);
+
+/*
+ * Return 1 if pa lies inside one of the [start, end) ranges listed in
+ * dump_avail[] (terminated by an all-zero pair), 0 otherwise.
+ */
+static int
+is_dumpable(vm_paddr_t pa)
+{
+	int i;
+
+	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
+		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
+			return (1);
+	}
+	return (0);
+}
+
+/* Pages to megabytes, rounded up (256 pages per MB). */
+#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
+
+/*
+ * Write out the fragsz bytes of physical pages currently batched at
+ * dump_va, if any.  dumplo advances and fragsz is reset even when the
+ * write fails.  Returns 0 or the error from dev_ddump().
+ */
+static int
+blk_flush(struct dumperinfo *di)
+{
+	int error;
+
+	if (fragsz == 0)
+		return (0);
+
+	error = dev_ddump(di->priv, dump_va, 0, dumplo, fragsz);
+	dumplo += fragsz;
+	fragsz = 0;
+	return (error);
+}
+
+/*
+ * Write sz bytes (a multiple of PAGE_SIZE) to the dump device, either
+ * from kernel virtual address ptr or from physical address pa; at most
+ * one of the two may be non-zero.
+ *
+ * Virtual writes first flush any batched physical pages and then go
+ * straight to dev_ddump().  Physical pages are mapped with
+ * pmap_kenter_temporary() and batched until MAXDUMPPGS pages have
+ * accumulated, at which point blk_flush() writes them.
+ *
+ * Returns 0, EINVAL for bad arguments, ECANCELED when the console returns
+ * 0x03 (CTRL-C), or the error from dev_ddump().
+ */
+static int
+blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
+{
+	size_t len;
+	int error, i, c;
+
+	error = 0;
+	if ((sz % PAGE_SIZE) != 0) {
+		kprintf("size not page aligned\n");
+		return (EINVAL);
+	}
+	if (ptr != NULL && pa != 0) {
+		kprintf("cant have both va and pa!\n");
+		return (EINVAL);
+	}
+	/*
+	 * The physical address is what gets mapped page by page below, so
+	 * that is what must be aligned (ptr is always NULL on this path).
+	 */
+	if (pa != 0 && (pa % PAGE_SIZE) != 0) {
+		kprintf("address not page aligned\n");
+		return (EINVAL);
+	}
+	if (ptr != NULL) {
+		/* If we're doing a virtual dump, flush any pre-existing pa pages */
+		error = blk_flush(di);
+		if (error)
+			return (error);
+	}
+	while (sz) {
+		len = (MAXDUMPPGS * PAGE_SIZE) - fragsz;
+		if (len > sz)
+			len = sz;
+		counter += len;
+		progress -= len;
+		/* Print the remaining size every 16MB written. */
+		if (counter >> 24) {
+			kprintf(" %ld", PG2MB(progress >> PAGE_SHIFT));
+			counter &= (1<<24) - 1;
+		}
+		if (ptr) {
+			error = dev_ddump(di->priv, ptr, 0, dumplo, len);
+			if (error)
+				return (error);
+			dumplo += len;
+			ptr += len;
+			sz -= len;
+		} else {
+			for (i = 0; i < len; i += PAGE_SIZE)
+				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
+			fragsz += len;
+			pa += len;
+			sz -= len;
+			if (fragsz == (MAXDUMPPGS * PAGE_SIZE)) {
+				error = blk_flush(di);
+				if (error)
+					return (error);
+			}
+		}
+
+		/* Check for user abort. */
+		c = cncheckc();
+		if (c == 0x03)
+			return (ECANCELED);
+		if (c != -1)
+			kprintf(" (CTRL-C to abort) ");
+	}
+
+	return (0);
+}
+
+/* A fake page table page, to avoid having to handle both 4K and 2M pages */
+static pt_entry_t fakept[NPTEPG];
+
+/*
+ * Write a minidump to the dump device described by di.  The dump holds,
+ * in order: leader kernel dump header, one page with the minidump header,
+ * the message buffer, the vm_page_dump bitmap, one page-table page per
+ * 2MB of kernel address space between KERNBASE and kern_end, every
+ * physical page whose bit is set in vm_page_dump, and a trailer kernel
+ * dump header.
+ */
+void
+minidumpsys(struct dumperinfo *di)
+{
+	uint64_t dumpsize;
+	uint32_t ptesize;
+	vm_offset_t va;
+	vm_offset_t kern_end;
+	int error;
+	uint64_t bits;
+	uint64_t *pdp, *pd, *pt, pa;
+	int i, j, k, bit;
+	struct minidumphdr mdhdr;
+	struct mdglobaldata *md;
+
+	counter = 0;
+	/* Walk page table pages, set bits in vm_page_dump */
+	ptesize = 0;
+
+	md = (struct mdglobaldata *)globaldata_find(0);
+
+	/* Cover whichever ends later: kernel_vm_end or the globaldata array. */
+	kern_end = kernel_vm_end;
+	if (kern_end < (vm_offset_t)&(md[ncpus]))
+		kern_end = (vm_offset_t)&(md[ncpus]);
+
+	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
+	for (va = KERNBASE; va < kern_end; va += NBPDR) {
+		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
+		/*
+		 * We always write a page, even if it is zero. Each
+		 * page written corresponds to 2MB of space
+		 */
+		ptesize += PAGE_SIZE;
+		if ((pdp[i] & PG_V) == 0)
+			continue;
+		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
+			/* This is an entire 2M page. */
+			pa = pd[j] & PG_PS_FRAME;
+			for (k = 0; k < NPTEPG; k++) {
+				if (is_dumpable(pa))
+					dump_add_page(pa);
+				pa += PAGE_SIZE;
+			}
+			continue;
+		}
+		if ((pd[j] & PG_V) == PG_V) {
+			/* set bit for each valid page in this 2MB block */
+			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
+			for (k = 0; k < NPTEPG; k++) {
+				if ((pt[k] & PG_V) == PG_V) {
+					pa = pt[k] & PG_FRAME;
+					if (is_dumpable(pa))
+						dump_add_page(pa);
+				}
+			}
+		} else {
+			/* nothing, we're going to dump a null page */
+		}
+	}
+
+	/* Calculate dump size. */
+	dumpsize = ptesize;
+	dumpsize += round_page(msgbufp->msg_size);
+	dumpsize += round_page(vm_page_dump_size);
+	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+		bits = vm_page_dump[i];
+		while (bits) {
+			bit = bsfq(bits);
+			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+			/* Clear out undumpable pages now if needed */
+			if (is_dumpable(pa)) {
+				dumpsize += PAGE_SIZE;
+			} else {
+				dump_drop_page(pa);
+			}
+			bits &= ~(1ul << bit);
+		}
+	}
+	/* One more page for the minidump header itself. */
+	dumpsize += PAGE_SIZE;
+
+	/* Determine dump offset on device. */
+	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+		error = ENOSPC;
+		goto fail;
+	}
+	dumplo = di->mediaoffset + di->mediasize - dumpsize;
+	dumplo -= sizeof(kdh) * 2;
+	progress = dumpsize;
+
+	/* Initialize mdhdr */
+	bzero(&mdhdr, sizeof(mdhdr));
+	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
+	mdhdr.version = MINIDUMP_VERSION;
+	mdhdr.msgbufsize = msgbufp->msg_size;
+	mdhdr.bitmapsize = vm_page_dump_size;
+	mdhdr.ptesize = ptesize;
+	mdhdr.kernbase = KERNBASE;
+	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
+	mdhdr.dmapend = DMAP_MAX_ADDRESS;
+
+	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
+	    dumpsize, di->blocksize);
+
+	kprintf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
+	kprintf("Dumping %llu MB:", (unsigned long long)dumpsize >> 20);
+
+	/* Dump leader */
+	error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+	if (error)
+		goto fail;
+	dumplo += sizeof(kdh);
+
+	/* Dump my header */
+	bzero(&fakept, sizeof(fakept));
+	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
+	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+	if (error)
+		goto fail;
+
+	/* Dump msgbuf up front */
+	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
+	if (error)
+		goto fail;
+
+	/* Dump bitmap */
+	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
+	if (error)
+		goto fail;
+
+	/* Dump kernel page table pages */
+	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
+	for (va = KERNBASE; va < kern_end; va += NBPDR) {
+		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
+		/* We always write a page, even if it is zero */
+		if ((pdp[i] & PG_V) == 0) {
+			bzero(fakept, sizeof(fakept));
+			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+			/* flush, in case we reuse fakept in the same block */
+			error = blk_flush(di);
+			if (error)
+				goto fail;
+			continue;
+		}
+		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
+			/* This is a single 2M block. Generate a fake PTP */
+			pa = pd[j] & PG_PS_FRAME;
+			for (k = 0; k < NPTEPG; k++) {
+				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
+			}
+			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+			/* flush, in case we reuse fakept in the same block */
+			error = blk_flush(di);
+			if (error)
+				goto fail;
+			continue;
+		}
+		if ((pd[j] & PG_V) == PG_V) {
+			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
+			error = blk_write(di, (char *)pt, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+		} else {
+			bzero(fakept, sizeof(fakept));
+			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+			/* flush, in case we reuse fakept in the same block */
+			error = blk_flush(di);
+			if (error)
+				goto fail;
+		}
+	}
+
+	/* Dump memory chunks */
+	/* XXX cluster it up and use blk_dump() */
+	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+		bits = vm_page_dump[i];
+		while (bits) {
+			bit = bsfq(bits);
+			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+			error = blk_write(di, 0, pa, PAGE_SIZE);
+			if (error)
+				goto fail;
+			bits &= ~(1ul << bit);
+		}
+	}
+
+	/* Write out whatever physical pages are still batched. */
+	error = blk_flush(di);
+	if (error)
+		goto fail;
+
+	/* Dump trailer */
+	error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+	if (error)
+		goto fail;
+	dumplo += sizeof(kdh);
+
+	/* Signal completion, signoff and exit stage left. */
+	dev_ddump(di->priv, NULL, 0, 0, 0);
+	kprintf("\nDump complete\n");
+	return;
+
+ fail:
+	if (error < 0)
+		error = -error;
+
+	if (error == ECANCELED)
+		kprintf("\nDump aborted\n");
+	else if (error == ENOSPC)
+		kprintf("\nDump failed. Partition too small.\n");
+	else
+		kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
+}
+
+/*
+ * Set the bit for the page containing pa in the vm_page_dump bitmap
+ * (64 pages per bitmap word).
+ */
+void
+dump_add_page(vm_paddr_t pa)
+{
+	int idx, bit;
+
+	pa >>= PAGE_SHIFT;
+	idx = pa >> 6;		/* 2^6 = 64 */
+	bit = pa & 63;
+	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
+}
+
+/*
+ * Clear the bit for the page containing pa in the vm_page_dump bitmap
+ * (64 pages per bitmap word).
+ */
+void
+dump_drop_page(vm_paddr_t pa)
+{
+	int idx, bit;
+
+	pa >>= PAGE_SHIFT;
+	idx = pa >> 6;		/* 2^6 = 64 */
+	bit = pa & 63;
+	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
+}
-- 
2.41.0