/*-
 * Copyright (c) 2006 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/amd64/amd64/minidump_machdep.c,v 1.10 2009/05/29 21:27:12 jamie Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/device.h>
#include <sys/globaldata.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <machine/atomic.h>
#include <machine/elf.h>
#include <machine/globaldata.h>
#include <machine/md_var.h>
#include <machine/vmparam.h>
#include <machine/minidump.h>

CTASSERT(sizeof(struct kerneldumpheader) == 512);

/*
 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
 * is to protect us from metadata and to protect metadata from us.
 */
#define SIZEOF_METADATA         (64*1024)

#define MD_ALIGN(x)     (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
#define DEV_ALIGN(x)    (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
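/*
 * Worked example (hypothetical values): with 4 KB pages, MD_ALIGN(0x1234)
 * rounds up to the next page boundary, 0x2000, while DEV_ALIGN(700) rounds
 * up to the next DEV_BSIZE (512-byte) boundary, 1024.
 */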
extern uint64_t KPDPphys;

uint64_t *vm_page_dump;
int vm_page_dump_size;

static struct kerneldumpheader kdh;
static off_t dumplo;

/* Handle chunked writes. */
static size_t fragsz;
static void *dump_va;
static size_t counter, progress;

CTASSERT(sizeof(*vm_page_dump) == 8);

/*
 * Return non-zero if the physical address lies within one of the
 * dump_avail[] ranges, i.e. is backed by real, dumpable memory.
 */
static int
is_dumpable(vm_paddr_t pa)
{
        int i;

        for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
                if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
                        return (1);
        }
        return (0);
}

/* Convert a page count to whole megabytes (256 4K pages per MB). */
#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)

/*
 * Write out any pages accumulated by blk_write() in physical-address mode
 * and reset the fragment buffer.
 */
static int
blk_flush(struct dumperinfo *di)
{
        int error;

        if (fragsz == 0)
                return (0);

        error = dev_ddump(di->priv, dump_va, 0, dumplo, fragsz);
        dumplo += fragsz;
        fragsz = 0;
        return (error);
}

/*
 * Write page-aligned, page-sized data to the dump device.  Callers pass
 * either a kernel virtual address in ptr (written out immediately) or a
 * physical address in pa (mapped temporarily and accumulated until a full
 * I/O-sized fragment can be flushed), never both.
 */
static int
blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
{
        size_t len;
        int error, i, c;
        int max_iosize;

        error = 0;
        if ((sz & PAGE_MASK)) {
                kprintf("size not page aligned\n");
                return (EINVAL);
        }
        if (ptr != NULL && pa != 0) {
                kprintf("can't have both va and pa!\n");
                return (EINVAL);
        }
        if (pa != 0 && (((uintptr_t)pa) & PAGE_MASK) != 0) {
                kprintf("address not page aligned\n");
                return (EINVAL);
        }
        if (ptr != NULL) {
                /*
                 * If we're doing a virtual dump, flush any
                 * pre-existing pa pages
                 */
                error = blk_flush(di);
                if (error)
                        return (error);
        }
        max_iosize = min(MAXPHYS, di->maxiosize);
        while (sz) {
                len = max_iosize - fragsz;
                if (len > sz)
                        len = sz;
                counter += len;
                progress -= len;
                if (counter >> 24) {
                        kprintf(" %ld", PG2MB(progress >> PAGE_SHIFT));
                        counter &= (1<<24) - 1;
                }
                if (ptr) {
                        /*kprintf("s");*/
                        error = dev_ddump(di->priv, ptr, 0, dumplo, len);
                        /* kprintf("t");*/
                        if (error)
                                return (error);
                        dumplo += len;
                        ptr += len;
                        sz -= len;
                } else {
                        for (i = 0; i < len; i += PAGE_SIZE) {
                                dump_va = pmap_kenter_temporary(pa + i,
                                                (i + fragsz) >> PAGE_SHIFT);
                        }
                        smp_invltlb();
                        fragsz += len;
                        pa += len;
                        sz -= len;
                        if (fragsz == max_iosize) {
                                error = blk_flush(di);
                                if (error)
                                        return (error);
                        }
                }
        }

        /* Check for user abort. */
        c = cncheckc();
        if (c == 0x03)
                return (ECANCELED);
        if (c != -1)
                kprintf(" (CTRL-C to abort) ");

        return (0);
}

/* A fake page table page, to avoid having to handle both 4K and 2M pages */
static pt_entry_t fakept[NPTEPG];
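/*
 * minidumpsys() lays the dump out on the device as follows: a leading
 * kerneldumpheader, one page holding the minidumphdr, the message buffer,
 * the vm_page_dump bitmap, one page-table page per 2MB of kernel VA, the
 * dumpable physical pages themselves, and a trailing kerneldumpheader.
 */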
void
minidumpsys(struct dumperinfo *di)
{
        uint64_t dumpsize;
        uint32_t ptesize;
        vm_offset_t va;
        vm_offset_t kern_end;
        int error;
        uint64_t bits;
        uint64_t *pdp, *pd, *pt, pa;
        int i, j, k, bit;
        struct minidumphdr mdhdr;
        struct mdglobaldata *md;

        counter = 0;

        /*
         * Walk page table pages, set bits in vm_page_dump.
         *
         * NOTE: kernel_vm_end can actually be below KERNBASE.
         *       Just use KvaEnd.  Also note that loops which go
         *       all the way to the end of the address space might
         *       overflow the loop variable.
         */
        ptesize = 0;
        md = (struct mdglobaldata *)globaldata_find(0);
        kern_end = KvaEnd;
        if (kern_end < (vm_offset_t)&(md[ncpus]))
                kern_end = (vm_offset_t)&(md[ncpus]);

        pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
        for (va = VM_MIN_KERNEL_ADDRESS; va < kern_end; va += NBPDR) {
                /*
                 * The loop probably overflows a 64-bit int due to NBPDR.
                 */
                if (va < VM_MIN_KERNEL_ADDRESS)
                        break;

                /*
                 * We always write a page, even if it is zero. Each
                 * page written corresponds to 2MB of space
                 */
                i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
                ptesize += PAGE_SIZE;
                if ((pdp[i] & PG_V) == 0)
                        continue;
                pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
                j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
                if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
                        /* This is an entire 2M page. */
                        pa = pd[j] & PG_PS_FRAME;
                        for (k = 0; k < NPTEPG; k++) {
                                if (is_dumpable(pa))
                                        dump_add_page(pa);
                                pa += PAGE_SIZE;
                        }
                        continue;
                }
                if ((pd[j] & PG_V) == PG_V) {
                        /* set bit for each valid page in this 2MB block */
                        pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
                        for (k = 0; k < NPTEPG; k++) {
                                if ((pt[k] & PG_V) == PG_V) {
                                        pa = pt[k] & PG_FRAME;
                                        if (is_dumpable(pa))
                                                dump_add_page(pa);
                                }
                        }
                } else {
                        /* nothing, we're going to dump a null page */
                }
        }

        /* Calculate dump size. */
        dumpsize = ptesize;
        dumpsize += round_page(msgbufp->msg_size);
        dumpsize += round_page(vm_page_dump_size);
        for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
                bits = vm_page_dump[i];
                while (bits) {
                        bit = bsfq(bits);
                        pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
                              bit) * PAGE_SIZE;
                        /* Clear out undumpable pages now if needed */
                        if (is_dumpable(pa)) {
                                dumpsize += PAGE_SIZE;
                        } else {
                                dump_drop_page(pa);
                        }
                        bits &= ~(1ul << bit);
                }
        }
        dumpsize += PAGE_SIZE;

        /* Determine dump offset on device. */
        if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
                error = ENOSPC;
                goto fail;
        }
        dumplo = di->mediaoffset + di->mediasize - dumpsize;
        dumplo -= sizeof(kdh) * 2;
        progress = dumpsize;

        /* Initialize mdhdr */
        bzero(&mdhdr, sizeof(mdhdr));
        strcpy(mdhdr.magic, MINIDUMP_MAGIC);
        mdhdr.version = MINIDUMP_VERSION;
        mdhdr.msgbufsize = msgbufp->msg_size;
        mdhdr.bitmapsize = vm_page_dump_size;
        mdhdr.ptesize = ptesize;
        mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
        mdhdr.dmapbase = DMAP_MIN_ADDRESS;
        mdhdr.dmapend = DMAP_MAX_ADDRESS;

        mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
                     dumpsize, di->blocksize);

        kprintf("Physical memory: %jd MB\n",
                (intmax_t)ptoa(physmem) / 1048576);
        kprintf("Dumping %jd MB:", (intmax_t)dumpsize >> 20);

        /* Dump leader */
        error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
        if (error)
                goto fail;
        dumplo += sizeof(kdh);

        /* Dump my header */
        bzero(&fakept, sizeof(fakept));
        bcopy(&mdhdr, &fakept, sizeof(mdhdr));
        error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
        if (error)
                goto fail;

        /* Dump msgbuf up front */
        error = blk_write(di, (char *)msgbufp->msg_ptr, 0,
                          round_page(msgbufp->msg_size));
        if (error)
                goto fail;

        /* Dump bitmap */
        error = blk_write(di, (char *)vm_page_dump, 0,
                          round_page(vm_page_dump_size));
        if (error)
                goto fail;
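        /*
         * The loop below emits exactly one 4K page-table page for every
         * 2MB of kernel VA, matching the ptesize accounted above: a real
         * PT page where one exists, a synthesized one for 2M mappings,
         * and a zeroed page for unmapped ranges.
         */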
        /* Dump kernel page table pages */
        pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
        for (va = VM_MIN_KERNEL_ADDRESS; va < kern_end; va += NBPDR) {
                /*
                 * The loop probably overflows a 64-bit int due to NBPDR.
                 */
                if (va < VM_MIN_KERNEL_ADDRESS)
                        break;

                /*
                 * We always write a page, even if it is zero
                 */
                i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
                if ((pdp[i] & PG_V) == 0) {
                        bzero(fakept, sizeof(fakept));
                        error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
                        if (error)
                                goto fail;
                        /* flush, in case we reuse fakept in the same block */
                        error = blk_flush(di);
                        if (error)
                                goto fail;
                        continue;
                }
                pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
                j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
                if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
                        /* This is a single 2M block. Generate a fake PTP */
                        pa = pd[j] & PG_PS_FRAME;
                        for (k = 0; k < NPTEPG; k++) {
                                fakept[k] = (pa + (k * PAGE_SIZE)) |
                                            PG_V | PG_RW | PG_A | PG_M;
                        }
                        error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
                        if (error)
                                goto fail;
                        /* flush, in case we reuse fakept in the same block */
                        error = blk_flush(di);
                        if (error)
                                goto fail;
                        continue;
                }
                if ((pd[j] & PG_V) == PG_V) {
                        pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
                        error = blk_write(di, (char *)pt, 0, PAGE_SIZE);
                        if (error)
                                goto fail;
                } else {
                        bzero(fakept, sizeof(fakept));
                        error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
                        if (error)
                                goto fail;
                        /* flush, in case we reuse fakept in the same block */
                        error = blk_flush(di);
                        if (error)
                                goto fail;
                }
        }

        /* Dump memory chunks */
        /* XXX cluster it up and use blk_dump() */
        for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
                bits = vm_page_dump[i];
                while (bits) {
                        bit = bsfq(bits);
                        pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
                              bit) * PAGE_SIZE;
                        error = blk_write(di, 0, pa, PAGE_SIZE);
                        if (error)
                                goto fail;
                        bits &= ~(1ul << bit);
                }
        }

        error = blk_flush(di);
        if (error)
                goto fail;

        /* Dump trailer */
        error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
        if (error)
                goto fail;
        dumplo += sizeof(kdh);

        /* Signal completion, signoff and exit stage left. */
        dev_ddump(di->priv, NULL, 0, 0, 0);
        kprintf("\nDump complete\n");
        return;

fail:
        if (error < 0)
                error = -error;

        if (error == ECANCELED)
                kprintf("\nDump aborted\n");
        else if (error == ENOSPC)
                kprintf("\nDump failed. Partition too small.\n");
        else
                kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
}

/* Mark a physical page as dumpable in the vm_page_dump bitmap. */
void
dump_add_page(vm_paddr_t pa)
{
        int idx, bit;

        pa >>= PAGE_SHIFT;
        idx = pa >> 6;          /* 2^6 = 64 */
        bit = pa & 63;
        atomic_set_long(&vm_page_dump[idx], 1ul << bit);
}

/* Remove a physical page from the vm_page_dump bitmap. */
void
dump_drop_page(vm_paddr_t pa)
{
        int idx, bit;

        pa >>= PAGE_SHIFT;
        idx = pa >> 6;          /* 2^6 = 64 */
        bit = pa & 63;
        atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
}
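For reference, a minimal userland sketch (not part of this file; the names
page_bitmap, bitmap_set, bitmap_test and the NPAGES size are hypothetical) of
the bitmap indexing that dump_add_page() and dump_drop_page() apply: the
physical address is converted to a page frame number, the high bits select a
64-bit word, and the low 6 bits select the bit within that word.

/* Illustrative sketch only, assuming 4 KB pages; not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12                   /* assume 4 KB pages, as on amd64 */
#define NPAGES     (1u << 20)           /* hypothetical: bitmap covers 4 GB */

static uint64_t page_bitmap[NPAGES / 64];

static void
bitmap_set(uint64_t pa)
{
        uint64_t pfn = pa >> PAGE_SHIFT;        /* page frame number */

        page_bitmap[pfn >> 6] |= 1ull << (pfn & 63);
}

static int
bitmap_test(uint64_t pa)
{
        uint64_t pfn = pa >> PAGE_SHIFT;

        return ((page_bitmap[pfn >> 6] >> (pfn & 63)) & 1);
}

int
main(void)
{
        bitmap_set(0x12345000);                 /* mark one page dumpable */
        printf("%d %d\n", bitmap_test(0x12345000), bitmap_test(0x12346000));
        return (0);                             /* prints "1 0" */
}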