2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
3 * Copyright (c) 1991 Regents of the University of California.
5 * Copyright (c) 1994 John S. Dyson
7 * Copyright (c) 1994 David Greenman
9 * Copyright (c) 2004-2006 Matthew Dillon
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in
20 * the documentation and/or other materials provided with the
22 * 3. Neither the name of The DragonFly Project nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific, prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
30 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
33 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
34 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
35 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
36 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
40 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
41 * $DragonFly: src/sys/platform/pc64/amd64/pmap.c,v 1.1 2007/09/23 04:29:31 yanyh Exp $
42 * $DragonFly: src/sys/platform/pc64/amd64/pmap.c,v 1.1 2007/09/23 04:29:31 yanyh Exp $
45 * NOTE: PMAP_INVAL_ADD: In pc32 this function is called prior to adjusting
46 * the PTE in the page table, because a cpu synchronization might be required.
47 * The actual invalidation is delayed until the following call or flush. In
48 * the VKERNEL build this function is called prior to adjusting the PTE and
49 * invalidates the table synchronously (not delayed), and is not SMP safe
53 #include <sys/types.h>
54 #include <sys/systm.h>
55 #include <sys/kernel.h>
59 #include <sys/thread.h>
61 #include <sys/vmspace.h>
64 #include <vm/vm_page.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_kern.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_zone.h>
69 #include <vm/vm_pageout.h>
71 #include <machine/md_var.h>
72 #include <machine/pcb.h>
73 #include <machine/pmap_inval.h>
74 #include <machine/globaldata.h>
76 struct pmap kernel_pmap;
89 * Bootstrap the kernel_pmap so it can be used with pmap_enter().
91 * NOTE! pm_pdir for the kernel pmap is offset so VA's translate
92 * directly into PTD indexes (PTA is also offset for the same reason).
93 * This is necessary because, for now, KVA is not mapped at address 0.
95 * Page table pages are not managed like they are in normal pmaps, so
96 * no pteobj is needed.
99 pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr)
104 * Initialize pmap0/vmspace0. Since process 0 never enters user mode we
105 * just dummy it up so it works well enough for fork().
107 * In DragonFly, process pmaps may only be used to manipulate user address
108 * space, never kernel address space.
111 pmap_pinit0(struct pmap *pmap)
115 /************************************************************************
116 * Procedures to manage whole physical maps *
117 ************************************************************************
119 * Initialize a preallocated and zeroed pmap structure,
120 * such as one in a vmspace structure.
123 pmap_pinit(struct pmap *pmap)
128 * Clean up a pmap structure so it can be physically freed
131 pmap_puninit(pmap_t pmap)
137 * Wire in kernel global address entries. To avoid a race condition
138 * between pmap initialization and pmap_growkernel, this procedure
139 * adds the pmap to the master list (which growkernel scans to update),
140 * then copies the template.
142 * In a virtual kernel there are no kernel global address entries.
145 pmap_pinit2(struct pmap *pmap)
150 * Release all resources held by the given physical map.
152 * Should only be called if the map contains no valid mappings.
154 static int pmap_release_callback(struct vm_page *p, void *data);
157 pmap_release(struct pmap *pmap)
162 pmap_release_callback(struct vm_page *p, void *data)
168 * Retire the given physical map from service. Should only be called if
169 * the map contains no valid mappings.
172 pmap_destroy(pmap_t pmap)
177 * Add a reference to the specified pmap.
180 pmap_reference(pmap_t pmap)
184 /************************************************************************
185 * VMSPACE MANAGEMENT *
186 ************************************************************************
188 * The VMSPACE management we do in our virtual kernel must be reflected
189 * in the real kernel. This is accomplished by making vmspace system
190 * calls to the real kernel.
193 cpu_vmspace_alloc(struct vmspace *vm)
198 cpu_vmspace_free(struct vmspace *vm)
202 /************************************************************************
203 * Procedures which operate directly on the kernel PMAP *
204 ************************************************************************/
207 * This maps the requested page table and gives us access to it.
210 get_ptbase(struct pmap *pmap, vm_offset_t va)
216 get_ptbase1(struct pmap *pmap, vm_offset_t va)
222 get_ptbase2(struct pmap *pmap, vm_offset_t va)
228 * When removing a page directory the related VA range in the self-mapping
229 * of the page table must be invalidated.
232 inval_ptbase_pagedir(pmap_t pmap, vm_pindex_t pindex)
237 * Enter a mapping into kernel_pmap. Mappings created in this fashion
238 * are not managed. Mappings must be immediately accessible on all cpus.
240 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the
241 * real pmap and handle related races before storing the new vpte.
244 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
249 * Synchronize a kvm mapping originally made for the private use on
250 * some other cpu so it can be used on all cpus.
252 * XXX add MADV_RESYNC to improve performance.
255 pmap_kenter_sync(vm_offset_t va)
260 * Synchronize a kvm mapping originally made for the private use on
261 * some other cpu so it can be used on our cpu. Turns out to be the
262 * same madvise() call, because we have to sync the real pmaps anyway.
264 * XXX add MADV_RESYNC to improve performance.
267 pmap_kenter_sync_quick(vm_offset_t va)
273 * Make a previously read-only kernel mapping R+W (not implemented by
277 pmap_kmodify_rw(vm_offset_t va)
279 *pmap_kpte(va) |= VPTE_R | VPTE_W;
280 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
284 * Make a kernel mapping non-cacheable (not applicable to virtual kernels)
287 pmap_kmodify_nc(vm_offset_t va)
289 *pmap_kpte(va) |= VPTE_N;
290 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
296 * Map a contiguous range of physical memory to a KVM
299 pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
305 * Enter an unmanaged KVA mapping for the private use of the current
306 * cpu only. pmap_kenter_sync() may be called to make the mapping usable
309 * It is illegal for the mapping to be accessed by other cpus unless
310 * pmap_kenter_sync*() is called.
313 pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
318 * Make a temporary mapping for a physical address. This is only intended
319 * to be used for panic dumps.
322 pmap_kenter_temporary(vm_paddr_t pa, int i)
328 * Remove an unmanaged mapping created with pmap_kenter*().
331 pmap_kremove(vm_offset_t va)
336 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize
337 * only with this cpu.
339 * Unfortunately because we optimize new entries by testing VPTE_V later
340 * on, we actually still have to synchronize with all the cpus. XXX maybe
341 * store a junk value and test against 0 in the other places instead?
344 pmap_kremove_quick(vm_offset_t va)
349 * Map a set of unmanaged VM pages into KVM.
352 pmap_qenter(vm_offset_t va, struct vm_page **m, int count)
357 * Map a set of VM pages to kernel virtual memory. If a mapping changes
358 * clear the supplied mask. The caller handles any SMP interactions.
359 * The mask is used to provide the caller with hints on what SMP interactions
363 pmap_qenter2(vm_offset_t va, struct vm_page **m, int count, cpumask_t *mask)
368 * Undo the effects of pmap_qenter*().
371 pmap_qremove(vm_offset_t va, int count)
375 /************************************************************************
376 * Misc support glue called by machine independent code *
377 ************************************************************************
379 * These routines are called by machine independent code to operate on
380 * certain machine-dependent aspects of processes, threads, and pmaps.
384 * Initialize MD portions of the thread structure.
387 pmap_init_thread(thread_t td)
392 * This routine directly affects the fork perf for a process.
395 pmap_init_proc(struct proc *p)
400 * Destroy the UPAGES for a process that has exited and disassociate
401 * the process from its thread.
404 pmap_dispose_proc(struct proc *p)
409 * We pre-allocate all page table pages for kernel virtual memory so
410 * this routine will only be called if KVM has been exhausted.
413 pmap_growkernel(vm_offset_t addr)
418 * The modification bit is not tracked for any pages in this range. XXX
419 * such pages in this map should always use pmap_k*() functions and not
422 * XXX User and kernel address spaces are independent for virtual kernels,
423 * this function only applies to the kernel pmap.
426 pmap_track_modified(pmap_t pmap, vm_offset_t va)
431 /************************************************************************
432 * Procedures supporting managed page table pages *
433 ************************************************************************
435 * These procedures are used to track managed page table pages. These pages
436 * use the page table page's vm_page_t to track PTEs in the page. The
437 * page table pages themselves are arranged in a VM object, pmap->pm_pteobj.
439 * This allows the system to throw away page table pages for user processes
440 * at will and reinstantiate them on demand.
444 * This routine works like vm_page_lookup() but also blocks as long as the
445 * page is busy. This routine does not busy the page it returns.
447 * Unless the caller is managing objects whose pages are in a known state,
448 * the call should be made with a critical section held so the page's object
449 * association remains valid on return.
452 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
458 * This routine unholds page table pages, and if the hold count
459 * drops to zero, then it decrements the wire count.
462 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
468 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
474 * After removing a page table entry, this routine is used to
475 * conditionally free the page, and manage the hold/wire counts.
478 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
484 * Attempt to release and free a vm_page in a pmap. Returns 1 on success,
485 * 0 on failure (if the procedure had to sleep).
488 pmap_release_free_page(struct pmap *pmap, vm_page_t p)
494 * This routine is called if the page table page is not mapped in the page
497 * The routine is broken up into two parts for readability.
500 _pmap_allocpte(pmap_t pmap, unsigned ptepindex)
506 * Determine the page table page required to access the VA in the pmap
507 * and allocate it if necessary. Return a held vm_page_t for the page.
509 * Only used with user pmaps.
512 pmap_allocpte(pmap_t pmap, vm_offset_t va)
517 /************************************************************************
518 * Managed pages in pmaps *
519 ************************************************************************
521 * All pages entered into user pmaps and some pages entered into the kernel
522 * pmap are managed, meaning that pmap_protect() and other related management
523 * functions work on these pages.
527 * free the pv_entry back to the free list. This function may be
528 * called from an interrupt.
531 free_pv_entry(pv_entry_t pv)
536 * get a new pv_entry, allocating a block from the system
537 * when needed. This function may be called from an interrupt.
546 * This routine is very drastic, but can save the system
555 * If it is the first entry on the list, it is actually
556 * in the header and we must copy the following entry up
557 * to the header. Otherwise we must search the list for
558 * the entry. In either case we free the now unused entry.
561 pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
567 * Create a pv entry for page at pa for (pmap, va). If the page table page
568 * holding the VA is managed, mpte will be non-NULL.
571 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
576 * pmap_remove_pte: do the things to unmap a page in a process
579 pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va)
587 * Remove a single page from a process address space.
589 * This function may not be called from an interrupt if the pmap is
593 pmap_remove_page(struct pmap *pmap, vm_offset_t va)
600 * Remove the given range of addresses from the specified map.
602 * It is assumed that the start and end are properly
603 * rounded to the page size.
605 * This function may not be called from an interrupt if the pmap is
609 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
616 * Removes this physical page from all physical maps in which it resides.
617 * Reflects back modify bits to the pager.
619 * This routine may not be called from an interrupt.
622 pmap_remove_all(vm_page_t m)
629 * Set the physical protection on the specified range of this map
632 * This function may not be called from an interrupt if the map is
633 * not the kernel_pmap.
636 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
641 * Enter a managed page into a pmap. If the page is not wired related pmap
642 * data can be destroyed at any time for later demand-operation.
644 * Insert the vm_page (m) at virtual address (v) in (pmap), with the
645 * specified protection, and wire the mapping if requested.
647 * NOTE: This routine may not lazy-evaluate or lose information. The
648 * page must actually be inserted into the given map NOW.
650 * NOTE: When entering a page at a KVA address, the pmap must be the
654 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
660 * This is a quick version of pmap_enter(). It is used only under the
661 * following conditions:
663 * (1) The pmap is not the kernel_pmap
664 * (2) The page is not to be wired into the map
665 * (3) The page is to be mapped read-only in the pmap (initially that is)
666 * (4) The calling procedure is responsible for flushing the TLB
667 * (5) The page is always managed
668 * (6) There is no prior mapping at the VA
672 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
678 * Extract the physical address for the translation at the specified
679 * virtual address in the pmap.
682 pmap_extract(pmap_t pmap, vm_offset_t va)
688 * This routine preloads the ptes for a given object into the specified pmap.
689 * This eliminates the blast of soft faults on process startup and
690 * immediately after an mmap.
692 static int pmap_object_init_pt_callback(vm_page_t p, void *data);
695 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
696 vm_object_t object, vm_pindex_t pindex,
697 vm_size_t size, int limit)
703 pmap_object_init_pt_callback(vm_page_t p, void *data)
709 * pmap_prefault provides a quick way of clustering pagefaults into a
710 * process's address space. It is a "cousin" of pmap_object_init_pt,
711 * except it runs at page fault time instead of mmap time.
715 #define PAGEORDER_SIZE (PFBAK+PFFOR)
717 static int pmap_prefault_pageorder[] = {
718 -PAGE_SIZE, PAGE_SIZE,
719 -2 * PAGE_SIZE, 2 * PAGE_SIZE,
720 -3 * PAGE_SIZE, 3 * PAGE_SIZE,
721 -4 * PAGE_SIZE, 4 * PAGE_SIZE
725 pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
730 * Routine: pmap_change_wiring
731 * Function: Change the wiring attribute for a map/virtual-address
734 * The mapping must already exist in the pmap.
737 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
742 * Copy the range specified by src_addr/len
743 * from the source map to the range dst_addr/len
744 * in the destination map.
746 * This routine is only advisory and need not do anything.
749 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
750 vm_size_t len, vm_offset_t src_addr)
757 * Zero the specified PA by mapping the page into KVM and clearing its
760 * This function may be called from an interrupt and no locking is
764 pmap_zero_page(vm_paddr_t phys)
769 * pmap_page_assertzero:
771 * Assert that a page is empty, panic if it isn't.
774 pmap_page_assertzero(vm_paddr_t phys)
781 * Zero part of a physical page by mapping it into memory and clearing
782 * its contents with bzero.
784 * off and size may not cover an area beyond a single hardware page.
787 pmap_zero_page_area(vm_paddr_t phys, int off, int size)
794 * Copy the physical page from the source PA to the target PA.
795 * This function may be called from an interrupt. No locking
799 pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
804 * pmap_copy_page_frag:
806 * Copy the physical page from the source PA to the target PA.
807 * This function may be called from an interrupt. No locking
811 pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
816 * Returns true if the pmap's pv is one of the first
817 * 16 pvs linked to from this page. This count may
818 * be changed upwards or downwards in the future; it
819 * is only necessary that true be returned for a small
820 * subset of pmaps for proper page aging.
823 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
829 * Remove all pages from the specified address space;
830 * this aids process exit speeds. Also, this code
831 * is special cased for current process only, but
832 * can have the more generic (and slightly slower)
833 * mode enabled. This is much faster than pmap_remove
834 * in the case of running down an entire address space.
837 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
842 * pmap_testbit tests bits in active mappings of a VM page.
845 pmap_testbit(vm_page_t m, int bit)
851 * This routine is used to clear bits in ptes. Certain bits require special
852 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit.
854 * This routine is only called with certain VPTE_* bit combinations.
857 pmap_clearbit(vm_page_t m, int bit)
864 * Lower the permission for all mappings to a given page.
867 pmap_page_protect(vm_page_t m, vm_prot_t prot)
872 pmap_phys_address(int ppn)
878 * pmap_ts_referenced:
880 * Return a count of reference bits for a page, clearing those bits.
881 * It is not necessary for every reference bit to be cleared, but it
882 * is necessary that 0 only be returned when there are truly no
883 * reference bits set.
885 * XXX: The exact number of bits to check and clear is a matter that
886 * should be tested and standardized at some point in the future for
887 * optimal aging of shared pages.
890 pmap_ts_referenced(vm_page_t m)
898 * Return whether or not the specified physical page was modified
899 * in any physical maps.
902 pmap_is_modified(vm_page_t m)
908 * Clear the modify bits on the specified physical page.
911 pmap_clear_modify(vm_page_t m)
916 * pmap_clear_reference:
918 * Clear the reference bit on the specified physical page.
921 pmap_clear_reference(vm_page_t m)
927 * Miscellaneous support routines follow
931 i386_protection_init(void)
935 kp = protection_codes;
936 for (prot = 0; prot < 8; prot++) {
937 if (prot & VM_PROT_READ)
939 if (prot & VM_PROT_WRITE)
941 if (prot & VM_PROT_EXECUTE)
948 * Map a set of physical memory pages into the kernel virtual
949 * address space. Return a pointer to where it is mapped. This
950 * routine is intended to be used for mapping device memory,
953 * NOTE: we can't use pgeflag unless we invalidate the pages one at
957 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
959 vm_offset_t va, tmpva, offset;
962 offset = pa & PAGE_MASK;
963 size = roundup(offset + size, PAGE_SIZE);
965 va = kmem_alloc_nofault(&kernel_map, size);
967 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
969 pa = pa & VPTE_FRAME;
970 for (tmpva = va; size > 0;) {
971 pte = KernelPTA + (tmpva >> PAGE_SHIFT);
972 *pte = pa | VPTE_R | VPTE_W | VPTE_V; /* | pgeflag; */
980 return ((void *)(va + offset));
984 pmap_unmapdev(vm_offset_t va, vm_size_t size)
986 vm_offset_t base, offset;
988 base = va & VPTE_FRAME;
989 offset = va & PAGE_MASK;
990 size = roundup(offset + size, PAGE_SIZE);
991 pmap_qremove(va, size >> PAGE_SHIFT);
992 kmem_free(&kernel_map, base, size);
998 * perform the pmap work for mincore
1001 pmap_mincore(pmap_t pmap, vm_offset_t addr)
1007 pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
1012 pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
1018 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)