/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 * Copyright (c) 1991 Regents of the University of California.
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1994 David Greenman
 * Copyright (c) 2004-2006 Matthew Dillon
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:   @(#)pmap.c	7.7 (Berkeley)	5/12/91
 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
 * $DragonFly: src/sys/platform/pc64/amd64/pmap.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $
 */

/*
 * NOTE: PMAP_INVAL_ADD: In pc32 this function is called prior to adjusting
 * the PTE in the page table, because a cpu synchronization might be required.
 * The actual invalidation is delayed until the following call or flush.  In
 * the VKERNEL build this function is called prior to adjusting the PTE and
 * invalidates the table synchronously (not delayed), and is not SMP safe.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/thread.h>
#include <sys/vmspace.h>

#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_zone.h>
#include <vm/vm_pageout.h>

#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/pmap_inval.h>
#include <machine/globaldata.h>

struct pmap kernel_pmap;

/*
 * Bootstrap the kernel_pmap so it can be used with pmap_enter().
 *
 * NOTE! pm_pdir for the kernel pmap is offset so VA's translate
 * directly into PTD indexes (PTA is also offset for the same reason).
 * This is necessary because, for now, KVA is not mapped at address 0.
 *
 * Page table pages are not managed like they are in normal pmaps, so
 * no pteobj is needed.
 */
pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr)

/*
 * Initialize pmap0/vmspace0.  Since process 0 never enters user mode we
 * just dummy it up so it works well enough for fork().
 *
 * In DragonFly, process pmaps may only be used to manipulate user address
 * space, never kernel address space.
 */
pmap_pinit0(struct pmap *pmap)

/************************************************************************
 *		Procedures to manage whole physical maps		*
 ************************************************************************
 *
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
pmap_pinit(struct pmap *pmap)

/*
 * Clean up a pmap structure so it can be physically freed.
 */
pmap_puninit(pmap_t pmap)

/*
 * Wire in kernel global address entries.  To avoid a race condition
 * between pmap initialization and pmap_growkernel, this procedure
 * adds the pmap to the master list (which growkernel scans to update),
 * then copies the template.
 *
 * In a virtual kernel there are no kernel global address entries.
 */
pmap_pinit2(struct pmap *pmap)

/*
 * Release all resources held by the given physical map.
 *
 * Should only be called if the map contains no valid mappings.
 */
static int pmap_release_callback(struct vm_page *p, void *data);

pmap_release(struct pmap *pmap)

pmap_release_callback(struct vm_page *p, void *data)

/*
 * Retire the given physical map from service.  Should only be called if
 * the map contains no valid mappings.
 */
void
pmap_destroy(pmap_t pmap)
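/*
 * A hedged sketch of a body, not the original: assumes the pm_count
 * reference count field used by the BSD-derived pmap structures.
 */
{
	if (pmap == NULL)
		return;
	if (--pmap->pm_count == 0)
		pmap_release(pmap);
}
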
/*
 * Add a reference to the specified pmap.
 */
void
pmap_reference(pmap_t pmap)
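/*
 * Minimal sketch: bump the pmap's reference count (pm_count assumed,
 * matching the pmap_destroy() sketch above).
 */
{
	if (pmap != NULL)
		pmap->pm_count++;
}
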
/************************************************************************
 *			VMSPACE MANAGEMENT				*
 ************************************************************************
 *
 * The VMSPACE management we do in our virtual kernel must be reflected
 * in the real kernel.  This is accomplished by making vmspace system
 * calls to the real kernel.
 */
cpu_vmspace_alloc(struct vmspace *vm)

cpu_vmspace_free(struct vmspace *vm)

/************************************************************************
 *	    Procedures which operate directly on the kernel PMAP	*
 ************************************************************************/

/*
 * This maps the requested page table and gives us access to it.
 */
get_ptbase(struct pmap *pmap, vm_offset_t va)

get_ptbase1(struct pmap *pmap, vm_offset_t va)

get_ptbase2(struct pmap *pmap, vm_offset_t va)

/*
 * When removing a page directory the related VA range in the self-mapping
 * of the page table must be invalidated.
 */
inval_ptbase_pagedir(pmap_t pmap, vm_pindex_t pindex)

/*
 * Enter a mapping into kernel_pmap.  Mappings created in this fashion
 * are not managed.  Mappings must be immediately accessible on all cpus.
 *
 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the
 * real pmap and handle related races before storing the new vpte.
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
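/*
 * A sketch following the comment above: invalidate any previously valid
 * vpte via pmap_inval_pte(), then store the new one.  KernelPTA is the
 * self-mapped kernel page table base used elsewhere in this file; the
 * exact VPTE_* flag set chosen here is an assumption.
 */
{
	vpte_t *ptep;
	vpte_t npte;

	npte = (vpte_t)pa | VPTE_R | VPTE_W | VPTE_V;
	ptep = KernelPTA + (va >> PAGE_SHIFT);
	if (*ptep & VPTE_V)
		pmap_inval_pte(ptep, &kernel_pmap, va);
	*ptep = npte;
}
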
/*
 * Synchronize a kvm mapping originally made for the private use on
 * some other cpu so it can be used on all cpus.
 *
 * XXX add MADV_RESYNC to improve performance.
 */
pmap_kenter_sync(vm_offset_t va)

/*
 * Synchronize a kvm mapping originally made for the private use on
 * some other cpu so it can be used on our cpu.  Turns out to be the
 * same madvise() call, because we have to sync the real pmaps anyway.
 *
 * XXX add MADV_RESYNC to improve performance.
 */
pmap_kenter_sync_quick(vm_offset_t va)

/*
 * Make a previously read-only kernel mapping R+W (not implemented by
 * virtual kernels).
 */
void
pmap_kmodify_rw(vm_offset_t va)
{
	*pmap_kpte(va) |= VPTE_R | VPTE_W;
	madvise((void *)va, PAGE_SIZE, MADV_INVAL);
}

/*
 * Make a kernel mapping non-cacheable (not applicable to virtual kernels).
 */
void
pmap_kmodify_nc(vm_offset_t va)
{
	*pmap_kpte(va) |= VPTE_N;
	madvise((void *)va, PAGE_SIZE, MADV_INVAL);
}

/*
 * Map a contiguous range of physical memory to a KVM address.
 */
vm_offset_t
pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
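/*
 * A minimal sketch: walk the physical range a page at a time, entering
 * each page with pmap_kenter() and returning the starting VA.  The real
 * routine may honor 'prot'; this sketch ignores it.
 */
{
	vm_offset_t sva = virt;

	while (start < end) {
		pmap_kenter(virt, start);
		virt += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	return (sva);
}
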
/*
 * Enter an unmanaged KVA mapping for the private use of the current
 * cpu only.  pmap_kenter_sync() may be called to make the mapping usable
 * on other cpus.
 *
 * It is illegal for the mapping to be accessed by other cpus unless
 * pmap_kenter_sync*() is called.
 */
pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)

/*
 * Make a temporary mapping for a physical address.  This is only intended
 * to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
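/*
 * Sketch, assuming the conventional crashdumpmap staging window the BSD
 * kernels reserve for panic dumps (an assumption here): map page 'i' of
 * that window for the current cpu only.
 */
{
	pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa);
	return ((void *)(crashdumpmap + (i * PAGE_SIZE)));
}
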
/*
 * Remove an unmanaged mapping created with pmap_kenter*().
 */
pmap_kremove(vm_offset_t va)

/*
 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize
 * only with this cpu.
 *
 * Unfortunately because we optimize new entries by testing VPTE_V later
 * on, we actually still have to synchronize with all the cpus.  XXX maybe
 * store a junk value and test against 0 in the other places instead?
 */
pmap_kremove_quick(vm_offset_t va)

/*
 * Map a set of unmanaged VM pages into KVM.
 */
void
pmap_qenter(vm_offset_t va, struct vm_page **m, int count)
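/*
 * Sketch: enter each page with pmap_kenter().  VM_PAGE_TO_PHYS() is the
 * standard conversion from a vm_page_t to its physical address.
 */
{
	int i;

	for (i = 0; i < count; ++i)
		pmap_kenter(va + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m[i]));
}
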
/*
 * Map a set of VM pages to kernel virtual memory.  If a mapping changes
 * clear the supplied mask.  The caller handles any SMP interactions.
 * The mask is used to provide the caller with hints on what SMP interactions
 * might still be required.
 */
pmap_qenter2(vm_offset_t va, struct vm_page **m, int count, cpumask_t *mask)

/*
 * Undo the effects of pmap_qenter*().
 */
void
pmap_qremove(vm_offset_t va, int count)
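/*
 * Sketch: the inverse of pmap_qenter(), removing one pte per page.
 */
{
	int i;

	for (i = 0; i < count; ++i)
		pmap_kremove(va + i * PAGE_SIZE);
}
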
/************************************************************************
 *	    Misc support glue called by machine independent code	*
 ************************************************************************
 *
 * These routines are called by machine independent code to operate on
 * certain machine-dependent aspects of processes, threads, and pmaps.
 */

/*
 * Initialize MD portions of the thread structure.
 */
pmap_init_thread(thread_t td)

/*
 * This routine directly affects the fork performance of a process.
 */
pmap_init_proc(struct proc *p)

/*
 * Destroy the UPAGES for a process that has exited and disassociate
 * the process from its thread.
 */
pmap_dispose_proc(struct proc *p)

/*
 * We pre-allocate all page table pages for kernel virtual memory so
 * this routine will only be called if KVM has been exhausted.
 */
pmap_growkernel(vm_offset_t addr)

/*
 * The modification bit is not tracked for any pages in this range.  XXX
 * such pages in this map should always use pmap_k*() functions and not
 * be managed anyhow.
 *
 * XXX User and kernel address spaces are independent for virtual kernels,
 * this function only applies to the kernel pmap.
 */
pmap_track_modified(pmap_t pmap, vm_offset_t va)

/************************************************************************
 *	    Procedures supporting managed page table pages		*
 ************************************************************************
 *
 * These procedures are used to track managed page table pages.  These pages
 * use the page table page's vm_page_t to track PTEs in the page.  The
 * page table pages themselves are arranged in a VM object, pmap->pm_pteobj.
 *
 * This allows the system to throw away page table pages for user processes
 * at will and reinstantiate them on demand.
 */

/*
 * This routine works like vm_page_lookup() but also blocks as long as the
 * page is busy.  This routine does not busy the page it returns.
 *
 * Unless the caller is managing objects whose pages are in a known state,
 * the call should be made with a critical section held so the page's object
 * association remains valid on return.
 */
static vm_page_t
pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
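/*
 * Sketch of the lookup-and-wait loop the comment describes, assuming the
 * DragonFly-era vm_page_sleep_busy() interface: retry until the page is
 * no longer busy, then return it unbusied.
 */
{
	vm_page_t m;

retry:
	m = vm_page_lookup(object, pindex);
	if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
		goto retry;
	return (m);
}
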
/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 */
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)

static int
pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
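/*
 * Sketch: drop the hold and let the slow path deal with the wire count
 * once the last hold goes away, per the comment above.
 */
{
	vm_page_unhold(m);
	if (m->hold_count == 0)
		return (_pmap_unwire_pte_hold(pmap, m));
	return (0);
}
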
/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)

/*
 * Attempt to release and free a vm_page in a pmap.  Returns 1 on success,
 * 0 on failure (if the procedure had to sleep).
 */
pmap_release_free_page(struct pmap *pmap, vm_page_t p)

/*
 * This routine is called if the page table page is not mapped in the page
 * table directory.
 *
 * The routine is broken up into two parts for readability.
 */
_pmap_allocpte(pmap_t pmap, unsigned ptepindex)

/*
 * Determine the page table page required to access the VA in the pmap
 * and allocate it if necessary.  Return a held vm_page_t for the page.
 *
 * Only used with user pmaps.
 */
pmap_allocpte(pmap_t pmap, vm_offset_t va)

/************************************************************************
 *			Managed pages in pmaps				*
 ************************************************************************
 *
 * All pages entered into user pmaps and some pages entered into the kernel
 * pmap are managed, meaning that pmap_protect() and other related management
 * functions work on these pages.
 */

/*
 * Free the pv_entry back to the free list.  This function may be
 * called from an interrupt.
 */
static void
free_pv_entry(pv_entry_t pv)
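/*
 * Sketch, assuming the usual pvzone/pv_entry_count globals the BSD pmap
 * implementations keep: return the entry to its vm_zone.
 */
{
	pv_entry_count--;
	zfree(pvzone, pv);
}
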
/*
 * Get a new pv_entry, allocating a block from the system
 * when needed.  This function may be called from an interrupt.
 */

/*
 * This routine is very drastic, but can save the system
 * a lot of pv_entries.
 */

/*
 * If it is the first entry on the list, it is actually
 * in the header and we must copy the following entry up
 * to the header.  Otherwise we must search the list for
 * the entry.  In either case we free the now unused entry.
 */
pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)

/*
 * Create a pv entry for page at pa for (pmap, va).  If the page table page
 * holding the VA is managed, mpte will be non-NULL.
 */
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)

/*
 * pmap_remove_pte: do the things to unmap a page in a process.
 */
pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va)

/*
 * Remove a single page from a process address space.
 *
 * This function may not be called from an interrupt if the pmap is
 * not the kernel_pmap.
 */
pmap_remove_page(struct pmap *pmap, vm_offset_t va)

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 *
 * This function may not be called from an interrupt if the pmap is
 * not the kernel_pmap.
 */
pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)

/*
 * Removes this physical page from all physical maps in which it resides.
 * Reflects back modify bits to the pager.
 *
 * This routine may not be called from an interrupt.
 */
pmap_remove_all(vm_page_t m)

/*
 * Set the physical protection on the specified range of this map
 * as requested.
 *
 * This function may not be called from an interrupt if the map is
 * not the kernel_pmap.
 */
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)

/*
 * Enter a managed page into a pmap.  If the page is not wired, related
 * pmap data can be destroyed at any time for later demand-operation.
 *
 * Insert the vm_page (m) at virtual address (v) in (pmap), with the
 * specified protection, and wire the mapping if requested.
 *
 * NOTE: This routine may not lazy-evaluate or lose information.  The
 * page must actually be inserted into the given map NOW.
 *
 * NOTE: When entering a page at a KVA address, the pmap must be the
 * kernel_pmap.
 */
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
	   boolean_t wired)

/*
 * This is a quick version of pmap_enter().  It is used only under the
 * following conditions:
 *
 * (1) The pmap is not the kernel_pmap
 * (2) The page is not to be wired into the map
 * (3) The page is to be mapped read-only in the pmap (initially that is)
 * (4) The calling procedure is responsible for flushing the TLB
 * (5) The page is always managed
 * (6) There is no prior mapping at the VA
 */
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)

/*
 * Extract the physical address for the translation at the specified
 * virtual address in the pmap.
 */
pmap_extract(pmap_t pmap, vm_offset_t va)

/*
 * This routine preloads the ptes for a given object into the specified pmap.
 * This eliminates the blast of soft faults on process startup and
 * immediately after an mmap.
 */
static int pmap_object_init_pt_callback(vm_page_t p, void *data);

pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
		    vm_object_t object, vm_pindex_t pindex,
		    vm_size_t size, int limit)

pmap_object_init_pt_callback(vm_page_t p, void *data)

/*
 * pmap_prefault provides a quick way of clustering pagefaults into a
 * process's address space.  It is a "cousin" of pmap_object_init_pt,
 * except it runs at page fault time instead of mmap time.
 */
#define PFBAK 4
#define PFFOR 4
#define PAGEORDER_SIZE (PFBAK+PFFOR)

static int pmap_prefault_pageorder[] = {
	-PAGE_SIZE, PAGE_SIZE,
	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
	-4 * PAGE_SIZE, 4 * PAGE_SIZE
};

pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
	  vm_size_t len, vm_offset_t src_addr)

/*
 * Zero the specified PA by mapping the page into KVM and clearing its
 * contents.
 *
 * This function may be called from an interrupt and no locking is
 * required.
 */
pmap_zero_page(vm_paddr_t phys)

/*
 * pmap_page_assertzero:
 *
 *	Assert that a page is empty, panic if it isn't.
 */
pmap_page_assertzero(vm_paddr_t phys)

/*
 * Zero part of a physical page by mapping it into memory and clearing
 * its contents with bzero.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
pmap_zero_page_area(vm_paddr_t phys, int off, int size)

/*
 * Copy the physical page from the source PA to the target PA.
 * This function may be called from an interrupt.  No locking
 * is required.
 */
pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)

/*
 * pmap_copy_page_frag:
 *
 *	Copy the physical page from the source PA to the target PA.
 *	This function may be called from an interrupt.  No locking
 *	is required.
 */
pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
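/*
 * Sketch of the bounded pv-list scan the comment describes; assumes the
 * md.pv_list field and the crit_enter()/crit_exit() protection DragonFly
 * pmaps use around pv-list traversal.
 */
{
	pv_entry_t pv;
	int loops = 0;

	crit_enter();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			crit_exit();
			return (TRUE);
		}
		if (++loops >= 16)
			break;
	}
	crit_exit();
	return (FALSE);
}
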
/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds.  Also, this code is special cased for the current process
 * only, but can have the more generic (and slightly slower) mode enabled.
 * This is much faster than pmap_remove in the case of running down an
 * entire address space.
 */
pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)

/*
 * pmap_testbit tests bits in active mappings of a VM page.
 */
pmap_testbit(vm_page_t m, int bit)

/*
 * This routine is used to clear bits in ptes.  Certain bits require special
 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit.
 *
 * This routine is only called with certain VPTE_* bit combinations.
 */
pmap_clearbit(vm_page_t m, int bit)

/*
 * Lower the permission for all mappings to a given page.
 */
pmap_page_protect(vm_page_t m, vm_prot_t prot)

vm_paddr_t
pmap_phys_address(int ppn)
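/*
 * Sketch: convert a page number to a physical address, i.e. what the
 * i386 pmap expresses with its i386_ptob() macro.
 */
{
	return ((vm_paddr_t)ppn << PAGE_SHIFT);
}
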
/*
 * pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
pmap_ts_referenced(vm_page_t m)

/*
 * Return whether or not the specified physical page was modified
 * in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
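/*
 * Sketch: defer to pmap_testbit() with the vkernel's modified bit.
 */
{
	return (pmap_testbit(m, VPTE_M));
}
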
/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
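/*
 * Sketch: defer to pmap_clearbit(), which handles VPTE_M specially.
 */
{
	pmap_clearbit(m, VPTE_M);
}
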
/*
 * pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
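/*
 * Sketch: VPTE_A is assumed to be the vkernel's accessed/reference bit.
 */
{
	pmap_clearbit(m, VPTE_A);
}
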
/*
 * Miscellaneous support routines follow.
 */
static void
i386_protection_init(void)
{
	int *kp, prot;

	kp = protection_codes;
	/* build the VPTE protection bits for each VM_PROT_* combination */
	for (prot = 0; prot < 8; prot++) {
		if (prot & VM_PROT_READ)
			*kp |= VPTE_R;
		if (prot & VM_PROT_WRITE)
			*kp |= VPTE_W;
		if (prot & VM_PROT_EXECUTE)
			*kp |= VPTE_X;
		++kp;
	}
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 *
 * NOTE: we can't use pgeflag unless we invalidate the pages one at
 * a time.
 */
void *
pmap_mapdev(vm_paddr_t pa, vm_size_t size)
{
	vm_offset_t va, tmpva, offset;
	vpte_t *pte;

	offset = pa & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);

	va = kmem_alloc_nofault(&kernel_map, size);
	if (va == 0)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");

	pa = pa & VPTE_FRAME;
	for (tmpva = va; size > 0;) {
		pte = KernelPTA + (tmpva >> PAGE_SHIFT);
		*pte = pa | VPTE_R | VPTE_W | VPTE_V; /* | pgeflag; */
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	/* sync the real pmap for the range; MADV_INVAL idiom assumed */
	madvise((void *)va, tmpva - va, MADV_INVAL);

	return ((void *)(va + offset));
}

void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
	vm_offset_t base, offset;

	base = va & VPTE_FRAME;
	offset = va & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	pmap_qremove(va, size >> PAGE_SHIFT);
	kmem_free(&kernel_map, base, size);
}

/*
 * Perform the pmap work for mincore().
 */
pmap_mincore(pmap_t pmap, vm_offset_t addr)

pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)

pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)

pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)