1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
42 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
43 * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.87 2008/08/25 17:01:38 dillon Exp $
44 */
45
46/*
47 * Manages physical address maps.
48 *
49 * In addition to hardware address maps, this
50 * module is called upon to provide software-use-only
51 * maps which may or may not be stored in the same
52 * form as hardware maps. These pseudo-maps are
53 * used to store intermediate results from copy
54 * operations to and from address spaces.
55 *
56 * Since the information managed by this module is
57 * also stored by the logical address mapping module,
58 * this module may throw away valid virtual-to-physical
59 * mappings at almost any time. However, invalidations
60 * of virtual-to-physical mappings must be done as
61 * requested.
62 *
63 * In order to cope with hardware architectures which
64 * make virtual-to-physical map invalidates expensive,
65 * this module may delay invalidate or reduced protection
66 * operations until such time as they are actually
67 * necessary. This module is given full information as
68 * to which processors are currently using which maps,
69 * and to when physical maps must be made correct.
70 */
71
72#include "opt_disable_pse.h"
73#include "opt_pmap.h"
74#include "opt_msgbuf.h"
75
76#include <sys/param.h>
77#include <sys/systm.h>
78#include <sys/kernel.h>
79#include <sys/proc.h>
80#include <sys/msgbuf.h>
81#include <sys/vmmeter.h>
82#include <sys/mman.h>
83
84#include <vm/vm.h>
85#include <vm/vm_param.h>
86#include <sys/sysctl.h>
87#include <sys/lock.h>
88#include <vm/vm_kern.h>
89#include <vm/vm_page.h>
90#include <vm/vm_map.h>
91#include <vm/vm_object.h>
92#include <vm/vm_extern.h>
93#include <vm/vm_pageout.h>
94#include <vm/vm_pager.h>
95#include <vm/vm_zone.h>
96
97#include <sys/user.h>
98#include <sys/thread2.h>
99#include <sys/sysref2.h>
100
101#include <machine/cputypes.h>
102#include <machine/md_var.h>
103#include <machine/specialreg.h>
104#include <machine/smp.h>
105#include <machine_base/apic/apicreg.h>
106#include <machine/globaldata.h>
107#include <machine/pmap.h>
108#include <machine/pmap_inval.h>
109
110#include <ddb/ddb.h>
111
112#define PMAP_KEEP_PDIRS
113#ifndef PMAP_SHPGPERPROC
114#define PMAP_SHPGPERPROC 200
115#endif
116
117#if defined(DIAGNOSTIC)
118#define PMAP_DIAGNOSTIC
119#endif
120
121#define MINPV 2048
122
123#if !defined(PMAP_DIAGNOSTIC)
124#define PMAP_INLINE __inline
125#else
126#define PMAP_INLINE
127#endif
128
129/*
130 * Get PDEs and PTEs for user/kernel address space
131 */
132#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
133#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
134
135#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
136#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
137#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
138#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
139#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
140
141
142/*
143 * Given a map and a machine independent protection code,
144 * convert to a vax protection code.
145 */
146#define pte_prot(m, p) \
147 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
148static int protection_codes[8];
149
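/*
 * Illustrative sketch (not part of the original file): pte_prot() is a
 * plain table lookup into protection_codes[], which i386_protection_init()
 * fills in during bootstrap.  A caller building a PTE would typically
 * combine it with the frame and validity bits as below; "pa", "prot" and
 * "newpte" are hypothetical locals used only for this example.
 */
#if 0
	unsigned newpte;

	/* i386 has no per-page execute bit, so only the write bit matters */
	newpte = (pa & PG_FRAME) | pte_prot(pmap, prot) | PG_V;
#endif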
fbbaeba3 150struct pmap kernel_pmap;
151static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list);
152
e880033d 153vm_paddr_t avail_start; /* PA of first available physical page */
6ef943a3 154vm_paddr_t avail_end; /* PA of last available physical page */
e880033d 155vm_offset_t virtual_start; /* VA of first avail page (after kernel bss) */
984263bc 156vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
157vm_offset_t KvaStart; /* VA start of KVA space */
158vm_offset_t KvaEnd; /* VA end of KVA space (non-inclusive) */
159vm_offset_t KvaSize; /* max size of kernel virtual address space */
160static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
161static int pgeflag; /* PG_G or-in */
162static int pseflag; /* PG_PS or-in */
163
164static vm_object_t kptobj;
165
166static int nkpt;
167vm_offset_t kernel_vm_end;
168
169/*
170 * Data for the pv entry allocation mechanism
171 */
172static vm_zone_t pvzone;
173static struct vm_zone pvzone_store;
174static struct vm_object pvzone_obj;
175static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
176static int pmap_pagedaemon_waken = 0;
177static struct pv_entry *pvinit;
178
179/*
180 * All those kernel PT submaps that BSD is so fond of
181 */
e731d345 182pt_entry_t *CMAP1 = 0, *ptmmap;
984263bc 183caddr_t CADDR1 = 0, ptvmmap = 0;
184static pt_entry_t *msgbufmap;
185struct msgbuf *msgbufp=0;
186
187/*
188 * Crashdump maps.
189 */
190static pt_entry_t *pt_crashdumpmap;
191static caddr_t crashdumpmap;
192
984263bc 193extern pt_entry_t *SMPpt;
984263bc 194
195static PMAP_INLINE void free_pv_entry (pv_entry_t pv);
196static unsigned * get_ptbase (pmap_t pmap);
197static pv_entry_t get_pv_entry (void);
198static void i386_protection_init (void);
5e8d0349 199static __inline void pmap_clearbit (vm_page_t m, int bit);
200
201static void pmap_remove_all (vm_page_t m);
17cde63e 202static void pmap_enter_quick (pmap_t pmap, vm_offset_t va, vm_page_t m);
203static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq,
204 vm_offset_t sva, pmap_inval_info_t info);
205static void pmap_remove_page (struct pmap *pmap,
206 vm_offset_t va, pmap_inval_info_t info);
3ae0cd58 207static int pmap_remove_entry (struct pmap *pmap, vm_page_t m,
0f7a3396 208 vm_offset_t va, pmap_inval_info_t info);
209static boolean_t pmap_testbit (vm_page_t m, int bit);
210static void pmap_insert_entry (pmap_t pmap, vm_offset_t va,
211 vm_page_t mpte, vm_page_t m);
212
213static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);
214
215static int pmap_release_free_page (pmap_t pmap, vm_page_t p);
216static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex);
217static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va);
218static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
0f7a3396 219static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t);
220static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
221
222static unsigned pdir4mb;
223
224/*
225 * Move the kernel virtual free pointer to the next
226 * 4MB. This is used to help improve performance
227 * by using a large (4MB) page for much of the kernel
228 * (.text, .data, .bss)
229 */
230static vm_offset_t
231pmap_kmem_choose(vm_offset_t addr)
232{
233 vm_offset_t newaddr = addr;
234#ifndef DISABLE_PSE
235 if (cpu_feature & CPUID_PSE) {
236 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
237 }
238#endif
239 return newaddr;
240}
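/*
 * Worked example (added for illustration, not in the original source):
 * with NBPDR = 4MB (0x00400000) the expression above is the standard
 * power-of-two align-up idiom:
 *
 *	addr              = 0x00c01000
 *	addr + (NBPDR-1)  = 0x01000fff
 *	 & ~(NBPDR-1)     = 0x01000000	(the next 4MB boundary)
 *
 * An address that is already 4MB aligned is returned unchanged.
 */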
241
242/*
243 * pmap_pte:
244 *
245 * Extract the page table entry associated with the given map/virtual
246 * pair.
247 *
248 * This function may NOT be called from an interrupt.
984263bc 249 */
984263bc 250PMAP_INLINE unsigned *
840de426 251pmap_pte(pmap_t pmap, vm_offset_t va)
252{
253 unsigned *pdeaddr;
254
255 if (pmap) {
256 pdeaddr = (unsigned *) pmap_pde(pmap, va);
257 if (*pdeaddr & PG_PS)
258 return pdeaddr;
259 if (*pdeaddr) {
260 return get_ptbase(pmap) + i386_btop(va);
261 }
262 }
263 return (0);
264}
265
266/*
267 * pmap_pte_quick:
268 *
269 * Super fast pmap_pte routine best used when scanning the pv lists.
270 * This eliminates many coarse-grained invltlb calls. Note that many of
271 * the pv list scans are across different pmaps and it is very wasteful
272 * to do an entire invltlb when checking a single mapping.
273 *
9acd5bbb 274 * Should only be called while in a critical section.
984263bc 275 */
276static unsigned *
277pmap_pte_quick(pmap_t pmap, vm_offset_t va)
984263bc 278{
279 struct mdglobaldata *gd = mdcpu;
280 unsigned pde, newpf;
281
282 if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
283 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
284 unsigned index = i386_btop(va);
285 /* are we current address space or kernel? */
fbbaeba3 286 if ((pmap == &kernel_pmap) ||
287 (frame == (((unsigned) PTDpde) & PG_FRAME))) {
288 return (unsigned *) PTmap + index;
289 }
290 newpf = pde & PG_FRAME;
291 if ( ((* (unsigned *) gd->gd_PMAP1) & PG_FRAME) != newpf) {
292 * (unsigned *) gd->gd_PMAP1 = newpf | PG_RW | PG_V;
293 cpu_invlpg(gd->gd_PADDR1);
294 }
295 return gd->gd_PADDR1 + ((unsigned) index & (NPTEPG - 1));
984263bc 296 }
840de426 297 return (0);
298}
299
840de426 300
301/*
302 * Bootstrap the system enough to run with virtual memory.
303 *
304 * On the i386 this is called after mapping has already been enabled
305 * and just syncs the pmap module with what has already been done.
306 * [We can't call it easily with mapping off since the kernel is not
307 * mapped with PA == VA, hence we would have to relocate every address
308 * from the linked base (virtual) address "KERNBASE" to the actual
309 * (physical) address starting relative to 0]
310 */
311void
f123d5a1 312pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr)
313{
314 vm_offset_t va;
315 pt_entry_t *pte;
85100692 316 struct mdglobaldata *gd;
984263bc 317 int i;
81c04d07 318 int pg;
984263bc 319
320 KvaStart = (vm_offset_t)VADDR(PTDPTDI, 0);
321 KvaSize = (vm_offset_t)VADDR(APTDPTDI, 0) - KvaStart;
322 KvaEnd = KvaStart + KvaSize;
323
324 avail_start = firstaddr;
325
326 /*
327 * XXX The calculation of virtual_start is wrong. It's NKPT*PAGE_SIZE
328 * too large. It should instead be correctly calculated in locore.s and
329 * not based on 'first' (which is a physical address, not a virtual
330 * address, for the start of unused physical memory). The kernel
331 * page tables are NOT double mapped and thus should not be included
332 * in this calculation.
333 */
334 virtual_start = (vm_offset_t) KERNBASE + firstaddr;
335 virtual_start = pmap_kmem_choose(virtual_start);
c439ad8f 336 virtual_end = VADDR(KPTDI+NKPDE-1, NPTEPG-1);
337
338 /*
339 * Initialize protection array.
340 */
341 i386_protection_init();
342
343 /*
344 * The kernel's pmap is statically allocated so we don't have to use
345 * pmap_create, which is unlikely to work correctly at this part of
346 * the boot sequence (XXX and which no longer exists).
347 */
348 kernel_pmap.pm_pdir = (pd_entry_t *)(KERNBASE + (u_int)IdlePTD);
349 kernel_pmap.pm_count = 1;
350 kernel_pmap.pm_active = (cpumask_t)-1; /* don't allow deactivation */
351 TAILQ_INIT(&kernel_pmap.pm_pvlist);
352 nkpt = NKPT;
353
354 /*
355 * Reserve some special page table entries/VA space for temporary
356 * mapping of pages.
357 */
358#define SYSMAP(c, p, v, n) \
359 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
360
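/*
 * For illustration (not part of the original file): the invocation
 * SYSMAP(caddr_t, CMAP1, CADDR1, 1) below expands to
 *
 *	CADDR1 = (caddr_t)va; va += ((1)*PAGE_SIZE); CMAP1 = pte; pte += (1);
 *
 * i.e. each SYSMAP() call reserves 'n' consecutive KVA pages (returned in
 * 'v') together with a pointer to their page table entries (returned in
 * 'p'), advancing the shared 'va'/'pte' cursors.
 */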
e880033d 361 va = virtual_start;
fbbaeba3 362 pte = (pt_entry_t *) pmap_pte(&kernel_pmap, va);
363
364 /*
365 * CMAP1/CMAP2 are used for zeroing and copying pages.
366 */
367 SYSMAP(caddr_t, CMAP1, CADDR1, 1)
368
369 /*
370 * Crashdump maps.
371 */
372 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
373
374 /*
375 * ptvmmap is used for reading arbitrary physical pages via
376 * /dev/mem.
377 */
378 SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
379
380 /*
381 * msgbufp is used to map the system message buffer.
382 * XXX msgbufmap is not used.
383 */
384 SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
385 atop(round_page(MSGBUF_SIZE)))
386
e880033d 387 virtual_start = va;
984263bc 388
17a9f566 389 *(int *) CMAP1 = 0;
390 for (i = 0; i < NKPT; i++)
391 PTD[i] = 0;
392
393 /*
394 * PG_G is terribly broken on SMP because we IPI invltlb's in some
395 * cases rather than invlpg. Actually, I don't even know why it
396 * works under UP, given the self-referential page table mappings.
397 */
398#ifdef SMP
399 pgeflag = 0;
400#else
401 if (cpu_feature & CPUID_PGE)
984263bc 402 pgeflag = PG_G;
a2a5ad0d 403#endif
404
405/*
406 * Initialize the 4MB page size flag
407 */
408 pseflag = 0;
409/*
410 * The 4MB page version of the initial
411 * kernel page mapping.
412 */
413 pdir4mb = 0;
414
415#if !defined(DISABLE_PSE)
416 if (cpu_feature & CPUID_PSE) {
417 unsigned ptditmp;
418 /*
419 * Note that we have enabled PSE mode
420 */
421 pseflag = PG_PS;
422 ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
423 ptditmp &= ~(NBPDR - 1);
424 ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
425 pdir4mb = ptditmp;
426
427#ifndef SMP
428 /*
429 * Enable the PSE mode. If we are SMP we can't do this
430 * now because the APs will not be able to use it when
431 * they boot up.
432 */
433 load_cr4(rcr4() | CR4_PSE);
984263bc 434
435 /*
436 * We can do the mapping here for the single processor
437 * case. We simply ignore the old page table page from
438 * now on.
439 */
440 /*
441 * For SMP, we still need 4K pages to bootstrap APs,
442 * PSE will be enabled as soon as all APs are up.
443 */
b5b32410 444 PTD[KPTDI] = (pd_entry_t)ptditmp;
fbbaeba3 445 kernel_pmap.pm_pdir[KPTDI] = (pd_entry_t)ptditmp;
0f7a3396 446 cpu_invltlb();
8a8d5d85 447#endif
448 }
449#endif
97359a5b 450#ifdef SMP
451 if (cpu_apic_address == 0)
452 panic("pmap_bootstrap: no local apic!");
453
454 /* local apic is mapped on last page */
455 SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
456 (cpu_apic_address & PG_FRAME));
17a9f566 457#endif
984263bc 458
459 /*
460 * We need to finish setting up the globaldata page for the BSP.
461 * locore has already populated the page table for the mdglobaldata
462 * portion.
463 */
464 pg = MDGLOBALDATA_BASEALLOC_PAGES;
85100692 465 gd = &CPU_prvspace[0].mdglobaldata;
466 gd->gd_CMAP1 = &SMPpt[pg + 0];
467 gd->gd_CMAP2 = &SMPpt[pg + 1];
468 gd->gd_CMAP3 = &SMPpt[pg + 2];
469 gd->gd_PMAP1 = &SMPpt[pg + 3];
470 gd->gd_CADDR1 = CPU_prvspace[0].CPAGE1;
471 gd->gd_CADDR2 = CPU_prvspace[0].CPAGE2;
472 gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3;
473 gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1;
984263bc 474
0f7a3396 475 cpu_invltlb();
476}
477
478#ifdef SMP
479/*
480 * Set 4mb pdir for mp startup
481 */
482void
483pmap_set_opt(void)
484{
485 if (pseflag && (cpu_feature & CPUID_PSE)) {
486 load_cr4(rcr4() | CR4_PSE);
72740893 487 if (pdir4mb && mycpu->gd_cpuid == 0) { /* only on BSP */
fbbaeba3 488 kernel_pmap.pm_pdir[KPTDI] =
489 PTD[KPTDI] = (pd_entry_t)pdir4mb;
490 cpu_invltlb();
491 }
492 }
493}
494#endif
495
496/*
497 * Initialize the pmap module.
498 * Called by vm_init, to initialize any structures that the pmap
499 * system needs to map virtual memory.
500 * pmap_init has been enhanced to support discontiguous physical
501 * memory in a fairly consistent way.
502 */
503void
e7252eda 504pmap_init(void)
505{
506 int i;
507 int initial_pvs;
508
509 /*
510 * object for kernel page table pages
511 */
512 kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
513
514 /*
515 * Allocate memory for random pmap data structures. Includes the
516 * pv_head_table.
517 */
518
519 for(i = 0; i < vm_page_array_size; i++) {
520 vm_page_t m;
521
522 m = &vm_page_array[i];
523 TAILQ_INIT(&m->md.pv_list);
524 m->md.pv_list_count = 0;
525 }
526
527 /*
528 * init the pv free list
529 */
530 initial_pvs = vm_page_array_size;
531 if (initial_pvs < MINPV)
532 initial_pvs = MINPV;
533 pvzone = &pvzone_store;
e4846942 534 pvinit = (struct pv_entry *) kmem_alloc(&kernel_map,
535 initial_pvs * sizeof (struct pv_entry));
536 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
c5a45196 537 initial_pvs);
538
539 /*
540 * Now it is safe to enable pv_table recording.
541 */
542 pmap_initialized = TRUE;
543}
544
545/*
546 * Initialize the address space (zone) for the pv_entries. Set a
547 * high water mark so that the system can recover from excessive
548 * numbers of pv entries.
549 */
550void
f123d5a1 551pmap_init2(void)
552{
553 int shpgperproc = PMAP_SHPGPERPROC;
554
555 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
556 pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
557 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
558 pv_entry_high_water = 9 * (pv_entry_max / 10);
559 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
560}
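/*
 * Worked example (added for illustration; the numbers are hypothetical):
 * with the default PMAP_SHPGPERPROC of 200, maxproc = 1000 and
 * vm_page_array_size = 262144 (1GB of 4K pages), the sizing above gives
 *
 *	pv_entry_max        = 200 * 1000 + 262144 = 462144
 *	pv_entry_high_water = 9 * (462144 / 10)   = 415926
 *
 * The vm.pmap.shpgperproc and vm.pmap.pv_entries tunables override the
 * defaults used in this calculation.
 */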
561
562
563/***************************************************
564 * Low level helper routines.....
565 ***************************************************/
566
567#if defined(PMAP_DIAGNOSTIC)
568
569/*
570 * This code checks for non-writeable/modified pages.
571 * This should be an invalid condition.
572 */
573static int
574pmap_nw_modified(pt_entry_t ptea)
575{
576 int pte;
577
578 pte = (int) ptea;
579
580 if ((pte & (PG_M|PG_RW)) == PG_M)
581 return 1;
582 else
583 return 0;
584}
585#endif
586
587
588/*
589 * this routine defines the region(s) of memory that should
590 * not be tested for the modified bit.
591 */
592static PMAP_INLINE int
593pmap_track_modified(vm_offset_t va)
594{
595 if ((va < clean_sva) || (va >= clean_eva))
596 return 1;
597 else
598 return 0;
599}
600
984263bc 601static unsigned *
e0e69b7d 602get_ptbase(pmap_t pmap)
603{
604 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
e0e69b7d 605 struct globaldata *gd = mycpu;
606
607 /* are we current address space or kernel? */
fbbaeba3 608 if (pmap == &kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
609 return (unsigned *) PTmap;
610 }
e0e69b7d 611
984263bc 612 /* otherwise, we are alternate address space */
613 KKASSERT(gd->gd_intr_nesting_level == 0 &&
614 (gd->gd_curthread->td_flags & TDF_INTTHREAD) == 0);
e0e69b7d 615
984263bc 616 if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
b5b32410 617 APTDpde = (pd_entry_t)(frame | PG_RW | PG_V);
618 /* The page directory is not shared between CPUs */
619 cpu_invltlb();
984263bc
MD
620 }
621 return (unsigned *) APTmap;
622}
623
624/*
625 * pmap_extract:
626 *
627 * Extract the physical page address associated with the map/VA pair.
628 *
629 * This function may not be called from an interrupt if the pmap is
630 * not kernel_pmap.
984263bc 631 */
6ef943a3 632vm_paddr_t
840de426 633pmap_extract(pmap_t pmap, vm_offset_t va)
634{
635 vm_offset_t rtval;
636 vm_offset_t pdirindex;
840de426 637
638 pdirindex = va >> PDRSHIFT;
639 if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
640 unsigned *pte;
641 if ((rtval & PG_PS) != 0) {
642 rtval &= ~(NBPDR - 1);
643 rtval |= va & (NBPDR - 1);
644 return rtval;
645 }
646 pte = get_ptbase(pmap) + i386_btop(va);
647 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
648 return rtval;
649 }
650 return 0;
651}
652
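/*
 * Usage sketch (illustrative only, not from the original source):
 * translating a virtual address to its physical address.  "va" is a
 * hypothetical kernel virtual address.
 */
#if 0
	vm_paddr_t pa;

	pa = pmap_extract(&kernel_pmap, va);
	if (pa == 0)
		;	/* va is not currently mapped */
#endif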
653/***************************************************
654 * Low level mapping routines.....
655 ***************************************************/
656
657/*
658 * Routine: pmap_kenter
659 * Function:
660 * Add a wired page to the KVA
661 * NOTE! note that in order for the mapping to take effect -- you
662 * should do an invltlb after doing the pmap_kenter().
984263bc 663 */
24712b90 664void
6ef943a3 665pmap_kenter(vm_offset_t va, vm_paddr_t pa)
984263bc 666{
840de426 667 unsigned *pte;
668 unsigned npte;
669 pmap_inval_info info;
984263bc 670
0f7a3396 671 pmap_inval_init(&info);
672 npte = pa | PG_RW | PG_V | pgeflag;
673 pte = (unsigned *)vtopte(va);
17cde63e 674 pmap_inval_add(&info, &kernel_pmap, va);
984263bc 675 *pte = npte;
0f7a3396 676 pmap_inval_flush(&info);
677}
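/*
 * Usage sketch (illustrative only, not from the original source): mapping
 * a physical page at a previously reserved KVA address and tearing the
 * mapping down again.  "scratch_va", "pa" and "buf" are hypothetical.
 */
#if 0
	pmap_kenter(scratch_va, pa);	/* performs the (SMP) invalidation */
	bcopy((void *)scratch_va, buf, PAGE_SIZE);
	pmap_kremove(scratch_va);
#endif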
678
679/*
680 * Routine: pmap_kenter_quick
681 * Function:
682 * Similar to pmap_kenter(), except we only invalidate the
683 * mapping on the current CPU.
684 */
685void
686pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
687{
688 unsigned *pte;
689 unsigned npte;
690
691 npte = pa | PG_RW | PG_V | pgeflag;
692 pte = (unsigned *)vtopte(va);
693 *pte = npte;
694 cpu_invlpg((void *)va);
695}
696
697void
698pmap_kenter_sync(vm_offset_t va)
699{
700 pmap_inval_info info;
701
702 pmap_inval_init(&info);
fbbaeba3 703 pmap_inval_add(&info, &kernel_pmap, va);
704 pmap_inval_flush(&info);
705}
706
707void
708pmap_kenter_sync_quick(vm_offset_t va)
709{
710 cpu_invlpg((void *)va);
711}
712
713/*
714 * remove a page from the kernel pagetables
715 */
24712b90 716void
840de426 717pmap_kremove(vm_offset_t va)
984263bc 718{
840de426 719 unsigned *pte;
0f7a3396 720 pmap_inval_info info;
984263bc 721
0f7a3396 722 pmap_inval_init(&info);
984263bc 723 pte = (unsigned *)vtopte(va);
17cde63e 724 pmap_inval_add(&info, &kernel_pmap, va);
984263bc 725 *pte = 0;
0f7a3396 726 pmap_inval_flush(&info);
727}
728
729void
730pmap_kremove_quick(vm_offset_t va)
731{
732 unsigned *pte;
733 pte = (unsigned *)vtopte(va);
734 *pte = 0;
735 cpu_invlpg((void *)va);
736}
737
984263bc 738/*
739 * XXX these need to be recoded. They are not used in any critical path.
740 */
741void
742pmap_kmodify_rw(vm_offset_t va)
743{
744 *vtopte(va) |= PG_RW;
745 cpu_invlpg((void *)va);
746}
747
748void
749pmap_kmodify_nc(vm_offset_t va)
750{
751 *vtopte(va) |= PG_N;
752 cpu_invlpg((void *)va);
753}
754
755/*
756 * Used to map a range of physical addresses into kernel
757 * virtual address space.
758 *
759 * For now, VM is already on, we only need to map the
760 * specified memory.
761 */
762vm_offset_t
6ef943a3 763pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
764{
765 while (start < end) {
766 pmap_kenter(virt, start);
767 virt += PAGE_SIZE;
768 start += PAGE_SIZE;
769 }
770 return (virt);
771}
772
773
774/*
775 * Add a list of wired pages to the kva
776 * this routine is only used for temporary
777 * kernel mappings that do not need to have
778 * page modification or references recorded.
779 * Note that old mappings are simply written
780 * over. The page *must* be wired.
781 */
782void
840de426 783pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
784{
785 vm_offset_t end_va;
786
787 end_va = va + count * PAGE_SIZE;
788
789 while (va < end_va) {
790 unsigned *pte;
791
792 pte = (unsigned *)vtopte(va);
793 *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
984263bc 794 cpu_invlpg((void *)va);
795 va += PAGE_SIZE;
796 m++;
797 }
798#ifdef SMP
0f7a3396 799 smp_invltlb(); /* XXX */
800#endif
801}
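/*
 * Usage sketch (illustrative, not part of the original file): temporarily
 * wiring an array of vm_page_t's into a contiguous KVA window and then
 * removing the mappings.  "kva", "pages" and "npages" are hypothetical.
 */
#if 0
	pmap_qenter(kva, pages, npages);
	/* ... access the pages through kva ... */
	pmap_qremove(kva, npages);
#endif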
802
803void
804pmap_qenter2(vm_offset_t va, vm_page_t *m, int count, cpumask_t *mask)
805{
806 vm_offset_t end_va;
807 cpumask_t cmask = mycpu->gd_cpumask;
808
809 end_va = va + count * PAGE_SIZE;
810
811 while (va < end_va) {
812 unsigned *pte;
813 unsigned pteval;
814
815 /*
816 * Install the new PTE. If the pte changed from the prior
817 * mapping we must reset the cpu mask and invalidate the page.
818 * If the pte is the same but we have not seen it on the
819 * current cpu, invlpg the existing mapping. Otherwise the
820 * entry is optimal and no invalidation is required.
821 */
822 pte = (unsigned *)vtopte(va);
823 pteval = VM_PAGE_TO_PHYS(*m) | PG_A | PG_RW | PG_V | pgeflag;
824 if (*pte != pteval) {
a02705a9 825 *mask = 0;
826 *pte = pteval;
827 cpu_invlpg((void *)va);
828 } else if ((*mask & cmask) == 0) {
829 cpu_invlpg((void *)va);
830 }
831 va += PAGE_SIZE;
832 m++;
833 }
a02705a9 834 *mask |= cmask;
835}
836
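/*
 * Usage sketch (illustrative, not from the original source): a caller can
 * keep a cpumask per KVA window so that repeated pmap_qenter2() calls skip
 * invalidations this cpu has already seen.  "kva", "pages", "npages" and
 * "window_mask" are hypothetical.
 */
#if 0
	static cpumask_t window_mask;	/* cleared whenever the window is reused */

	pmap_qenter2(kva, pages, npages, &window_mask);
#endif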
837/*
838 * this routine jerks page mappings from the
839 * kernel -- it is meant only for temporary mappings.
840 */
841void
840de426 842pmap_qremove(vm_offset_t va, int count)
843{
844 vm_offset_t end_va;
845
846 end_va = va + count*PAGE_SIZE;
847
848 while (va < end_va) {
849 unsigned *pte;
850
851 pte = (unsigned *)vtopte(va);
852 *pte = 0;
984263bc 853 cpu_invlpg((void *)va);
854 va += PAGE_SIZE;
855 }
856#ifdef SMP
857 smp_invltlb();
858#endif
859}
860
861/*
862 * This routine works like vm_page_lookup() but also blocks as long as the
863 * page is busy. This routine does not busy the page it returns.
864 *
865 * Unless the caller is managing objects whose pages are in a known state,
866 * the call should be made with a critical section held so the page's object
867 * association remains valid on return.
06ecca5a 868 */
984263bc 869static vm_page_t
840de426 870pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
871{
872 vm_page_t m;
06ecca5a 873
874 do {
875 m = vm_page_lookup(object, pindex);
876 } while (m && vm_page_sleep_busy(m, FALSE, "pplookp"));
877
06ecca5a 878 return(m);
879}
880
881/*
263e4574 882 * Create a new thread and optionally associate it with a (new) process.
6ef943a3 883 * NOTE! the new thread's cpu may not equal the current cpu.
263e4574 884 */
885void
886pmap_init_thread(thread_t td)
263e4574 887{
f470d0c8 888 /* enforce pcb placement */
f470d0c8 889 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1;
65d6ce10 890 td->td_savefpu = &td->td_pcb->pcb_save;
7d0bac62 891 td->td_sp = (char *)td->td_pcb - 16;
892}
893
894/*
895 * This routine directly affects the fork perf for a process.
896 */
897void
13d13d89 898pmap_init_proc(struct proc *p)
984263bc 899{
900}
901
902/*
903 * Dispose the UPAGES for a process that has exited.
904 * This routine directly impacts the exit perf of a process.
905 */
c6880072 906void
7e1d4bf4 907pmap_dispose_proc(struct proc *p)
984263bc 908{
f1d1c3fa 909 KASSERT(p->p_lock == 0, ("attempt to dispose referenced proc! %p", p));
910}
911
912/***************************************************
913 * Page table page management routines.....
914 ***************************************************/
915
916/*
917 * This routine unholds page table pages, and if the hold count
918 * drops to zero, then it decrements the wire count.
919 */
920static int
0f7a3396 921_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
840de426 922{
923 /*
924 * Wait until we can busy the page ourselves. We cannot have
925 * any active flushes if we block.
926 */
927 if (m->flags & PG_BUSY) {
928 pmap_inval_flush(info);
929 while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
930 ;
931 }
932 KASSERT(m->queue == PQ_NONE,
933 ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m));
984263bc 934
eec2b734 935 if (m->hold_count == 1) {
984263bc 936 /*
eec2b734 937 * Unmap the page table page
984263bc 938 */
eec2b734 939 vm_page_busy(m);
0f7a3396 940 pmap_inval_add(info, pmap, -1);
984263bc 941 pmap->pm_pdir[m->pindex] = 0;
942
943 KKASSERT(pmap->pm_stats.resident_count > 0);
984263bc 944 --pmap->pm_stats.resident_count;
945
946 if (pmap->pm_ptphint == m)
947 pmap->pm_ptphint = NULL;
948
949 /*
950 * This was our last hold, the page had better be unwired
951 * after we decrement wire_count.
952 *
953 * FUTURE NOTE: shared page directory page could result in
954 * multiple wire counts.
984263bc 955 */
eec2b734 956 vm_page_unhold(m);
984263bc 957 --m->wire_count;
958 KKASSERT(m->wire_count == 0);
959 --vmstats.v_wire_count;
17cde63e 960 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
961 vm_page_flash(m);
962 vm_page_free_zero(m);
984263bc 963 return 1;
964 } else {
965 KKASSERT(m->hold_count > 1);
966 vm_page_unhold(m);
967 return 0;
984263bc 968 }
969}
970
971static PMAP_INLINE int
0f7a3396 972pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
984263bc 973{
974 KKASSERT(m->hold_count > 0);
975 if (m->hold_count > 1) {
976 vm_page_unhold(m);
984263bc 977 return 0;
978 } else {
979 return _pmap_unwire_pte_hold(pmap, m, info);
980 }
984263bc
MD
981}
982
983/*
984 * After removing a page table entry, this routine is used to
985 * conditionally free the page, and manage the hold/wire counts.
986 */
987static int
988pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
989 pmap_inval_info_t info)
990{
991 unsigned ptepindex;
992 if (va >= UPT_MIN_ADDRESS)
993 return 0;
994
995 if (mpte == NULL) {
996 ptepindex = (va >> PDRSHIFT);
997 if (pmap->pm_ptphint &&
998 (pmap->pm_ptphint->pindex == ptepindex)) {
999 mpte = pmap->pm_ptphint;
1000 } else {
0f7a3396 1001 pmap_inval_flush(info);
1002 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1003 pmap->pm_ptphint = mpte;
1004 }
1005 }
1006
0f7a3396 1007 return pmap_unwire_pte_hold(pmap, mpte, info);
1008}
1009
54a764e8 1010/*
1011 * Initialize pmap0/vmspace0. This pmap is not added to pmap_list because
1012 * it, and IdlePTD, represents the template used to update all other pmaps.
1013 *
1014 * On architectures where the kernel pmap is not integrated into the user
1015 * process pmap, this pmap represents the process pmap, not the kernel pmap.
1016 * kernel_pmap should be used to directly access the kernel_pmap.
54a764e8 1017 */
984263bc 1018void
840de426 1019pmap_pinit0(struct pmap *pmap)
1020{
1021 pmap->pm_pdir =
e4846942 1022 (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE);
24712b90 1023 pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t) IdlePTD);
1024 pmap->pm_count = 1;
1025 pmap->pm_active = 0;
1026 pmap->pm_ptphint = NULL;
1027 TAILQ_INIT(&pmap->pm_pvlist);
1028 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1029}
1030
1031/*
1032 * Initialize a preallocated and zeroed pmap structure,
1033 * such as one in a vmspace structure.
1034 */
1035void
840de426 1036pmap_pinit(struct pmap *pmap)
1037{
1038 vm_page_t ptdpg;
1039
1040 /*
1041 * No need to allocate page table space yet but we do need a valid
1042 * page directory table.
1043 */
b5b32410 1044 if (pmap->pm_pdir == NULL) {
984263bc 1045 pmap->pm_pdir =
e4846942 1046 (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE);
b5b32410 1047 }
1048
1049 /*
c3834cb2 1050 * Allocate an object for the ptes
1051 */
1052 if (pmap->pm_pteobj == NULL)
c3834cb2 1053 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1);
1054
1055 /*
1056 * Allocate the page directory page, unless we already have
1057 * one cached. If we used the cached page the wire_count will
1058 * already be set appropriately.
984263bc 1059 */
1060 if ((ptdpg = pmap->pm_pdirm) == NULL) {
1061 ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI,
1062 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1063 pmap->pm_pdirm = ptdpg;
1064 vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY);
1065 ptdpg->valid = VM_PAGE_BITS_ALL;
1066 ptdpg->wire_count = 1;
1067 ++vmstats.v_wire_count;
1068 pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1069 }
1070 if ((ptdpg->flags & PG_ZERO) == 0)
1071 bzero(pmap->pm_pdir, PAGE_SIZE);
1072
984263bc 1073 pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
1074
1075 /* install self-referential address mapping entry */
1076 *(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1077 VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
1078
1079 pmap->pm_count = 1;
1080 pmap->pm_active = 0;
1081 pmap->pm_ptphint = NULL;
1082 TAILQ_INIT(&pmap->pm_pvlist);
1083 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
eec2b734 1084 pmap->pm_stats.resident_count = 1;
1085}
1086
1087/*
1088 * Clean up a pmap structure so it can be physically freed. This routine
1089 * is called by the vmspace dtor function. A great deal of pmap data is
1090 * left passively mapped to improve vmspace management so we have a bit
1091 * of cleanup work to do here.
1092 */
1093void
1094pmap_puninit(pmap_t pmap)
1095{
1096 vm_page_t p;
1097
e3161323 1098 KKASSERT(pmap->pm_active == 0);
1099 if ((p = pmap->pm_pdirm) != NULL) {
1100 KKASSERT(pmap->pm_pdir != NULL);
1101 pmap_kremove((vm_offset_t)pmap->pm_pdir);
1102 p->wire_count--;
1103 vmstats.v_wire_count--;
1104 KKASSERT((p->flags & PG_BUSY) == 0);
1105 vm_page_busy(p);
1106 vm_page_free_zero(p);
1107 pmap->pm_pdirm = NULL;
1108 }
1109 if (pmap->pm_pdir) {
1110 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pdir, PAGE_SIZE);
1111 pmap->pm_pdir = NULL;
1112 }
1113 if (pmap->pm_pteobj) {
1114 vm_object_deallocate(pmap->pm_pteobj);
1115 pmap->pm_pteobj = NULL;
1116 }
1117}
1118
1119/*
1120 * Wire in kernel global address entries. To avoid a race condition
1121 * between pmap initialization and pmap_growkernel, this procedure
1122 * adds the pmap to the master list (which growkernel scans to update),
1123 * then copies the template.
1124 */
1125void
840de426 1126pmap_pinit2(struct pmap *pmap)
984263bc 1127{
1128 crit_enter();
1129 TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode);
1130 /* XXX copies current process, does not fill in MPPTDI */
1131 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
54a764e8 1132 crit_exit();
1133}
1134
344ad853 1135/*
eec2b734 1136 * Attempt to release and free a vm_page in a pmap. Returns 1 on success,
344ad853 1137 * 0 on failure (if the procedure had to sleep).
1138 *
1139 * When asked to remove the page directory page itself, we actually just
1140 * leave it cached so we do not have to incur the SMP inval overhead of
1141 * removing the kernel mapping. pmap_puninit() will take care of it.
344ad853 1142 */
984263bc 1143static int
840de426 1144pmap_release_free_page(struct pmap *pmap, vm_page_t p)
1145{
1146 unsigned *pde = (unsigned *) pmap->pm_pdir;
1147 /*
1148 * This code optimizes the case of freeing non-busy
1149 * page-table pages. Those pages are zero now, and
1150 * might as well be placed directly into the zero queue.
1151 */
1152 if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
1153 return 0;
1154
1155 vm_page_busy(p);
1156
1157 /*
1158 * Remove the page table page from the processes address space.
1159 */
1160 pde[p->pindex] = 0;
1161 KKASSERT(pmap->pm_stats.resident_count > 0);
1162 --pmap->pm_stats.resident_count;
1163
1164 if (p->hold_count) {
1165 panic("pmap_release: freeing held page table page");
1166 }
1167 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1168 pmap->pm_ptphint = NULL;
1169
984263bc 1170 /*
1171 * We leave the page directory page cached, wired, and mapped in
1172 * the pmap until the dtor function (pmap_puninit()) gets called.
1173 * However, still clean it up so we can set PG_ZERO.
1174 */
1175 if (p->pindex == PTDPTDI) {
1176 bzero(pde + KPTDI, nkpt * PTESIZE);
984263bc 1177 pde[MPPTDI] = 0;
984263bc 1178 pde[APTDPTDI] = 0;
1179 vm_page_flag_set(p, PG_ZERO);
1180 vm_page_wakeup(p);
1181 } else {
1182 p->wire_count--;
1183 vmstats.v_wire_count--;
1184 vm_page_free_zero(p);
984263bc 1185 }
1186 return 1;
1187}
1188
1189/*
1190 * this routine is called if the page table page is not
1191 * mapped correctly.
1192 */
1193static vm_page_t
840de426 1194_pmap_allocpte(pmap_t pmap, unsigned ptepindex)
1195{
1196 vm_offset_t pteva, ptepa;
1197 vm_page_t m;
1198
1199 /*
1200 * Find or fabricate a new pagetable page
1201 */
1202 m = vm_page_grab(pmap->pm_pteobj, ptepindex,
dc1fd4b3 1203 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
1204
1205 KASSERT(m->queue == PQ_NONE,
1206 ("_pmap_allocpte: %p->queue != PQ_NONE", m));
1207
1208 /*
1209 * Increment the hold count for the page we will be returning to
1210 * the caller.
1211 */
1212 m->hold_count++;
1213
1214 /*
1215 * It is possible that someone else got in and mapped by the page
1216 * directory page while we were blocked, if so just unbusy and
1217 * return the held page.
1218 */
1219 if ((ptepa = pmap->pm_pdir[ptepindex]) != 0) {
1220 KKASSERT((ptepa & PG_FRAME) == VM_PAGE_TO_PHYS(m));
1221 vm_page_wakeup(m);
1222 return(m);
1223 }
1224
984263bc 1225 if (m->wire_count == 0)
12e4aaff 1226 vmstats.v_wire_count++;
1227 m->wire_count++;
1228
1229
1230 /*
1231 * Map the pagetable page into the process address space, if
1232 * it isn't already there.
1233 */
1234
eec2b734 1235 ++pmap->pm_stats.resident_count;
1236
1237 ptepa = VM_PAGE_TO_PHYS(m);
1238 pmap->pm_pdir[ptepindex] =
1239 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
1240
1241 /*
1242 * Set the page table hint
1243 */
1244 pmap->pm_ptphint = m;
1245
1246 /*
1247 * Try to use the new mapping, but if we cannot, then
1248 * do it with the routine that maps the page explicitly.
1249 */
1250 if ((m->flags & PG_ZERO) == 0) {
1251 if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1252 (((unsigned) PTDpde) & PG_FRAME)) {
1253 pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1254 bzero((caddr_t) pteva, PAGE_SIZE);
1255 } else {
1256 pmap_zero_page(ptepa);
1257 }
1258 }
1259
1260 m->valid = VM_PAGE_BITS_ALL;
1261 vm_page_flag_clear(m, PG_ZERO);
1262 vm_page_flag_set(m, PG_MAPPED);
1263 vm_page_wakeup(m);
1264
1265 return m;
1266}
1267
1268static vm_page_t
840de426 1269pmap_allocpte(pmap_t pmap, vm_offset_t va)
1270{
1271 unsigned ptepindex;
1272 vm_offset_t ptepa;
1273 vm_page_t m;
1274
1275 /*
1276 * Calculate pagetable page index
1277 */
1278 ptepindex = va >> PDRSHIFT;
1279
1280 /*
1281 * Get the page directory entry
1282 */
1283 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1284
1285 /*
1286 * This supports switching from a 4MB page to a
1287 * normal 4K page.
1288 */
1289 if (ptepa & PG_PS) {
1290 pmap->pm_pdir[ptepindex] = 0;
1291 ptepa = 0;
1292 cpu_invltlb();
1293 smp_invltlb();
1294 }
1295
1296 /*
1297 * If the page table page is mapped, we just increment the
1298 * hold count, and activate it.
1299 */
1300 if (ptepa) {
1301 /*
1302 * In order to get the page table page, try the
1303 * hint first.
1304 */
1305 if (pmap->pm_ptphint &&
1306 (pmap->pm_ptphint->pindex == ptepindex)) {
1307 m = pmap->pm_ptphint;
1308 } else {
1309 m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1310 pmap->pm_ptphint = m;
1311 }
1312 m->hold_count++;
1313 return m;
1314 }
1315 /*
1316 * Here if the pte page isn't mapped, or if it has been deallocated.
1317 */
1318 return _pmap_allocpte(pmap, ptepindex);
1319}
1320
1321
1322/***************************************************
1f804340 1323 * Pmap allocation/deallocation routines.
1324 ***************************************************/
1325
1326/*
1327 * Release any resources held by the given physical map.
1328 * Called when a pmap initialized by pmap_pinit is being released.
1329 * Should only be called if the map contains no valid mappings.
1330 */
1331static int pmap_release_callback(struct vm_page *p, void *data);
1332
984263bc 1333void
840de426 1334pmap_release(struct pmap *pmap)
984263bc 1335{
984263bc 1336 vm_object_t object = pmap->pm_pteobj;
1f804340 1337 struct rb_vm_page_scan_info info;
984263bc 1338
e3161323 1339 KASSERT(pmap->pm_active == 0, ("pmap still active! %08x", pmap->pm_active));
1340#if defined(DIAGNOSTIC)
1341 if (object->ref_count != 1)
1342 panic("pmap_release: pteobj reference count != 1");
1343#endif
1344
1345 info.pmap = pmap;
1346 info.object = object;
9acd5bbb 1347 crit_enter();
54a764e8 1348 TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode);
1349 crit_exit();
1350
1351 do {
1352 crit_enter();
1353 info.error = 0;
1354 info.mpte = NULL;
1355 info.limit = object->generation;
1356
1357 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
1358 pmap_release_callback, &info);
1359 if (info.error == 0 && info.mpte) {
1360 if (!pmap_release_free_page(pmap, info.mpte))
1361 info.error = 1;
984263bc 1362 }
344ad853 1363 crit_exit();
1364 } while (info.error);
1365}
1366
1367static int
1368pmap_release_callback(struct vm_page *p, void *data)
1369{
1370 struct rb_vm_page_scan_info *info = data;
1371
1372 if (p->pindex == PTDPTDI) {
1373 info->mpte = p;
1374 return(0);
344ad853 1375 }
1376 if (!pmap_release_free_page(info->pmap, p)) {
1377 info->error = 1;
1378 return(-1);
1379 }
1380 if (info->object->generation != info->limit) {
1381 info->error = 1;
1382 return(-1);
1383 }
1384 return(0);
984263bc 1385}
1386
1387/*
0e5797fe 1388 * Grow the number of kernel page table entries, if needed.
984263bc 1389 */
0e5797fe 1390
1391void
1392pmap_growkernel(vm_offset_t addr)
1393{
54a764e8 1394 struct pmap *pmap;
1395 vm_offset_t ptppaddr;
1396 vm_page_t nkpg;
1397 pd_entry_t newpdir;
1398
9acd5bbb 1399 crit_enter();
1400 if (kernel_vm_end == 0) {
1401 kernel_vm_end = KERNBASE;
1402 nkpt = 0;
1403 while (pdir_pde(PTD, kernel_vm_end)) {
1404 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1405 nkpt++;
1406 }
1407 }
1408 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1409 while (kernel_vm_end < addr) {
1410 if (pdir_pde(PTD, kernel_vm_end)) {
1411 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1412 continue;
1413 }
1414
1415 /*
1416 * This index is bogus, but out of the way
1417 */
1418 nkpg = vm_page_alloc(kptobj, nkpt,
1419 VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT);
1420 if (nkpg == NULL)
1421 panic("pmap_growkernel: no memory to grow kernel");
1422
1423 vm_page_wire(nkpg);
1424 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1425 pmap_zero_page(ptppaddr);
1426 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
1427 pdir_pde(PTD, kernel_vm_end) = newpdir;
fbbaeba3 1428 *pmap_pde(&kernel_pmap, kernel_vm_end) = newpdir;
1429 nkpt++;
1430
1431 /*
54a764e8 1432 * This update must be interlocked with pmap_pinit2.
0e5797fe 1433 */
1434 TAILQ_FOREACH(pmap, &pmap_list, pm_pmnode) {
1435 *pmap_pde(pmap, kernel_vm_end) = newpdir;
1436 }
1437 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1438 ~(PAGE_SIZE * NPTEPG - 1);
984263bc 1439 }
9acd5bbb 1440 crit_exit();
1441}
1442
1443/*
1444 * Retire the given physical map from service.
1445 * Should only be called if the map contains
1446 * no valid mappings.
1447 */
1448void
840de426 1449pmap_destroy(pmap_t pmap)
1450{
1451 int count;
1452
1453 if (pmap == NULL)
1454 return;
1455
1456 count = --pmap->pm_count;
1457 if (count == 0) {
1458 pmap_release(pmap);
1459 panic("destroying a pmap is not yet implemented");
1460 }
1461}
1462
1463/*
1464 * Add a reference to the specified pmap.
1465 */
1466void
840de426 1467pmap_reference(pmap_t pmap)
1468{
1469 if (pmap != NULL) {
1470 pmap->pm_count++;
1471 }
1472}
1473
1474/***************************************************
1475* page management routines.
1476 ***************************************************/
1477
1478/*
1479 * free the pv_entry back to the free list. This function may be
1480 * called from an interrupt.
1481 */
1482static PMAP_INLINE void
840de426 1483free_pv_entry(pv_entry_t pv)
1484{
1485 pv_entry_count--;
8a8d5d85 1486 zfree(pvzone, pv);
1487}
1488
1489/*
1490 * get a new pv_entry, allocating a block from the system
8a8d5d85 1491 * when needed. This function may be called from an interrupt.
1492 */
1493static pv_entry_t
1494get_pv_entry(void)
1495{
1496 pv_entry_count++;
1497 if (pv_entry_high_water &&
1498 (pv_entry_count > pv_entry_high_water) &&
1499 (pmap_pagedaemon_waken == 0)) {
1500 pmap_pagedaemon_waken = 1;
1501 wakeup (&vm_pages_needed);
1502 }
8a8d5d85 1503 return zalloc(pvzone);
1504}
1505
1506/*
1507 * This routine is very drastic, but can save the system
1508 * in a pinch.
1509 */
1510void
840de426 1511pmap_collect(void)
1512{
1513 int i;
1514 vm_page_t m;
1515 static int warningdone=0;
1516
1517 if (pmap_pagedaemon_waken == 0)
1518 return;
20479584 1519 pmap_pagedaemon_waken = 0;
1520
1521 if (warningdone < 5) {
26be20a0 1522 kprintf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1523 warningdone++;
1524 }
1525
1526 for(i = 0; i < vm_page_array_size; i++) {
1527 m = &vm_page_array[i];
1528 if (m->wire_count || m->hold_count || m->busy ||
1529 (m->flags & PG_BUSY))
1530 continue;
1531 pmap_remove_all(m);
1532 }
1533}
1534
1535
1536/*
1537 * If it is the first entry on the list, it is actually
1538 * in the header and we must copy the following entry up
1539 * to the header. Otherwise we must search the list for
1540 * the entry. In either case we free the now unused entry.
1541 */
984263bc 1542static int
1543pmap_remove_entry(struct pmap *pmap, vm_page_t m,
1544 vm_offset_t va, pmap_inval_info_t info)
1545{
1546 pv_entry_t pv;
1547 int rtval;
984263bc 1548
9acd5bbb 1549 crit_enter();
1550 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1551 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1552 if (pmap == pv->pv_pmap && va == pv->pv_va)
1553 break;
1554 }
1555 } else {
1556 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1557 if (va == pv->pv_va)
1558 break;
1559 }
1560 }
1561
1562 rtval = 0;
1563 if (pv) {
1564 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1565 m->md.pv_list_count--;
17cde63e 1566 if (TAILQ_EMPTY(&m->md.pv_list))
984263bc 1567 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
984263bc 1568 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1569 ++pmap->pm_generation;
1570 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info);
1571 free_pv_entry(pv);
1572 }
9acd5bbb 1573 crit_exit();
1574 return rtval;
1575}
1576
1577/*
1578 * Create a pv entry for page at pa for
1579 * (pmap, va).
1580 */
1581static void
840de426 1582pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
984263bc 1583{
1584 pv_entry_t pv;
1585
9acd5bbb 1586 crit_enter();
984263bc
MD
1587 pv = get_pv_entry();
1588 pv->pv_va = va;
1589 pv->pv_pmap = pmap;
1590 pv->pv_ptem = mpte;
1591
1592 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1593 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1594 m->md.pv_list_count++;
1595
9acd5bbb 1596 crit_exit();
1597}
1598
1599/*
1600 * pmap_remove_pte: do the things to unmap a page in a process
1601 */
1602static int
1603pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va,
1604 pmap_inval_info_t info)
1605{
1606 unsigned oldpte;
1607 vm_page_t m;
1608
0f7a3396 1609 pmap_inval_add(info, pmap, va);
1610 oldpte = loadandclear(ptq);
1611 if (oldpte & PG_W)
1612 pmap->pm_stats.wired_count -= 1;
1613 /*
1614 * Machines that don't support invlpg, also don't support
1615 * PG_G. XXX PG_G is disabled for SMP so don't worry about
1616 * the SMP case.
1617 */
1618 if (oldpte & PG_G)
41a01a4d 1619 cpu_invlpg((void *)va);
1620 KKASSERT(pmap->pm_stats.resident_count > 0);
1621 --pmap->pm_stats.resident_count;
1622 if (oldpte & PG_MANAGED) {
1623 m = PHYS_TO_VM_PAGE(oldpte);
1624 if (oldpte & PG_M) {
1625#if defined(PMAP_DIAGNOSTIC)
1626 if (pmap_nw_modified((pt_entry_t) oldpte)) {
26be20a0 1627 kprintf(
1628 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1629 va, oldpte);
1630 }
1631#endif
1632 if (pmap_track_modified(va))
1633 vm_page_dirty(m);
1634 }
1635 if (oldpte & PG_A)
1636 vm_page_flag_set(m, PG_REFERENCED);
0f7a3396 1637 return pmap_remove_entry(pmap, m, va, info);
984263bc 1638 } else {
0f7a3396 1639 return pmap_unuse_pt(pmap, va, NULL, info);
1640 }
1641
1642 return 0;
1643}
1644
1645/*
1646 * pmap_remove_page:
1647 *
1648 * Remove a single page from a process address space.
1649 *
1650 * This function may not be called from an interrupt if the pmap is
1651 * not kernel_pmap.
1652 */
1653static void
0f7a3396 1654pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info)
984263bc 1655{
840de426 1656 unsigned *ptq;
1657
1658 /*
1659 * if there is no pte for this address, just skip it!!! Otherwise
1660 * get a local va for mappings for this pmap and remove the entry.
984263bc 1661 */
1662 if (*pmap_pde(pmap, va) != 0) {
1663 ptq = get_ptbase(pmap) + i386_btop(va);
1664 if (*ptq) {
0f7a3396 1665 pmap_remove_pte(pmap, ptq, va, info);
e0e69b7d 1666 }
984263bc 1667 }
1668}
1669
1670/*
0f7a3396 1671 * pmap_remove:
e0e69b7d 1672 *
1673 * Remove the given range of addresses from the specified map.
1674 *
1675 * It is assumed that the start and end are properly
1676 * rounded to the page size.
1677 *
1678 * This function may not be called from an interrupt if the pmap is
1679 * not kernel_pmap.
1680 */
1681void
840de426 1682pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
984263bc 1683{
840de426 1684 unsigned *ptbase;
1685 vm_offset_t pdnxt;
1686 vm_offset_t ptpaddr;
1687 vm_offset_t sindex, eindex;
0f7a3396 1688 struct pmap_inval_info info;
1689
1690 if (pmap == NULL)
1691 return;
1692
1693 if (pmap->pm_stats.resident_count == 0)
1694 return;
1695
1696 pmap_inval_init(&info);
1697
1698 /*
1699 * Special case for removing a single page. This is a very
1700 * common operation and allows us to short-circuit the
1701 * general code path.
1702 */
1703 if (((sva + PAGE_SIZE) == eva) &&
1704 (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1705 pmap_remove_page(pmap, sva, &info);
1706 pmap_inval_flush(&info);
1707 return;
1708 }
1709
1710 /*
1711 * Get a local virtual address for the mappings that are being
1712 * worked with.
1713 */
1714 sindex = i386_btop(sva);
1715 eindex = i386_btop(eva);
1716
1717 for (; sindex < eindex; sindex = pdnxt) {
1718 unsigned pdirindex;
1719
1720 /*
1721 * Calculate index for next page table.
1722 */
1723 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1724 if (pmap->pm_stats.resident_count == 0)
1725 break;
1726
1727 pdirindex = sindex / NPDEPG;
1728 if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
0f7a3396 1729 pmap_inval_add(&info, pmap, -1);
1730 pmap->pm_pdir[pdirindex] = 0;
1731 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1732 continue;
1733 }
1734
1735 /*
1736 * Weed out invalid mappings. Note: we assume that the page
1737 * directory table is always allocated, and in kernel virtual.
1738 */
1739 if (ptpaddr == 0)
1740 continue;
1741
1742 /*
1743 * Limit our scan to either the end of the va represented
1744 * by the current page table page, or to the end of the
1745 * range being removed.
1746 */
1747 if (pdnxt > eindex) {
1748 pdnxt = eindex;
1749 }
1750
1751 /*
1752 * NOTE: pmap_remove_pte() can block.
1753 */
0f7a3396 1754 for (; sindex != pdnxt; sindex++) {
984263bc 1755 vm_offset_t va;
1756
1757 ptbase = get_ptbase(pmap);
0f7a3396 1758 if (ptbase[sindex] == 0)
984263bc 1759 continue;
984263bc 1760 va = i386_ptob(sindex);
0f7a3396 1761 if (pmap_remove_pte(pmap, ptbase + sindex, va, &info))
1762 break;
1763 }
1764 }
0f7a3396 1765 pmap_inval_flush(&info);
1766}
1767
1768/*
1769 * pmap_remove_all:
1770 *
1771 * Removes this physical page from all physical maps in which it resides.
1772 * Reflects back modify bits to the pager.
984263bc 1773 *
e0e69b7d 1774 * This routine may not be called from an interrupt.
1775 */
1776
1777static void
840de426 1778pmap_remove_all(vm_page_t m)
984263bc 1779{
0f7a3396 1780 struct pmap_inval_info info;
840de426 1781 unsigned *pte, tpte;
0f7a3396 1782 pv_entry_t pv;
984263bc 1783
1784 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
1785 return;
984263bc 1786
0f7a3396 1787 pmap_inval_init(&info);
9acd5bbb 1788 crit_enter();
984263bc 1789 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1790 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0);
1791 --pv->pv_pmap->pm_stats.resident_count;
1792
1793 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
0f7a3396 1794 pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
984263bc 1795 tpte = loadandclear(pte);
17cde63e 1796
1797 if (tpte & PG_W)
1798 pv->pv_pmap->pm_stats.wired_count--;
1799
1800 if (tpte & PG_A)
1801 vm_page_flag_set(m, PG_REFERENCED);
1802
1803 /*
1804 * Update the vm_page_t clean and reference bits.
1805 */
1806 if (tpte & PG_M) {
1807#if defined(PMAP_DIAGNOSTIC)
1808 if (pmap_nw_modified((pt_entry_t) tpte)) {
26be20a0 1809 kprintf(
1810 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1811 pv->pv_va, tpte);
1812 }
1813#endif
1814 if (pmap_track_modified(pv->pv_va))
1815 vm_page_dirty(m);
1816 }
984263bc 1817 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1818 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1819 ++pv->pv_pmap->pm_generation;
984263bc 1820 m->md.pv_list_count--;
1821 if (TAILQ_EMPTY(&m->md.pv_list))
1822 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
0f7a3396 1823 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info);
1824 free_pv_entry(pv);
1825 }
9acd5bbb 1826 crit_exit();
17cde63e 1827 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0);
0f7a3396 1828 pmap_inval_flush(&info);
1829}
1830
1831/*
1832 * pmap_protect:
1833 *
1834 * Set the physical protection on the specified range of this map
1835 * as requested.
1836 *
1837 * This function may not be called from an interrupt if the map is
1838 * not the kernel_pmap.
1839 */
1840void
1841pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1842{
840de426 1843 unsigned *ptbase;
1844 vm_offset_t pdnxt, ptpaddr;
1845 vm_pindex_t sindex, eindex;
0f7a3396 1846 pmap_inval_info info;
1847
1848 if (pmap == NULL)
1849 return;
1850
1851 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1852 pmap_remove(pmap, sva, eva);
1853 return;
1854 }
1855
1856 if (prot & VM_PROT_WRITE)
1857 return;
1858
0f7a3396 1859 pmap_inval_init(&info);
984263bc
MD
1860
1861 ptbase = get_ptbase(pmap);
1862
1863 sindex = i386_btop(sva);
1864 eindex = i386_btop(eva);
1865
1866 for (; sindex < eindex; sindex = pdnxt) {
1867
1868 unsigned pdirindex;
1869
1870 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1871
1872 pdirindex = sindex / NPDEPG;
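 /*
 * A 4MB page is write-protected by clearing PG_RW (and PG_M)
 * directly in the page directory entry.
 */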
1873 if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
0f7a3396 1874 pmap_inval_add(&info, pmap, -1);
55f2596a 1875 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
984263bc 1876 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
984263bc
MD
1877 continue;
1878 }
1879
1880 /*
1881 * Weed out invalid mappings. Note: we assume that the page
1882 * directory table is always allocated, and in kernel virtual.
1883 */
1884 if (ptpaddr == 0)
1885 continue;
1886
1887 if (pdnxt > eindex) {
1888 pdnxt = eindex;
1889 }
1890
1891 for (; sindex != pdnxt; sindex++) {
1892
1893 unsigned pbits;
1894 vm_page_t m;
1895
17cde63e
MD
1896 /*
1897 * XXX non-optimal. Note also that there can be
1898 * no pmap_inval_flush() calls until after we modify
1899 * ptbase[sindex] (or otherwise we have to do another
1900 * pmap_inval_add() call).
1901 */
0f7a3396 1902 pmap_inval_add(&info, pmap, i386_ptob(sindex));
984263bc
MD
1903 pbits = ptbase[sindex];
1904
1905 if (pbits & PG_MANAGED) {
1906 m = NULL;
1907 if (pbits & PG_A) {
1908 m = PHYS_TO_VM_PAGE(pbits);
1909 vm_page_flag_set(m, PG_REFERENCED);
1910 pbits &= ~PG_A;
1911 }
1912 if (pbits & PG_M) {
1913 if (pmap_track_modified(i386_ptob(sindex))) {
1914 if (m == NULL)
1915 m = PHYS_TO_VM_PAGE(pbits);
1916 vm_page_dirty(m);
1917 pbits &= ~PG_M;
1918 }
1919 }
1920 }
1921
1922 pbits &= ~PG_RW;
1923
1924 if (pbits != ptbase[sindex]) {
1925 ptbase[sindex] = pbits;
984263bc
MD
1926 }
1927 }
1928 }
0f7a3396 1929 pmap_inval_flush(&info);
984263bc
MD
1930}
1931
1932/*
1933 * Insert the given physical page (p) at
1934 * the specified virtual address (v) in the
1935 * target physical map with the protection requested.
1936 *
1937 * If specified, the page will be wired down, meaning
1938 * that the related pte can not be reclaimed.
1939 *
1940 * NB: This is the only routine which MAY NOT lazy-evaluate
1941 * or lose information. That is, this routine must actually
1942 * insert this page into the given map NOW.
1943 */
1944void
1945pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1946 boolean_t wired)
1947{
6ef943a3 1948 vm_paddr_t pa;
840de426 1949 unsigned *pte;
6ef943a3 1950 vm_paddr_t opa;
984263bc
MD
1951 vm_offset_t origpte, newpte;
1952 vm_page_t mpte;
0f7a3396 1953 pmap_inval_info info;
984263bc
MD
1954
1955 if (pmap == NULL)
1956 return;
1957
1958 va &= PG_FRAME;
1959#ifdef PMAP_DIAGNOSTIC
c439ad8f 1960 if (va >= KvaEnd)
984263bc
MD
1961 panic("pmap_enter: toobig");
1962 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
1963 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
1964#endif
fbbaeba3
MD
1965 if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) {
1966 kprintf("Warning: pmap_enter called on UVA with kernel_pmap\n");
1967#ifdef DDB
1968 db_print_backtrace();
1969#endif
1970 }
1971 if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) {
1972 kprintf("Warning: pmap_enter called on KVA without kernel_pmap\n");
1973#ifdef DDB
1974 db_print_backtrace();
1975#endif
1976 }
984263bc 1977
984263bc
MD
1978 /*
1979 * In the case that a page table page is not
1980 * resident, we are creating it here.
1981 */
17cde63e 1982 if (va < UPT_MIN_ADDRESS)
984263bc 1983 mpte = pmap_allocpte(pmap, va);
17cde63e
MD
1984 else
1985 mpte = NULL;
984263bc 1986
0f7a3396 1987 pmap_inval_init(&info);
984263bc
MD
1988 pte = pmap_pte(pmap, va);
1989
1990 /*
1991 * Page Directory table entry not valid, we need a new PT page
1992 */
1993 if (pte == NULL) {
6ef943a3
MD
1994 panic("pmap_enter: invalid page directory pdir=%x, va=0x%x\n",
1995 (unsigned) pmap->pm_pdir[PTDPTDI], va);
984263bc
MD
1996 }
1997
1998 pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
1999 origpte = *(vm_offset_t *)pte;
2000 opa = origpte & PG_FRAME;
2001
2002 if (origpte & PG_PS)
2003 panic("pmap_enter: attempted pmap_enter on 4MB page");
2004
2005 /*
2006 * Mapping has not changed, must be protection or wiring change.
2007 */
2008 if (origpte && (opa == pa)) {
2009 /*
2010 * Wiring change, just update stats. We don't worry about
2011 * wiring PT pages as they remain resident as long as there
2012 * are valid mappings in them. Hence, if a user page is wired,
2013 * the PT page will be also.
2014 */
2015 if (wired && ((origpte & PG_W) == 0))
2016 pmap->pm_stats.wired_count++;
2017 else if (!wired && (origpte & PG_W))
2018 pmap->pm_stats.wired_count--;
2019
2020#if defined(PMAP_DIAGNOSTIC)
2021 if (pmap_nw_modified((pt_entry_t) origpte)) {
26be20a0 2022 kprintf(
984263bc
MD
2023 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
2024 va, origpte);
2025 }
2026#endif
2027
2028 /*
639a9b43
MD
2029 * Remove the extra pte reference. Note that we cannot
2030 * optimize the RO->RW case because we have adjusted the
2031 * wiring count above and may need to adjust the wiring
2032 * bits below.
984263bc
MD
2033 */
2034 if (mpte)
2035 mpte->hold_count--;
2036
984263bc
MD
2037 /*
2038 * We might be turning off write access to the page,
2039 * so we go ahead and sense modify status.
2040 */
2041 if (origpte & PG_MANAGED) {
2042 if ((origpte & PG_M) && pmap_track_modified(va)) {
2043 vm_page_t om;
2044 om = PHYS_TO_VM_PAGE(opa);
2045 vm_page_dirty(om);
2046 }
2047 pa |= PG_MANAGED;
17cde63e 2048 KKASSERT(m->flags & PG_MAPPED);
984263bc
MD
2049 }
2050 goto validate;
2051 }
2052 /*
2053 * Mapping has changed, invalidate old range and fall through to
2054 * handle validating new mapping.
2055 */
2056 if (opa) {
2057 int err;
0f7a3396 2058 err = pmap_remove_pte(pmap, pte, va, &info);
984263bc
MD
2059 if (err)
2060 panic("pmap_enter: pte vanished, va: 0x%x", va);
2061 }
2062
2063 /*
2064 * Enter on the PV list if part of our managed memory. Note that we
2065 * raise IPL while manipulating pv_table since pmap_enter can be
2066 * called at interrupt time.
2067 */
2068 if (pmap_initialized &&
2069 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
2070 pmap_insert_entry(pmap, va, mpte, m);
2071 pa |= PG_MANAGED;
17cde63e 2072 vm_page_flag_set(m, PG_MAPPED);
984263bc
MD
2073 }
2074
2075 /*
2076 * Increment counters
2077 */
eec2b734 2078 ++pmap->pm_stats.resident_count;
984263bc
MD
2079 if (wired)
2080 pmap->pm_stats.wired_count++;
2081
2082validate:
2083 /*
2084 * Now validate mapping with desired protection/wiring.
2085 */
2086 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2087
2088 if (wired)
2089 newpte |= PG_W;
2090 if (va < UPT_MIN_ADDRESS)
2091 newpte |= PG_U;
fbbaeba3 2092 if (pmap == &kernel_pmap)
984263bc
MD
2093 newpte |= pgeflag;
2094
2095 /*
2096 * if the mapping or permission bits are different, we need
2097 * to update the pte.
2098 */
2099 if ((origpte & ~(PG_M|PG_A)) != newpte) {
17cde63e 2100 pmap_inval_add(&info, pmap, va);
984263bc 2101 *pte = newpte | PG_A;
17cde63e
MD
2102 if (newpte & PG_RW)
2103 vm_page_flag_set(m, PG_WRITEABLE);
984263bc 2104 }
c695044a 2105 KKASSERT((newpte & PG_MANAGED) == 0 || (m->flags & PG_MAPPED));
0f7a3396 2106 pmap_inval_flush(&info);
984263bc
MD
2107}
2108
2109/*
17cde63e
MD
2110 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired.
2111 * This code also assumes that the pmap has no pre-existing entry for this
2112 * VA.
2113 *
2114 * This code currently may only be used on user pmaps, not kernel_pmap.
984263bc 2115 */
17cde63e
MD
2116static void
2117pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
984263bc
MD
2118{
2119 unsigned *pte;
6ef943a3 2120 vm_paddr_t pa;
17cde63e
MD
2121 vm_page_t mpte;
2122 unsigned ptepindex;
2123 vm_offset_t ptepa;
0f7a3396
MD
2124 pmap_inval_info info;
2125
2126 pmap_inval_init(&info);
984263bc 2127
fbbaeba3
MD
2128 if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) {
2129 kprintf("Warning: pmap_enter_quick called on UVA with kernel_pmap\n");
2130#ifdef DDB
2131 db_print_backtrace();
2132#endif
2133 }
2134 if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) {
2135 kprintf("Warning: pmap_enter_quick called on KVA without kernel_pmap\n");
2136#ifdef DDB
2137 db_print_backtrace();
2138#endif
2139 }
2140
17cde63e
MD
2141 KKASSERT(va < UPT_MIN_ADDRESS); /* assert used on user pmaps only */
2142
984263bc 2143 /*
17cde63e
MD
2144 * Calculate the page table page (mpte), allocating it if necessary.
2145 *
2146 * A held page table page (mpte), or NULL, is passed onto the
2147 * section following.
984263bc
MD
2148 */
2149 if (va < UPT_MIN_ADDRESS) {
984263bc
MD
2150 /*
2151 * Calculate pagetable page index
2152 */
2153 ptepindex = va >> PDRSHIFT;
17cde63e
MD
2154
2155 do {
984263bc
MD
2156 /*
2157 * Get the page directory entry
2158 */
2159 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2160
2161 /*
2162 * If the page table page is mapped, we just increment
2163 * the hold count, and activate it.
2164 */
2165 if (ptepa) {
2166 if (ptepa & PG_PS)
2167 panic("pmap_enter_quick: unexpected mapping into 4MB page");
2168 if (pmap->pm_ptphint &&
17cde63e 2169 (pmap->pm_ptphint->pindex == ptepindex)) {
984263bc
MD
2170 mpte = pmap->pm_ptphint;
2171 } else {
2172 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2173 pmap->pm_ptphint = mpte;
2174 }
17cde63e
MD
2175 if (mpte)
2176 mpte->hold_count++;
984263bc
MD
2177 } else {
2178 mpte = _pmap_allocpte(pmap, ptepindex);
2179 }
17cde63e 2180 } while (mpte == NULL);
984263bc
MD
2181 } else {
2182 mpte = NULL;
17cde63e 2183 /* this code path is not yet used */
984263bc
MD
2184 }
2185
2186 /*
17cde63e
MD
2187 * With a valid (and held) page directory page, we can just use
2188 * vtopte() to get to the pte. If the pte is already present
2189 * we do not disturb it.
984263bc
MD
2190 */
2191 pte = (unsigned *)vtopte(va);
17cde63e 2192 if (*pte & PG_V) {
984263bc 2193 if (mpte)
0f7a3396 2194 pmap_unwire_pte_hold(pmap, mpte, &info);
17cde63e
MD
2195 pa = VM_PAGE_TO_PHYS(m);
2196 KKASSERT(((*pte ^ pa) & PG_FRAME) == 0);
2197 return;
984263bc
MD
2198 }
2199
2200 /*
17cde63e 2201 * Enter on the PV list if part of our managed memory
984263bc 2202 */
17cde63e 2203 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
984263bc 2204 pmap_insert_entry(pmap, va, mpte, m);
17cde63e
MD
2205 vm_page_flag_set(m, PG_MAPPED);
2206 }
984263bc
MD
2207
2208 /*
2209 * Increment counters
2210 */
eec2b734 2211 ++pmap->pm_stats.resident_count;
984263bc
MD
2212
2213 pa = VM_PAGE_TO_PHYS(m);
2214
2215 /*
2216 * Now validate mapping with RO protection
2217 */
2218 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
2219 *pte = pa | PG_V | PG_U;
2220 else
2221 *pte = pa | PG_V | PG_U | PG_MANAGED;
17cde63e
MD
2222/* pmap_inval_add(&info, pmap, va); shouldn't be needed inval->valid */
2223 pmap_inval_flush(&info);
984263bc
MD
2224}
2225
2226/*
2227 * Make a temporary mapping for a physical address. This is only intended
2228 * to be used for panic dumps.
2229 */
2230void *
6ef943a3 2231pmap_kenter_temporary(vm_paddr_t pa, int i)
984263bc
MD
2232{
2233 pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa);
2234 return ((void *)crashdumpmap);
2235}
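/*
 * A minimal usage sketch (hypothetical caller, for illustration only):
 * a dump routine can walk physical memory one page at a time with
 *
 *	char *va = pmap_kenter_temporary(pa, 0);
 *	...copy PAGE_SIZE bytes starting at va...
 *
 * The return value is always the base of crashdumpmap; a caller mapping
 * with i != 0 must add i * PAGE_SIZE to the returned address itself.
 */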
2236
2237#define MAX_INIT_PT (96)
06ecca5a 2238
984263bc 2239/*
06ecca5a
MD
2240 * This routine preloads the ptes for a given object into the specified pmap.
2241 * This eliminates the blast of soft faults on process startup and
2242 * immediately after an mmap.
984263bc 2243 */
1f804340
MD
2244static int pmap_object_init_pt_callback(vm_page_t p, void *data);
2245
984263bc 2246void
083a7402
MD
2247pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
2248 vm_object_t object, vm_pindex_t pindex,
2249 vm_size_t size, int limit)
984263bc 2250{
1f804340 2251 struct rb_vm_page_scan_info info;
287ebb09 2252 struct lwp *lp;
984263bc 2253 int psize;
984263bc 2254
54a764e8
MD
2255 /*
2256 * We can't preinit if read access isn't set or there is no pmap
2257 * or object.
2258 */
083a7402 2259 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL)
984263bc
MD
2260 return;
2261
54a764e8
MD
2262 /*
2263 * We can't preinit if the pmap is not the current pmap
2264 */
287ebb09
MD
2265 lp = curthread->td_lwp;
2266 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace))
54a764e8
MD
2267 return;
2268
984263bc
MD
2269 psize = i386_btop(size);
2270
2271 if ((object->type != OBJT_VNODE) ||
2272 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
2273 (object->resident_page_count > MAX_INIT_PT))) {
2274 return;
2275 }
2276
2277 if (psize + pindex > object->size) {
2278 if (object->size < pindex)
2279 return;
2280 psize = object->size - pindex;
2281 }
2282
1f804340
MD
2283 if (psize == 0)
2284 return;
06ecca5a 2285
984263bc 2286 /*
1f804340
MD
2287 * Use a red-black scan to traverse the requested range and load
2288 * any valid pages found into the pmap.
06ecca5a 2289 *
9acd5bbb
MD
2290 * We cannot safely scan the object's memq unless we are in a
2291 * critical section since interrupts can remove pages from objects.
984263bc 2292 */
1f804340
MD
2293 info.start_pindex = pindex;
2294 info.end_pindex = pindex + psize - 1;
2295 info.limit = limit;
2296 info.mpte = NULL;
2297 info.addr = addr;
2298 info.pmap = pmap;
2299
654a39f0 2300 crit_enter();
1f804340
MD
2301 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
2302 pmap_object_init_pt_callback, &info);
2303 crit_exit();
2304}
06ecca5a 2305
1f804340
MD
2306static
2307int
2308pmap_object_init_pt_callback(vm_page_t p, void *data)
2309{
2310 struct rb_vm_page_scan_info *info = data;
2311 vm_pindex_t rel_index;
2312 /*
 2313 * don't allow an madvise to blow away our really
 2314 * free pages by allocating pv entries.
2315 */
2316 if ((info->limit & MAP_PREFAULT_MADVISE) &&
2317 vmstats.v_free_count < vmstats.v_free_reserved) {
2318 return(-1);
984263bc 2319 }
1f804340
MD
2320 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2321 (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2322 if ((p->queue - p->pc) == PQ_CACHE)
2323 vm_page_deactivate(p);
2324 vm_page_busy(p);
2325 rel_index = p->pindex - info->start_pindex;
17cde63e
MD
2326 pmap_enter_quick(info->pmap,
2327 info->addr + i386_ptob(rel_index), p);
1f804340
MD
2328 vm_page_wakeup(p);
2329 }
2330 return(0);
984263bc
MD
2331}
2332
2333/*
06ecca5a
MD
2334 * pmap_prefault provides a quick way of clustering pagefaults into a
 2335 * process's address space. It is a "cousin" of pmap_object_init_pt,
2336 * except it runs at page fault time instead of mmap time.
984263bc
MD
2337 */
2338#define PFBAK 4
2339#define PFFOR 4
2340#define PAGEORDER_SIZE (PFBAK+PFFOR)
2341
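/*
 * Candidate offsets around the faulting address, nearest first: up to
 * PFBAK pages behind it and PFFOR pages ahead of it are considered for
 * prefaulting.
 */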
2342static int pmap_prefault_pageorder[] = {
2343 -PAGE_SIZE, PAGE_SIZE,
2344 -2 * PAGE_SIZE, 2 * PAGE_SIZE,
6302a396 2345 -3 * PAGE_SIZE, 3 * PAGE_SIZE,
984263bc
MD
2346 -4 * PAGE_SIZE, 4 * PAGE_SIZE
2347};
2348
2349void
840de426 2350pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
984263bc
MD
2351{
2352 int i;
2353 vm_offset_t starta;
2354 vm_offset_t addr;
2355 vm_pindex_t pindex;
17cde63e 2356 vm_page_t m;
984263bc 2357 vm_object_t object;
287ebb09 2358 struct lwp *lp;
984263bc 2359
75f59a66
MD
2360 /*
2361 * We do not currently prefault mappings that use virtual page
2362 * tables. We do not prefault foreign pmaps.
2363 */
2364 if (entry->maptype == VM_MAPTYPE_VPAGETABLE)
2365 return;
287ebb09
MD
2366 lp = curthread->td_lwp;
2367 if (lp == NULL || (pmap != vmspace_pmap(lp->lwp_vmspace)))
984263bc
MD
2368 return;
2369
2370 object = entry->object.vm_object;
2371
2372 starta = addra - PFBAK * PAGE_SIZE;
06ecca5a 2373 if (starta < entry->start)
984263bc 2374 starta = entry->start;
06ecca5a 2375 else if (starta > addra)
984263bc 2376 starta = 0;
984263bc 2377
06ecca5a 2378 /*
9acd5bbb
MD
2379 * critical section protection is required to maintain the
 2380 * page/object association; interrupts can free pages and remove
2381 * them from their objects.
06ecca5a 2382 */
654a39f0 2383 crit_enter();
984263bc
MD
2384 for (i = 0; i < PAGEORDER_SIZE; i++) {
2385 vm_object_t lobject;
2386 unsigned *pte;
2387
2388 addr = addra + pmap_prefault_pageorder[i];
2389 if (addr > addra + (PFFOR * PAGE_SIZE))
2390 addr = 0;
2391
2392 if (addr < starta || addr >= entry->end)
2393 continue;
2394
3641b7ca 2395 if ((*pmap_pde(pmap, addr)) == 0)
984263bc
MD
2396 continue;
2397
2398 pte = (unsigned *) vtopte(addr);
2399 if (*pte)
2400 continue;
2401
2402 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2403 lobject = object;
06ecca5a 2404
984263bc 2405 for (m = vm_page_lookup(lobject, pindex);
06ecca5a
MD
2406 (!m && (lobject->type == OBJT_DEFAULT) &&
2407 (lobject->backing_object));
2408 lobject = lobject->backing_object
2409 ) {
984263bc
MD
2410 if (lobject->backing_object_offset & PAGE_MASK)
2411 break;
2412 pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2413 m = vm_page_lookup(lobject->backing_object, pindex);
2414 }
2415
2416 /*
 2417 * give up when a page is not in memory
2418 */
2419 if (m == NULL)
2420 break;
2421
2422 if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2423 (m->busy == 0) &&
2424 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2425
2426 if ((m->queue - m->pc) == PQ_CACHE) {
2427 vm_page_deactivate(m);
2428 }
2429 vm_page_busy(m);
17cde63e 2430 pmap_enter_quick(pmap, addr, m);
984263bc
MD
2431 vm_page_wakeup(m);
2432 }
2433 }
654a39f0 2434 crit_exit();
984263bc
MD
2435}
2436
2437/*
2438 * Routine: pmap_change_wiring
2439 * Function: Change the wiring attribute for a map/virtual-address
2440 * pair.
2441 * In/out conditions:
2442 * The mapping must already exist in the pmap.
2443 */
2444void
840de426 2445pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
984263bc 2446{
840de426 2447 unsigned *pte;
984263bc
MD
2448
2449 if (pmap == NULL)
2450 return;
2451
2452 pte = pmap_pte(pmap, va);
2453
2454 if (wired && !pmap_pte_w(pte))
2455 pmap->pm_stats.wired_count++;
2456 else if (!wired && pmap_pte_w(pte))
2457 pmap->pm_stats.wired_count--;
2458
2459 /*
2460 * Wiring is not a hardware characteristic so there is no need to
0f7a3396
MD
2461 * invalidate TLB. However, in an SMP environment we must use
2462 * a locked bus cycle to update the pte (if we are not using
2463 * the pmap_inval_*() API that is)... it's ok to do this for simple
2464 * wiring changes.
984263bc 2465 */
0f7a3396
MD
2466#ifdef SMP
2467 if (wired)
2468 atomic_set_int(pte, PG_W);
2469 else
2470 atomic_clear_int(pte, PG_W);
2471#else
2472 if (wired)
2473 atomic_set_int_nonlocked(pte, PG_W);
2474 else
2475 atomic_clear_int_nonlocked(pte, PG_W);
2476#endif
984263bc
MD
2477}
2478
2479
2480
2481/*
2482 * Copy the range specified by src_addr/len
2483 * from the source map to the range dst_addr/len
2484 * in the destination map.
2485 *
2486 * This routine is only advisory and need not do anything.
2487 */
984263bc 2488void
840de426
MD
2489pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2490 vm_size_t len, vm_offset_t src_addr)
984263bc 2491{
0f7a3396 2492 pmap_inval_info info;
984263bc
MD
2493 vm_offset_t addr;
2494 vm_offset_t end_addr = src_addr + len;
2495 vm_offset_t pdnxt;
2496 unsigned src_frame, dst_frame;
2497 vm_page_t m;
2498
2499 if (dst_addr != src_addr)
2500 return;
17cde63e
MD
2501 /*
 2502 * XXX BUGGY. Among other things srcmpte is assumed to remain
2503 * valid through blocking calls, and that's just not going to
2504 * be the case.
2505 *
2506 * FIXME!
2507 */
2508 return;
984263bc
MD
2509
2510 src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2511 if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2512 return;
2513 }
2514
2515 dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2516 if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2517 APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
984263bc
MD
2518 /* The page directory is not shared between CPUs */
2519 cpu_invltlb();
984263bc 2520 }
0f7a3396
MD
2521 pmap_inval_init(&info);
2522 pmap_inval_add(&info, dst_pmap, -1);
2523 pmap_inval_add(&info, src_pmap, -1);
984263bc 2524
06ecca5a 2525 /*
654a39f0 2526 * critical section protection is required to maintain the page/object
06ecca5a
MD
 2527 * association; interrupts can free pages and remove them from
2528 * their objects.
2529 */
654a39f0 2530 crit_enter();
06ecca5a 2531 for (addr = src_addr; addr < end_addr; addr = pdnxt) {
984263bc
MD
2532 unsigned *src_pte, *dst_pte;
2533 vm_page_t dstmpte, srcmpte;
2534 vm_offset_t srcptepaddr;
2535 unsigned ptepindex;
2536
2537 if (addr >= UPT_MIN_ADDRESS)
2538 panic("pmap_copy: invalid to pmap_copy page tables\n");
2539
2540 /*
2541 * Don't let optional prefaulting of pages make us go
2542 * way below the low water mark of free pages or way
2543 * above high water mark of used pv entries.
2544 */
12e4aaff 2545 if (vmstats.v_free_count < vmstats.v_free_reserved ||
984263bc
MD
2546 pv_entry_count > pv_entry_high_water)
2547 break;
2548
2549 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2550 ptepindex = addr >> PDRSHIFT;
2551
2552 srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2553 if (srcptepaddr == 0)
2554 continue;
2555
2556 if (srcptepaddr & PG_PS) {
2557 if (dst_pmap->pm_pdir[ptepindex] == 0) {
2558 dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2559 dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2560 }
2561 continue;
2562 }
2563
2564 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
17cde63e
MD
2565 if ((srcmpte == NULL) || (srcmpte->hold_count == 0) ||
2566 (srcmpte->flags & PG_BUSY)) {
984263bc 2567 continue;
17cde63e 2568 }
984263bc
MD
2569
2570 if (pdnxt > end_addr)
2571 pdnxt = end_addr;
2572
2573 src_pte = (unsigned *) vtopte(addr);
2574 dst_pte = (unsigned *) avtopte(addr);
2575 while (addr < pdnxt) {
2576 unsigned ptetemp;
5e8d0349 2577
984263bc
MD
2578 ptetemp = *src_pte;
2579 /*
2580 * we only virtual copy managed pages
2581 */
2582 if ((ptetemp & PG_MANAGED) != 0) {
2583 /*
2584 * We have to check after allocpte for the
2585 * pte still being around... allocpte can
2586 * block.
eec2b734
MD
2587 *
2588 * pmap_allocpte() can block. If we lose
2589 * our page directory mappings we stop.
984263bc
MD
2590 */
2591 dstmpte = pmap_allocpte(dst_pmap, addr);
eec2b734
MD
2592
2593 if (src_frame != (((unsigned) PTDpde) & PG_FRAME) ||
2594 dst_frame != (((unsigned) APTDpde) & PG_FRAME)
2595 ) {
2596 kprintf("WARNING: pmap_copy: detected and corrected race\n");
2597 pmap_unwire_pte_hold(dst_pmap, dstmpte, &info);
2598 goto failed;
17cde63e
MD
2599 } else if ((*dst_pte == 0) &&
2600 (ptetemp = *src_pte) != 0 &&
2601 (ptetemp & PG_MANAGED)) {
984263bc
MD
2602 /*
2603 * Clear the modified and
2604 * accessed (referenced) bits
2605 * during the copy.
2606 */
2607 m = PHYS_TO_VM_PAGE(ptetemp);
70fc5283 2608 *dst_pte = ptetemp & ~(PG_M | PG_A);
eec2b734 2609 ++dst_pmap->pm_stats.resident_count;
984263bc
MD
2610 pmap_insert_entry(dst_pmap, addr,
2611 dstmpte, m);
17cde63e 2612 KKASSERT(m->flags & PG_MAPPED);
984263bc 2613 } else {
17cde63e 2614 kprintf("WARNING: pmap_copy: dst_pte race detected and corrected\n");
0f7a3396 2615 pmap_unwire_pte_hold(dst_pmap, dstmpte, &info);
17cde63e 2616 goto failed;
984263bc
MD
2617 }
2618 if (dstmpte->hold_count >= srcmpte->hold_count)
2619 break;
2620 }
2621 addr += PAGE_SIZE;
2622 src_pte++;
2623 dst_pte++;
2624 }
2625 }
eec2b734 2626failed:
654a39f0 2627 crit_exit();
0f7a3396 2628 pmap_inval_flush(&info);
984263bc
MD
2629}
2630
2631/*
e0e69b7d
MD
2632 * pmap_zero_page:
2633 *
2634 * Zero the specified PA by mapping the page into KVM and clearing its
2635 * contents.
2636 *
2637 * This function may be called from an interrupt and no locking is
2638 * required.
984263bc
MD
2639 */
2640void
6ef943a3 2641pmap_zero_page(vm_paddr_t phys)
984263bc 2642{
85100692 2643 struct mdglobaldata *gd = mdcpu;
17a9f566 2644
e0e69b7d 2645 crit_enter();
85100692
MD
2646 if (*(int *)gd->gd_CMAP3)
2647 panic("pmap_zero_page: CMAP3 busy");
85100692 2648 *(int *)gd->gd_CMAP3 =
17a9f566 2649 PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
85100692 2650 cpu_invlpg(gd->gd_CADDR3);
984263bc
MD
2651
2652#if defined(I686_CPU)
2653 if (cpu_class == CPUCLASS_686)
85100692 2654 i686_pagezero(gd->gd_CADDR3);
984263bc
MD
2655 else
2656#endif
85100692 2657 bzero(gd->gd_CADDR3, PAGE_SIZE);
85100692 2658 *(int *) gd->gd_CMAP3 = 0;
e0e69b7d 2659 crit_exit();
8100156a
MD
2660}
2661
2662/*
2663 * pmap_page_assertzero:
2664 *
2665 * Assert that a page is empty, panic if it isn't.
2666 */
2667void
2668pmap_page_assertzero(vm_paddr_t phys)
2669{
2670 struct mdglobaldata *gd = mdcpu;
2671 int i;
2672
2673 crit_enter();
2674 if (*(int *)gd->gd_CMAP3)
 2675 panic("pmap_page_assertzero: CMAP3 busy");
2676 *(int *)gd->gd_CMAP3 =
2677 PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2678 cpu_invlpg(gd->gd_CADDR3);
2679 for (i = 0; i < PAGE_SIZE; i += 4) {
2680 if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) {
2681 panic("pmap_page_assertzero() @ %p not zero!\n",
2682 (void *)gd->gd_CADDR3);
2683 }
2684 }
2685 *(int *) gd->gd_CMAP3 = 0;
2686 crit_exit();
984263bc
MD
2687}
2688
2689/*
e0e69b7d
MD
 2690 * pmap_zero_page_area:
2691 *
2692 * Zero part of a physical page by mapping it into memory and clearing
2693 * its contents with bzero.
984263bc
MD
2694 *
2695 * off and size may not cover an area beyond a single hardware page.
2696 */
2697void
6ef943a3 2698pmap_zero_page_area(vm_paddr_t phys, int off, int size)
984263bc 2699{
85100692 2700 struct mdglobaldata *gd = mdcpu;
17a9f566 2701
e0e69b7d 2702 crit_enter();
85100692
MD
2703 if (*(int *) gd->gd_CMAP3)
2704 panic("pmap_zero_page: CMAP3 busy");
85100692
MD
2705 *(int *) gd->gd_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2706 cpu_invlpg(gd->gd_CADDR3);
984263bc
MD
2707
2708#if defined(I686_CPU)
2709 if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
85100692 2710 i686_pagezero(gd->gd_CADDR3);
984263bc
MD
2711 else
2712#endif
85100692 2713 bzero((char *)gd->gd_CADDR3 + off, size);
85100692 2714 *(int *) gd->gd_CMAP3 = 0;
e0e69b7d 2715 crit_exit();
984263bc
MD
2716}
2717
2718/*
e0e69b7d
MD
2719 * pmap_copy_page:
2720 *
2721 * Copy the physical page from the source PA to the target PA.
2722 * This function may be called from an interrupt. No locking
2723 * is required.
984263bc
MD
2724 */
2725void
6ef943a3 2726pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
984263bc 2727{
85100692 2728 struct mdglobaldata *gd = mdcpu;
17a9f566 2729
e0e69b7d 2730 crit_enter();
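 /*
 * CMAP1/CMAP2 are this cpu's private temporary ptes and CADDR1/CADDR2
 * the kva they map; the critical section keeps us from being preempted
 * or migrated while they are in use.
 */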
85100692
MD
2731 if (*(int *) gd->gd_CMAP1)
2732 panic("pmap_copy_page: CMAP1 busy");
2733 if (*(int *) gd->gd_CMAP2)
2734 panic("pmap_copy_page: CMAP2 busy");
984263bc 2735
85100692
MD
2736 *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2737 *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
984263bc 2738
85100692
MD
2739 cpu_invlpg(gd->gd_CADDR1);
2740 cpu_invlpg(gd->gd_CADDR2);
984263bc 2741
85100692 2742 bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE);
984263bc 2743
85100692
MD
2744 *(int *) gd->gd_CMAP1 = 0;
2745 *(int *) gd->gd_CMAP2 = 0;
e0e69b7d 2746 crit_exit();
984263bc
MD
2747}
2748
f6bf3af1
MD
2749/*
2750 * pmap_copy_page_frag:
2751 *
2752 * Copy the physical page from the source PA to the target PA.
2753 * This function may be called from an interrupt. No locking
2754 * is required.
2755 */
2756void
2757pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
2758{
2759 struct mdglobaldata *gd = mdcpu;
2760
2761 crit_enter();
2762 if (*(int *) gd->gd_CMAP1)
2763 panic("pmap_copy_page: CMAP1 busy");
2764 if (*(int *) gd->gd_CMAP2)
2765 panic("pmap_copy_page: CMAP2 busy");
2766
2767 *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2768 *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2769
2770 cpu_invlpg(gd->gd_CADDR1);
2771 cpu_invlpg(gd->gd_CADDR2);
2772
2773 bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK),
2774 (char *)gd->gd_CADDR2 + (dst & PAGE_MASK),
2775 bytes);
2776
2777 *(int *) gd->gd_CMAP1 = 0;
2778 *(int *) gd->gd_CMAP2 = 0;
2779 crit_exit();
2780}
2781
984263bc
MD
2782/*
2783 * Returns true if the pmap's pv is one of the first
2784 * 16 pvs linked to from this page. This count may
2785 * be changed upwards or downwards in the future; it
2786 * is only necessary that true be returned for a small
2787 * subset of pmaps for proper page aging.
2788 */
2789boolean_t
840de426 2790pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
984263bc
MD
2791{
2792 pv_entry_t pv;
2793 int loops = 0;
984263bc
MD
2794
2795 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2796 return FALSE;
2797
9acd5bbb 2798 crit_enter();
984263bc
MD
2799
2800 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2801 if (pv->pv_pmap == pmap) {
9acd5bbb 2802 crit_exit();
984263bc
MD
2803 return TRUE;
2804 }
2805 loops++;
2806 if (loops >= 16)
2807 break;
2808 }
9acd5bbb 2809 crit_exit();
984263bc
MD
2810 return (FALSE);
2811}
2812
984263bc
MD
2813/*
 2814 * Remove all pages from the specified address space; this aids
 2815 * process exit speeds. Also, this code is special-cased for the
 2816 * current process only, but can have the more generic (and
 2817 * slightly slower) mode enabled. This is much faster than
 2818 * pmap_remove in the case of running down an entire address
 2819 * space.
2820 */
2821void
840de426 2822pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
984263bc 2823{
287ebb09 2824 struct lwp *lp;
984263bc
MD
2825 unsigned *pte, tpte;
2826 pv_entry_t pv, npv;
984263bc 2827 vm_page_t m;
0f7a3396 2828 pmap_inval_info info;
4a22e893 2829 int iscurrentpmap;
8790d7d8 2830 int32_t save_generation;
984263bc 2831
287ebb09
MD
2832 lp = curthread->td_lwp;
2833 if (lp && pmap == vmspace_pmap(lp->lwp_vmspace))
4a22e893
MD
2834 iscurrentpmap = 1;
2835 else
2836 iscurrentpmap = 0;
984263bc 2837
0f7a3396 2838 pmap_inval_init(&info);
9acd5bbb 2839 crit_enter();
4a22e893 2840 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
984263bc
MD
2841 if (pv->pv_va >= eva || pv->pv_va < sva) {
2842 npv = TAILQ_NEXT(pv, pv_plist);
2843 continue;
2844 }
2845
8790d7d8
MD
2846 KKASSERT(pmap == pv->pv_pmap);
2847
4a22e893
MD
2848 if (iscurrentpmap)
2849 pte = (unsigned *)vtopte(pv->pv_va);
2850 else
8790d7d8 2851 pte = pmap_pte_quick(pmap, pv->pv_va);
4a22e893 2852 if (pmap->pm_active)
8790d7d8 2853 pmap_inval_add(&info, pmap, pv->pv_va);
984263bc 2854
4a22e893
MD
2855 /*
2856 * We cannot remove wired pages from a process' mapping
2857 * at this time
2858 */
17cde63e 2859 if (*pte & PG_W) {
984263bc
MD
2860 npv = TAILQ_NEXT(pv, pv_plist);
2861 continue;
2862 }
17cde63e 2863 tpte = loadandclear(pte);
984263bc
MD
2864
2865 m = PHYS_TO_VM_PAGE(tpte);
2866
2867 KASSERT(m < &vm_page_array[vm_page_array_size],
2868 ("pmap_remove_pages: bad tpte %x", tpte));
2869
eec2b734
MD
2870 KKASSERT(pmap->pm_stats.resident_count > 0);
2871 --pmap->pm_stats.resident_count;
984263bc
MD
2872
2873 /*
2874 * Update the vm_page_t clean and reference bits.
2875 */
2876 if (tpte & PG_M) {
2877 vm_page_dirty(m);
2878 }
2879
984263bc 2880 npv = TAILQ_NEXT(pv, pv_plist);
8790d7d8
MD
2881 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
2882 save_generation = ++pmap->pm_generation;
984263bc
MD
2883
2884 m->md.pv_list_count--;
2885 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
17cde63e 2886 if (TAILQ_EMPTY(&m->md.pv_list))
984263bc 2887 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
984263bc 2888
8790d7d8 2889 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem, &info);
984263bc 2890 free_pv_entry(pv);
8790d7d8
MD
2891
2892 /*
2893 * Restart the scan if we blocked during the unuse or free
2894 * calls and other removals were made.
2895 */
2896 if (save_generation != pmap->pm_generation) {
2897 kprintf("Warning: pmap_remove_pages race-A avoided\n");
2898 pv = TAILQ_FIRST(&pmap->pm_pvlist);
2899 }
984263bc 2900 }
0f7a3396 2901 pmap_inval_flush(&info);
9acd5bbb 2902 crit_exit();
984263bc
MD
2903}
2904
2905/*
2906 * pmap_testbit tests bits in pte's
5e8d0349 2907 * note that the testbit/clearbit routines are inline,
984263bc
MD
2908 * and a lot of things compile-time evaluate.
2909 */
2910static boolean_t
840de426 2911pmap_testbit(vm_page_t m, int bit)
984263bc
MD
2912{
2913 pv_entry_t pv;
2914 unsigned *pte;
984263bc
MD
2915
2916 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2917 return FALSE;
2918
2919 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2920 return FALSE;
2921
9acd5bbb 2922 crit_enter();
984263bc
MD
2923
2924 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2925 /*
2926 * if the bit being tested is the modified bit, then
2927 * mark clean_map and ptes as never
2928 * modified.
2929 */
2930 if (bit & (PG_A|PG_M)) {
2931 if (!pmap_track_modified(pv->pv_va))
2932 continue;
2933 }
2934
2935#if defined(PMAP_DIAGNOSTIC)
2936 if (!pv->pv_pmap) {
26be20a0 2937 kprintf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
984263bc
MD
2938 continue;
2939 }
2940#endif
2941 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2942 if (*pte & bit) {
9acd5bbb 2943 crit_exit();
984263bc
MD
2944 return TRUE;
2945 }
2946 }
9acd5bbb 2947 crit_exit();
984263bc
MD
2948 return (FALSE);
2949}
2950
2951/*
2952 * this routine is used to modify bits in ptes
2953 */
2954static __inline void
5e8d0349 2955pmap_clearbit(vm_page_t m, int bit)
984263bc 2956{
0f7a3396 2957 struct pmap_inval_info info;
840de426
MD
2958 pv_entry_t pv;
2959 unsigned *pte;
5e8d0349 2960 unsigned pbits;
984263bc
MD
2961
2962 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2963 return;
2964
0f7a3396 2965 pmap_inval_init(&info);
9acd5bbb 2966 crit_enter();
984263bc
MD
2967
2968 /*
 2969 * Loop over all current mappings, setting/clearing as appropriate.
 2970 * If setting RO, do we need to clear the VAC?
2971 */
2972 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2973 /*
2974 * don't write protect pager mappings
2975 */
5e8d0349 2976 if (bit == PG_RW) {
984263bc
MD
2977 if (!pmap_track_modified(pv->pv_va))
2978 continue;
2979 }
2980
2981#if defined(PMAP_DIAGNOSTIC)
2982 if (!pv->pv_pmap) {
26be20a0 2983 kprintf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
984263bc
MD
2984 continue;
2985 }
2986#endif
2987
0f7a3396
MD
2988 /*
2989 * Careful here. We can use a locked bus instruction to
2990 * clear PG_A or PG_M safely but we need to synchronize
2991 * with the target cpus when we mess with PG_RW.
70fc5283
MD
2992 *
2993 * We do not have to force synchronization when clearing
2994 * PG_M even for PTEs generated via virtual memory maps,
2995 * because the virtual kernel will invalidate the pmap
2996 * entry when/if it needs to resynchronize the Modify bit.
0f7a3396 2997 */
70fc5283 2998 if (bit & PG_RW)
0f7a3396 2999 pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
17cde63e
MD
3000 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3001again:
5e8d0349
MD
3002 pbits = *pte;
3003 if (pbits & bit) {
3004 if (bit == PG_RW) {
17cde63e 3005 if (pbits & PG_M) {
5e8d0349 3006 vm_page_dirty(m);
17cde63e
MD
3007 atomic_clear_int(pte, PG_M|PG_RW);
3008 } else {
3009 /*
3010 * The cpu may be trying to set PG_M
 3011 * simultaneously with our clearing
3012 * of PG_RW.
3013 */
3014 if (!atomic_cmpset_int(pte, pbits,
3015 pbits & ~PG_RW))
3016 goto again;
3017 }
5e8d0349
MD
3018 } else if (bit == PG_M) {
3019 /*
70fc5283
MD
3020 * We could also clear PG_RW here to force
3021 * a fault on write to redetect PG_M for
3022 * virtual kernels, but it isn't necessary
3023 * since virtual kernels invalidate the pte
3024 * when they clear the VPTE_M bit in their
3025 * virtual page tables.
5e8d0349 3026 */
70fc5283 3027 atomic_clear_int(pte, PG_M);
5e8d0349
MD
3028 } else {
3029 atomic_clear_int(pte, bit);
984263bc
MD
3030 }
3031 }
3032 }
0f7a3396 3033 pmap_inval_flush(&info);
9acd5bbb 3034 crit_exit();
984263bc
MD
3035}
3036
3037/*
3038 * pmap_page_protect:
3039 *
3040 * Lower the permission for all mappings to a given page.
3041 */
3042void
3043pmap_page_protect(vm_page_t m, vm_prot_t prot)
3044{
3045 if ((prot & VM_PROT_WRITE) == 0) {
3046 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
5e8d0349 3047 pmap_clearbit(m, PG_RW);
17cde63e 3048 vm_page_flag_clear(m, PG_WRITEABLE);
984263bc
MD
3049 } else {
3050 pmap_remove_all(m);
3051 }
3052 }
3053}
3054
6ef943a3 3055vm_paddr_t
cfd17028 3056pmap_phys_address(vm_pindex_t ppn)
984263bc
MD
3057{
3058 return (i386_ptob(ppn));
3059}
3060
3061/*
3062 * pmap_ts_referenced:
3063 *
3064 * Return a count of reference bits for a page, clearing those bits.
3065 * It is not necessary for every reference bit to be cleared, but it
3066 * is necessary that 0 only be returned when there are truly no
3067 * reference bits set.
3068 *
3069 * XXX: The exact number of bits to check and clear is a matter that
3070 * should be tested and standardized at some point in the future for
3071 * optimal aging of shared pages.
3072 */
3073int
3074pmap_ts_referenced(vm_page_t m)
3075{
840de426 3076 pv_entry_t pv, pvf, pvn;
984263bc 3077 unsigned *pte;
984263bc
MD
3078 int rtval = 0;
3079
3080 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3081 return (rtval);
3082
9acd5bbb 3083 crit_enter();
984263bc
MD
3084
3085 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
3086
3087 pvf = pv;
3088
3089 do {
3090 pvn = TAILQ_NEXT(pv, pv_list);
3091
3092 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
3093
3094 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
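 /*
 * Rotate the pv to the tail of the list so that successive calls
 * start their scan with a different subset of mappings, spreading
 * the aging work across all pmaps mapping the page.
 */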
3095
3096 if (!pmap_track_modified(pv->pv_va))
3097 continue;
3098
3099 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3100
3101 if (pte && (*pte & PG_A)) {
0f7a3396
MD
3102#ifdef SMP
3103 atomic_clear_int(pte, PG_A);
3104#else
3105 atomic_clear_int_nonlocked(pte, PG_A);
3106#endif
984263bc
MD
3107 rtval++;
3108 if (rtval > 4) {
3109 break;
3110 }
3111 }
3112 } while ((pv = pvn) != NULL && pv != pvf);
3113 }
9acd5bbb 3114 crit_exit();
984263bc
MD
3115
3116 return (rtval);
3117}
3118
3119/*
3120 * pmap_is_modified:
3121 *
3122 * Return whether or not the specified physical page was modified
3123 * in any physical maps.
3124 */
3125boolean_t
3126pmap_is_modified(vm_page_t m)
3127{
3128 return pmap_testbit(m, PG_M);
3129}
3130
3131/*
3132 * Clear the modify bits on the specified physical page.
3133 */
3134void
3135pmap_clear_modify(vm_page_t m)
3136{
5e8d0349 3137 pmap_clearbit(m, PG_M);
984263bc
MD
3138}
3139
3140/*
3141 * pmap_clear_reference:
3142 *
3143 * Clear the reference bit on the specified physical page.
3144 */
3145void
3146pmap_clear_reference(vm_page_t m)
3147{
5e8d0349 3148 pmap_clearbit(m, PG_A);
984263bc
MD
3149}
3150
3151/*
3152 * Miscellaneous support routines follow
3153 */
3154
3155static void
840de426 3156i386_protection_init(void)
984263bc 3157{
840de426 3158 int *kp, prot;
984263bc
MD
3159
3160 kp = protection_codes;
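 /*
 * protection_codes[] is indexed by the VM_PROT_READ | VM_PROT_WRITE |
 * VM_PROT_EXECUTE combination (0-7).  The i386 pte has no execute
 * bit, so only the presence of write access changes the result
 * (PG_RW vs 0).
 */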
3161 for (prot = 0; prot < 8; prot++) {
3162 switch (prot) {
3163 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3164 /*
3165 * Read access is also 0. There isn't any execute bit,
3166 * so just make it readable.
3167 */
3168 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3169 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3170 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3171 *kp++ = 0;
3172 break;
3173 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3174 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3175 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3176 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3177 *kp++ = PG_RW;
3178 break;
3179 }
3180 }
3181}
3182
3183/*
3184 * Map a set of physical memory pages into the kernel virtual
3185 * address space. Return a pointer to where it is mapped. This
3186 * routine is intended to be used for mapping device memory,
3187 * NOT real memory.
a2a5ad0d
MD
3188 *
3189 * NOTE: we can't use pgeflag unless we invalidate the pages one at
3190 * a time.
984263bc
MD
3191 */
3192void *
6ef943a3 3193pmap_mapdev(vm_paddr_t pa, vm_size_t size)
984263bc
MD
3194{
3195 vm_offset_t va, tmpva, offset;
3196 unsigned *pte;
3197
3198 offset = pa & PAGE_MASK;
3199 size = roundup(offset + size, PAGE_SIZE);
3200
e4846942 3201 va = kmem_alloc_nofault(&kernel_map, size);
984263bc
MD
3202 if (!va)
3203 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3204
3205 pa = pa & PG_FRAME;
3206 for (tmpva = va; size > 0;) {
3207 pte = (unsigned *)vtopte(tmpva);
a2a5ad0d 3208 *pte = pa | PG_RW | PG_V; /* | pgeflag; */
984263bc
MD
3209 size -= PAGE_SIZE;
3210 tmpva += PAGE_SIZE;
3211 pa += PAGE_SIZE;
3212 }
0f7a3396
MD
3213 cpu_invltlb();
3214 smp_invltlb();
984263bc
MD
3215
3216 return ((void *)(va + offset));
3217}
3218
3219void
840de426 3220pmap_unmapdev(vm_offset_t va, vm_size_t size)
984263bc
MD
3221{
3222 vm_offset_t base, offset;
3223
3224 base = va & PG_FRAME;
3225 offset = va & PAGE_MASK;
3226 size = roundup(offset + size, PAGE_SIZE);
0f579831 3227 pmap_qremove(va, size >> PAGE_SHIFT);
e4846942 3228 kmem_free(&kernel_map, base, size);
984263bc
MD
3229}
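/*
 * A minimal usage sketch (hypothetical driver, for illustration only),
 * where bar_pa and bar_size describe a device memory range:
 *
 *	void *regs = pmap_mapdev(bar_pa, bar_size);
 *	...access the device registers through regs...
 *	pmap_unmapdev((vm_offset_t)regs, bar_size);
 */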
3230
3231/*
3232 * perform the pmap work for mincore
3233 */
3234int
840de426 3235pmap_mincore(pmap_t pmap, vm_offset_t addr)
984263bc 3236{
984263bc
MD
3237 unsigned *ptep, pte;
3238 vm_page_t m;
3239 int val = 0;
3240
3241 ptep = pmap_pte(pmap, addr);
3242 if (ptep == 0) {
3243 return 0;
3244 }
3245
3246 if ((pte = *ptep) != 0) {
3247 vm_offset_t pa;
3248
3249 val = MINCORE_INCORE;
3250 if ((pte & PG_MANAGED) == 0)
3251 return val;
3252
3253 pa = pte & PG_FRAME;
3254
3255 m = PHYS_TO_VM_PAGE(pa);
3256
3257 /*
3258 * Modified by us
3259 */
3260 if (pte & PG_M)
3261 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3262 /*
3263 * Modified by someone
3264 */
3265 else if (m->dirty || pmap_is_modified(m))
3266 val |= MINCORE_MODIFIED_OTHER;
3267 /*
3268 * Referenced by us
3269 */
3270 if (pte & PG_A)
3271 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3272
3273 /*
3274 * Referenced by someone
3275 */
3276 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
3277 val |= MINCORE_REFERENCED_OTHER;
3278 vm_page_flag_set(m, PG_REFERENCED);
3279 }
3280 }
3281 return val;
3282}
3283
e3161323
MD
3284/*
3285 * Replace p->p_vmspace with a new one. If adjrefs is non-zero the new
3286 * vmspace will be ref'd and the old one will be deref'd.
3287 *
287ebb09
MD
3288 * The vmspace for all lwps associated with the process will be adjusted
3289 * and cr3 will be reloaded if any lwp is the current lwp.
e3161323 3290 */
984263bc 3291void
e3161323 3292pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
984263bc 3293{
e3161323 3294 struct vmspace *oldvm;
287ebb09 3295 struct lwp *lp;
08f2f1bb 3296
287ebb09 3297 crit_enter();
e3161323
MD
3298 oldvm = p->p_vmspace;
3299 if (oldvm != newvm) {
e3161323 3300 p->p_vmspace = newvm;
287ebb09 3301 KKASSERT(p->p_nthreads == 1);
3e291793 3302 lp = RB_ROOT(&p->p_lwp_tree);
287ebb09
MD
3303 pmap_setlwpvm(lp, newvm);
3304 if (adjrefs) {
3305 sysref_get(&newvm->vm_sysref);
3306 sysref_put(&oldvm->vm_sysref);
3307 }
3308 }
3309 crit_exit();
3310}
3311
3312/*
3313 * Set the vmspace for a LWP. The vmspace is almost universally set the
3314 * same as the process vmspace, but virtual kernels need to swap out contexts
3315 * on a per-lwp basis.
3316 */
3317void
3318pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
3319{
3320 struct vmspace *oldvm;
3321 struct pmap *pmap;
3322
3323 crit_enter();
3324 oldvm = lp->lwp_vmspace;
3325
3326 if (oldvm != newvm) {
3327 lp->lwp_vmspace = newvm;
3328 if (curthread->td_lwp == lp) {
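 /*
 * The lwp is running: activate the new pmap on this cpu, switch
 * cr3 to its page directory, and deactivate the old pmap for
 * this cpu.
 */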
e3161323 3329 pmap = vmspace_pmap(newvm);
984263bc 3330#if defined(SMP)
e3161323 3331 atomic_set_int(&pmap->pm_active, 1 << mycpu->gd_cpuid);
984263bc 3332#else
e3161323 3333 pmap->pm_active |= 1;
984263bc
MD
3334#endif
3335#if defined(SWTCH_OPTIM_STATS)
e3161323 3336 tlb_flush_count++;
984263bc 3337#endif
e3161323
MD
3338 curthread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pdir);
3339 load_cr3(curthread->td_pcb->pcb_cr3);
3340 pmap = vmspace_pmap(oldvm);
4a22e893 3341#if defined(SMP)
e3161323
MD
3342 atomic_clear_int(&pmap->pm_active,
3343 1 << mycpu->gd_cpuid);
4a22e893 3344#else
e3161323 3345 pmap->pm_active &= ~1;
4a22e893 3346#endif
e3161323 3347 }
e3161323
MD
3348 }
3349 crit_exit();
4a22e893
MD
3350}
3351
984263bc
MD
3352vm_offset_t
3353pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3354{
3355
3356 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3357 return addr;
3358 }
3359
3360 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3361 return addr;
3362}
3363
3364
984263bc
MD
3365#if defined(DEBUG)
3366
3ae0cd58 3367static void pads (pmap_t pm);
c469b1c4 3368void pmap_pvdump (vm_paddr_t pa);
984263bc
MD
3369
3370/* print address space of pmap*/
3371static void
840de426 3372pads(pmap_t pm)
984263bc
MD
3373{
3374 unsigned va, i, j;
3375 unsigned *ptep;
3376
fbbaeba3 3377 if (pm == &kernel_pmap)
984263bc 3378 return;
eec2b734
MD
3379 crit_enter();
3380 for (i = 0; i < 1024; i++) {
3381 if (pm->pm_pdir[i]) {
984263bc
MD
3382 for (j = 0; j < 1024; j++) {
3383 va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
fbbaeba3 3384 if (pm == &kernel_pmap && va < KERNBASE)
984263bc 3385 continue;
fbbaeba3 3386 if (pm != &kernel_pmap && va > UPT_MAX_ADDRESS)
984263bc
MD
3387 continue;
3388 ptep = pmap_pte_quick(pm, va);
3389 if (pmap_pte_v(ptep))
26be20a0 3390 kprintf("%x:%x ", va, *(int *) ptep);
984263bc 3391 };
eec2b734
MD
3392 }
3393 }
3394 crit_exit();
984263bc
MD
3395