kernel - Attempt to fix i386 wire_count panic
[dragonfly.git] / sys / platform / pc32 / i386 / pmap.c
984263bc 1/*
4107b0c0
MD
2 * (MPSAFE)
3 *
984263bc
MD
4 * Copyright (c) 1991 Regents of the University of California.
5 * All rights reserved.
6 * Copyright (c) 1994 John S. Dyson
7 * All rights reserved.
8 * Copyright (c) 1994 David Greenman
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 * must display the following acknowledgement:
25 * This product includes software developed by the University of
26 * California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
44 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
45 */
46
47/*
4107b0c0 48 * Manages physical address maps.
984263bc 49 *
b12defdc 50 * In most cases we hold page table pages busy in order to manipulate them.
984263bc 51 */
5926987a
MD
52/*
53 * PMAP_DEBUG - see platform/pc32/include/pmap.h
54 */
984263bc
MD
55
56#include "opt_disable_pse.h"
57#include "opt_pmap.h"
58#include "opt_msgbuf.h"
984263bc
MD
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/kernel.h>
63#include <sys/proc.h>
64#include <sys/msgbuf.h>
65#include <sys/vmmeter.h>
66#include <sys/mman.h>
b12defdc 67#include <sys/thread.h>
984263bc
MD
68
69#include <vm/vm.h>
70#include <vm/vm_param.h>
71#include <sys/sysctl.h>
72#include <sys/lock.h>
73#include <vm/vm_kern.h>
74#include <vm/vm_page.h>
75#include <vm/vm_map.h>
76#include <vm/vm_object.h>
77#include <vm/vm_extern.h>
78#include <vm/vm_pageout.h>
79#include <vm/vm_pager.h>
80#include <vm/vm_zone.h>
81
82#include <sys/user.h>
e0e69b7d 83#include <sys/thread2.h>
e3161323 84#include <sys/sysref2.h>
b12defdc 85#include <sys/spinlock2.h>
90244566 86#include <vm/vm_page2.h>
984263bc
MD
87
88#include <machine/cputypes.h>
89#include <machine/md_var.h>
90#include <machine/specialreg.h>
984263bc 91#include <machine/smp.h>
a9295349 92#include <machine_base/apic/apicreg.h>
85100692 93#include <machine/globaldata.h>
0f7a3396
MD
94#include <machine/pmap.h>
95#include <machine/pmap_inval.h>
984263bc
MD
96
97#define PMAP_KEEP_PDIRS
98#ifndef PMAP_SHPGPERPROC
99#define PMAP_SHPGPERPROC 200
948209ce 100#define PMAP_PVLIMIT 1400000 /* i386 kvm problems */
984263bc
MD
101#endif
102
103#if defined(DIAGNOSTIC)
104#define PMAP_DIAGNOSTIC
105#endif
106
107#define MINPV 2048
108
109#if !defined(PMAP_DIAGNOSTIC)
110#define PMAP_INLINE __inline
111#else
112#define PMAP_INLINE
113#endif
114
115/*
116 * Get PDEs and PTEs for user/kernel address space
117 */
118#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
119#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
120
121#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
122#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
123#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
124#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
125#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
126
984263bc
MD
127/*
128 * Given a map and a machine independent protection code,
129 * convert to a vax protection code.
130 */
639a9b43
MD
131#define pte_prot(m, p) \
132 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
984263bc
MD
133static int protection_codes[8];
134
fbbaeba3 135struct pmap kernel_pmap;
54a764e8
MD
136static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list);
137
e880033d 138vm_paddr_t avail_start; /* PA of first available physical page */
6ef943a3 139vm_paddr_t avail_end; /* PA of last available physical page */
e880033d 140vm_offset_t virtual_start; /* VA of first avail page (after kernel bss) */
984263bc 141vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
791c6551
MD
142vm_offset_t virtual2_start;
143vm_offset_t virtual2_end;
c439ad8f
MD
144vm_offset_t KvaStart; /* VA start of KVA space */
145vm_offset_t KvaEnd; /* VA end of KVA space (non-inclusive) */
146vm_offset_t KvaSize; /* max size of kernel virtual address space */
984263bc
MD
147static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
148static int pgeflag; /* PG_G or-in */
149static int pseflag; /* PG_PS or-in */
150
151static vm_object_t kptobj;
152
153static int nkpt;
154vm_offset_t kernel_vm_end;
155
156/*
157 * Data for the pv entry allocation mechanism
158 */
159static vm_zone_t pvzone;
160static struct vm_zone pvzone_store;
161static struct vm_object pvzone_obj;
162static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
163static int pmap_pagedaemon_waken = 0;
164static struct pv_entry *pvinit;
165
166/*
a93980ab
MD
167 * Considering all the issues I'm having with pmap caching, if breakage
168 * continues to occur, and for debugging, I've added a sysctl that will
169 * just do an unconditional invltlb.
170 */
171static int dreadful_invltlb;
172
173SYSCTL_INT(_vm, OID_AUTO, dreadful_invltlb,
9733f757 174 CTLFLAG_RW, &dreadful_invltlb, 0, "Debugging sysctl to force invltlb on pmap operations");
a93980ab
MD
175
176/*
984263bc
MD
177 * All those kernel PT submaps that BSD is so fond of
178 */
4090d6ff 179pt_entry_t *CMAP1 = NULL, *ptmmap;
984263bc 180caddr_t CADDR1 = 0, ptvmmap = 0;
984263bc 181static pt_entry_t *msgbufmap;
4090d6ff 182struct msgbuf *msgbufp=NULL;
984263bc
MD
183
184/*
185 * Crashdump maps.
186 */
187static pt_entry_t *pt_crashdumpmap;
188static caddr_t crashdumpmap;
189
984263bc 190extern pt_entry_t *SMPpt;
984263bc 191
3ae0cd58
RG
192static PMAP_INLINE void free_pv_entry (pv_entry_t pv);
193static unsigned * get_ptbase (pmap_t pmap);
194static pv_entry_t get_pv_entry (void);
195static void i386_protection_init (void);
5e8d0349 196static __inline void pmap_clearbit (vm_page_t m, int bit);
3ae0cd58
RG
197
198static void pmap_remove_all (vm_page_t m);
0f7a3396
MD
199static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq,
200 vm_offset_t sva, pmap_inval_info_t info);
201static void pmap_remove_page (struct pmap *pmap,
202 vm_offset_t va, pmap_inval_info_t info);
3ae0cd58 203static int pmap_remove_entry (struct pmap *pmap, vm_page_t m,
0f7a3396 204 vm_offset_t va, pmap_inval_info_t info);
3ae0cd58 205static boolean_t pmap_testbit (vm_page_t m, int bit);
2bb9cc6f
MD
206static void pmap_insert_entry (pmap_t pmap, pv_entry_t pv,
207 vm_offset_t va, vm_page_t mpte, vm_page_t m);
3ae0cd58
RG
208
209static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);
210
211static int pmap_release_free_page (pmap_t pmap, vm_page_t p);
212static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex);
213static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va);
214static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
0f7a3396 215static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t);
984263bc
MD
216static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
217
218static unsigned pdir4mb;
219
220/*
840de426
MD
221 * Move the kernel virtual free pointer to the next
222 * 4MB. This is used to help improve performance
223 * by using a large (4MB) page for much of the kernel
224 * (.text, .data, .bss)
225 */
4107b0c0
MD
226static
227vm_offset_t
840de426
MD
228pmap_kmem_choose(vm_offset_t addr)
229{
230 vm_offset_t newaddr = addr;
231#ifndef DISABLE_PSE
232 if (cpu_feature & CPUID_PSE) {
233 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
234 }
235#endif
236 return newaddr;
237}
238
239/*
4107b0c0
MD
240 * This function returns a pointer to the pte entry in the pmap and has
241 * the side effect of potentially retaining a cached mapping of the pmap.
e0e69b7d 242 *
4107b0c0
MD
243 * The caller must hold vm_token and the returned value is only valid
244 * until the caller blocks or releases the token.
984263bc 245 */
4107b0c0
MD
246static
247unsigned *
840de426 248pmap_pte(pmap_t pmap, vm_offset_t va)
984263bc
MD
249{
250 unsigned *pdeaddr;
251
4107b0c0 252 ASSERT_LWKT_TOKEN_HELD(&vm_token);
984263bc
MD
253 if (pmap) {
254 pdeaddr = (unsigned *) pmap_pde(pmap, va);
255 if (*pdeaddr & PG_PS)
256 return pdeaddr;
4107b0c0 257 if (*pdeaddr)
984263bc 258 return get_ptbase(pmap) + i386_btop(va);
984263bc
MD
259 }
260 return (0);
261}
262
263/*
4107b0c0
MD
264 * pmap_pte using the kernel_pmap
265 *
266 * Used for debugging, no requirements.
267 */
268unsigned *
269pmap_kernel_pte(vm_offset_t va)
270{
271 unsigned *pdeaddr;
272
273 pdeaddr = (unsigned *) pmap_pde(&kernel_pmap, va);
274 if (*pdeaddr & PG_PS)
275 return pdeaddr;
276 if (*pdeaddr)
277 return (unsigned *)vtopte(va);
278 return(0);
279}
280
281/*
e0e69b7d
MD
282 * pmap_pte_quick:
283 *
c1692ddf
MD
284 * Super fast pmap_pte routine best used when scanning the pv lists.
285 * This eliminates many coarse-grained invltlb calls. Note that many of
286 * the pv list scans are across different pmaps and it is very wasteful
287 * to do an entire invltlb when checking a single mapping.
e0e69b7d 288 *
c1692ddf
MD
289 * Should only be called while in a critical section.
290 *
4107b0c0
MD
291 * The caller must hold vm_token and the returned value is only valid
292 * until the caller blocks or releases the token.
984263bc 293 */
4107b0c0
MD
294static
295unsigned *
840de426 296pmap_pte_quick(pmap_t pmap, vm_offset_t va)
984263bc 297{
840de426
MD
298 struct mdglobaldata *gd = mdcpu;
299 unsigned pde, newpf;
300
4107b0c0 301 ASSERT_LWKT_TOKEN_HELD(&vm_token);
840de426
MD
302 if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
303 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
304 unsigned index = i386_btop(va);
305 /* are we current address space or kernel? */
fbbaeba3 306 if ((pmap == &kernel_pmap) ||
840de426
MD
307 (frame == (((unsigned) PTDpde) & PG_FRAME))) {
308 return (unsigned *) PTmap + index;
309 }
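		/*
		 * Not the current or kernel pmap: repoint the per-cpu
		 * PMAP1 pte so that the PADDR1 window maps the target
		 * page table page, invalidating the old translation only
		 * when the cached frame actually changes.
		 */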
310 newpf = pde & PG_FRAME;
4107b0c0
MD
311 if (((*(unsigned *)gd->gd_PMAP1) & PG_FRAME) != newpf) {
312 *(unsigned *)gd->gd_PMAP1 = newpf | PG_RW | PG_V;
840de426
MD
313 cpu_invlpg(gd->gd_PADDR1);
314 }
06bb314f 315 return gd->gd_PADDR1 + (index & (NPTEPG - 1));
984263bc 316 }
840de426 317 return (0);
984263bc
MD
318}
319
840de426 320
984263bc 321/*
4107b0c0 322 * Bootstrap the system enough to run with virtual memory.
984263bc 323 *
4107b0c0
MD
324 * On the i386 this is called after mapping has already been enabled
325 * and just syncs the pmap module with what has already been done.
326 * [We can't call it easily with mapping off since the kernel is not
327 * mapped with PA == VA, hence we would have to relocate every address
328 * from the linked base (virtual) address "KERNBASE" to the actual
329 * (physical) address starting relative to 0]
984263bc
MD
330 */
331void
f123d5a1 332pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr)
984263bc
MD
333{
334 vm_offset_t va;
335 pt_entry_t *pte;
85100692 336 struct mdglobaldata *gd;
984263bc 337 int i;
81c04d07 338 int pg;
984263bc 339
c439ad8f
MD
340 KvaStart = (vm_offset_t)VADDR(PTDPTDI, 0);
341 KvaSize = (vm_offset_t)VADDR(APTDPTDI, 0) - KvaStart;
342 KvaEnd = KvaStart + KvaSize;
343
984263bc
MD
344 avail_start = firstaddr;
345
346 /*
e880033d
MD
347 * XXX The calculation of virtual_start is wrong. It's NKPT*PAGE_SIZE
348 * too large. It should instead be correctly calculated in locore.s and
984263bc
MD
349 * not based on 'first' (which is a physical address, not a virtual
350 * address, for the start of unused physical memory). The kernel
351 * page tables are NOT double mapped and thus should not be included
352 * in this calculation.
353 */
e880033d
MD
354 virtual_start = (vm_offset_t) KERNBASE + firstaddr;
355 virtual_start = pmap_kmem_choose(virtual_start);
c439ad8f 356 virtual_end = VADDR(KPTDI+NKPDE-1, NPTEPG-1);
984263bc
MD
357
358 /*
359 * Initialize protection array.
360 */
361 i386_protection_init();
362
363 /*
364 * The kernel's pmap is statically allocated so we don't have to use
365 * pmap_create, which is unlikely to work correctly at this part of
366 * the boot sequence (XXX and which no longer exists).
b12defdc
MD
367 *
368 * The kernel_pmap's pm_pteobj is used only for locking and not
369 * for mmu pages.
984263bc 370 */
fbbaeba3
MD
371 kernel_pmap.pm_pdir = (pd_entry_t *)(KERNBASE + (u_int)IdlePTD);
372 kernel_pmap.pm_count = 1;
c2fb025d 373 kernel_pmap.pm_active = (cpumask_t)-1 & ~CPUMASK_LOCK;
b12defdc 374 kernel_pmap.pm_pteobj = &kernel_object;
fbbaeba3 375 TAILQ_INIT(&kernel_pmap.pm_pvlist);
b12defdc
MD
376 TAILQ_INIT(&kernel_pmap.pm_pvlist_free);
377 spin_init(&kernel_pmap.pm_spin);
378 lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok");
984263bc
MD
379 nkpt = NKPT;
380
381 /*
382 * Reserve some special page table entries/VA space for temporary
383 * mapping of pages.
384 */
385#define SYSMAP(c, p, v, n) \
386 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
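/*
 * SYSMAP(type, pte_var, va_var, npages) carves 'npages' pages of KVA out
 * of the boot-time bump allocator at 'va' and records both the chosen VA
 * (va_var) and a pointer to its first pte (pte_var) so the pages can be
 * mapped later with simple pte stores.
 */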
387
e880033d 388 va = virtual_start;
4107b0c0 389 pte = (pt_entry_t *) pmap_kernel_pte(va);
984263bc
MD
390
391 /*
392 * CMAP1/CMAP2 are used for zeroing and copying pages.
393 */
394 SYSMAP(caddr_t, CMAP1, CADDR1, 1)
984263bc
MD
395
396 /*
397 * Crashdump maps.
398 */
399 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
400
401 /*
e731d345
MD
402 * ptvmmap is used for reading arbitrary physical pages via
403 * /dev/mem.
404 */
405 SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
406
407 /*
984263bc
MD
408 * msgbufp is used to map the system message buffer.
409 * XXX msgbufmap is not used.
410 */
411 SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
412 atop(round_page(MSGBUF_SIZE)))
413
e880033d 414 virtual_start = va;
984263bc 415
17a9f566 416 *(int *) CMAP1 = 0;
984263bc
MD
417 for (i = 0; i < NKPT; i++)
418 PTD[i] = 0;
419
a2a5ad0d
MD
420 /*
421 * PG_G is terribly broken on SMP because we IPI invltlb's in some
422 * cases rather than invlpg. Actually, I don't even know why it
423 * works under UP given the self-referential page table mappings.
424 */
425#ifdef SMP
426 pgeflag = 0;
427#else
428 if (cpu_feature & CPUID_PGE)
984263bc 429 pgeflag = PG_G;
a2a5ad0d 430#endif
984263bc
MD
431
432/*
433 * Initialize the 4MB page size flag
434 */
435 pseflag = 0;
436/*
437 * The 4MB page version of the initial
438 * kernel page mapping.
439 */
440 pdir4mb = 0;
441
442#if !defined(DISABLE_PSE)
443 if (cpu_feature & CPUID_PSE) {
444 unsigned ptditmp;
445 /*
446 * Note that we have enabled PSE mode
447 */
448 pseflag = PG_PS;
449 ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
450 ptditmp &= ~(NBPDR - 1);
451 ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
452 pdir4mb = ptditmp;
453
8a8d5d85
MD
454#ifndef SMP
455 /*
456 * Enable the PSE mode. If we are SMP we can't do this
457 * now because the APs will not be able to use it when
458 * they boot up.
459 */
460 load_cr4(rcr4() | CR4_PSE);
984263bc 461
8a8d5d85
MD
462 /*
463 * We can do the mapping here for the single processor
464 * case. We simply ignore the old page table page from
465 * now on.
466 */
467 /*
468 * For SMP, we still need 4K pages to bootstrap APs,
469 * PSE will be enabled as soon as all APs are up.
470 */
b5b32410 471 PTD[KPTDI] = (pd_entry_t)ptditmp;
fbbaeba3 472 kernel_pmap.pm_pdir[KPTDI] = (pd_entry_t)ptditmp;
0f7a3396 473 cpu_invltlb();
8a8d5d85 474#endif
984263bc
MD
475 }
476#endif
984263bc 477
81c04d07
MD
478 /*
479 * We need to finish setting up the globaldata page for the BSP.
480 * locore has already populated the page table for the mdglobaldata
481 * portion.
482 */
483 pg = MDGLOBALDATA_BASEALLOC_PAGES;
85100692 484 gd = &CPU_prvspace[0].mdglobaldata;
81c04d07
MD
485 gd->gd_CMAP1 = &SMPpt[pg + 0];
486 gd->gd_CMAP2 = &SMPpt[pg + 1];
487 gd->gd_CMAP3 = &SMPpt[pg + 2];
488 gd->gd_PMAP1 = &SMPpt[pg + 3];
9388fcaa 489 gd->gd_GDMAP1 = &PTD[APTDPTDI];
85100692
MD
490 gd->gd_CADDR1 = CPU_prvspace[0].CPAGE1;
491 gd->gd_CADDR2 = CPU_prvspace[0].CPAGE2;
492 gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3;
493 gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1;
9388fcaa 494 gd->gd_GDADDR1= (unsigned *)VADDR(APTDPTDI, 0);
984263bc 495
0f7a3396 496 cpu_invltlb();
984263bc
MD
497}
498
499#ifdef SMP
500/*
501 * Set 4mb pdir for mp startup
502 */
503void
504pmap_set_opt(void)
505{
506 if (pseflag && (cpu_feature & CPUID_PSE)) {
507 load_cr4(rcr4() | CR4_PSE);
72740893 508 if (pdir4mb && mycpu->gd_cpuid == 0) { /* only on BSP */
fbbaeba3 509 kernel_pmap.pm_pdir[KPTDI] =
984263bc
MD
510 PTD[KPTDI] = (pd_entry_t)pdir4mb;
511 cpu_invltlb();
512 }
513 }
514}
515#endif
516
517/*
4107b0c0
MD
518 * Initialize the pmap module, called by vm_init()
519 *
520 * Called from the low level boot code only.
984263bc
MD
521 */
522void
e7252eda 523pmap_init(void)
984263bc
MD
524{
525 int i;
526 int initial_pvs;
527
528 /*
529 * object for kernel page table pages
530 */
531 kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
532
533 /*
534 * Allocate memory for random pmap data structures. Includes the
535 * pv_head_table.
536 */
537
538 for(i = 0; i < vm_page_array_size; i++) {
539 vm_page_t m;
540
541 m = &vm_page_array[i];
542 TAILQ_INIT(&m->md.pv_list);
543 m->md.pv_list_count = 0;
544 }
545
546 /*
547 * init the pv free list
548 */
549 initial_pvs = vm_page_array_size;
550 if (initial_pvs < MINPV)
551 initial_pvs = MINPV;
552 pvzone = &pvzone_store;
948209ce
MD
553 pvinit = (void *)kmem_alloc(&kernel_map,
554 initial_pvs * sizeof (struct pv_entry));
555 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry),
556 pvinit, initial_pvs);
984263bc
MD
557
558 /*
559 * Now it is safe to enable pv_table recording.
560 */
561 pmap_initialized = TRUE;
562}
563
564/*
565 * Initialize the address space (zone) for the pv_entries. Set a
566 * high water mark so that the system can recover from excessive
567 * numbers of pv entries.
4107b0c0
MD
568 *
569 * Called from the low level boot code only.
984263bc
MD
570 */
571void
f123d5a1 572pmap_init2(void)
984263bc
MD
573{
574 int shpgperproc = PMAP_SHPGPERPROC;
948209ce 575 int entry_max;
984263bc
MD
576
577 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
578 pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
948209ce
MD
579
580#ifdef PMAP_PVLIMIT
581 /*
582 * Horrible hack for systems with a lot of memory running i386.
583 * The calculated pv_entry_max can wind up eating a ton of KVM
584 * so put a cap on the number of entries if the user did not
585 * change any of the values. This saves about 44MB of KVM on
586 * boxes with 3+GB of ram.
587 *
588 * On the flip side, this makes it more likely that some setups
589 * will run out of pv entries. Those sysads will have to bump
590 * the limit up with vm.pmap.pv_entries or vm.pmap.shpgperproc.
591 */
592 if (shpgperproc == PMAP_SHPGPERPROC) {
593 if (pv_entry_max > PMAP_PVLIMIT)
594 pv_entry_max = PMAP_PVLIMIT;
595 }
596#endif
984263bc
MD
597 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
598 pv_entry_high_water = 9 * (pv_entry_max / 10);
948209ce
MD
599
600 /*
601 * Subtract out pages already installed in the zone (hack)
602 */
603 entry_max = pv_entry_max - vm_page_array_size;
604 if (entry_max <= 0)
605 entry_max = 1;
606
607 zinitna(pvzone, &pvzone_obj, NULL, 0, entry_max, ZONE_INTERRUPT, 1);
984263bc
MD
608}
609
610
611/***************************************************
612 * Low level helper routines.....
613 ***************************************************/
614
5926987a
MD
615#ifdef PMAP_DEBUG
616
617static void
618test_m_maps_pv(vm_page_t m, pv_entry_t pv)
619{
620 pv_entry_t spv;
621
74b9d1ec 622 crit_enter();
5926987a
MD
623#ifdef PMAP_DEBUG
624 KKASSERT(pv->pv_m == m);
625#endif
626 TAILQ_FOREACH(spv, &m->md.pv_list, pv_list) {
74b9d1ec
MD
627 if (pv == spv) {
628 crit_exit();
5926987a 629 return;
74b9d1ec 630 }
5926987a 631 }
74b9d1ec 632 crit_exit();
ed20d0e3 633 panic("test_m_maps_pv: failed m %p pv %p", m, pv);
5926987a
MD
634}
635
636static void
637ptbase_assert(struct pmap *pmap)
638{
639 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
640
641 /* are we current address space or kernel? */
4107b0c0 642 if (pmap == &kernel_pmap || frame == (((unsigned)PTDpde) & PG_FRAME))
5926987a 643 return;
3558dcda 644 KKASSERT(frame == (*mdcpu->gd_GDMAP1 & PG_FRAME));
5926987a
MD
645}
646
647#else
648
649#define test_m_maps_pv(m, pv)
650#define ptbase_assert(pmap)
651
652#endif
653
984263bc
MD
654#if defined(PMAP_DIAGNOSTIC)
655
656/*
657 * This code checks for non-writeable/modified pages.
658 * This should be an invalid condition.
659 */
660static int
661pmap_nw_modified(pt_entry_t ptea)
662{
663 int pte;
664
665 pte = (int) ptea;
666
667 if ((pte & (PG_M|PG_RW)) == PG_M)
668 return 1;
669 else
670 return 0;
671}
672#endif
673
674
675/*
4107b0c0
MD
676 * This routine defines the region(s) of memory that should not be tested
677 * for the modified bit.
678 *
679 * No requirements.
984263bc
MD
680 */
681static PMAP_INLINE int
682pmap_track_modified(vm_offset_t va)
683{
684 if ((va < clean_sva) || (va >= clean_eva))
685 return 1;
686 else
687 return 0;
688}
689
c1692ddf
MD
690/*
691 * Retrieve the mapped page table base for a particular pmap. Use our self
692 * mapping for the kernel_pmap or our current pmap.
693 *
694 * For foreign pmaps we use the per-cpu page table map. Since this involves
695 * installing a ptd it's actually (per-process x per-cpu). However, we
696 * still cannot depend on our mapping to survive thread switches because
697 * the process might be threaded and switching to another thread for the
698 * same process on the same cpu will allow that other thread to make its
699 * own mapping.
700 *
701 * This could be a bit confusing, but the gist is that for something like
702 * the vkernel, which uses foreign pmaps all the time, this represents a
703 * pretty good cache that avoids unnecessary invltlb()s.
4107b0c0
MD
704 *
705 * The caller must hold vm_token and the returned value is only valid
706 * until the caller blocks or releases the token.
c1692ddf 707 */
984263bc 708static unsigned *
e0e69b7d 709get_ptbase(pmap_t pmap)
984263bc
MD
710{
711 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
c1692ddf 712 struct mdglobaldata *gd = mdcpu;
984263bc 713
4107b0c0
MD
714 ASSERT_LWKT_TOKEN_HELD(&vm_token);
715
5926987a
MD
716 /*
717 * We can use PTmap if the pmap is our current address space or
718 * the kernel address space.
719 */
fbbaeba3 720 if (pmap == &kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
984263bc
MD
721 return (unsigned *) PTmap;
722 }
e0e69b7d 723
5926987a 724 /*
c1692ddf
MD
725 * Otherwise we use the per-cpu alternative page table map. Each
726 * cpu gets its own map. Because of this we cannot use this map
727 * from interrupts or threads which can preempt.
be3aecf7
MD
728 *
729 * Even if we already have the map cached we may still have to
730 * invalidate the TLB if another cpu modified a PDE in the map.
5926987a 731 */
c1692ddf
MD
732 KKASSERT(gd->mi.gd_intr_nesting_level == 0 &&
733 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0);
e0e69b7d 734
c1692ddf
MD
735 if ((*gd->gd_GDMAP1 & PG_FRAME) != frame) {
736 *gd->gd_GDMAP1 = frame | PG_RW | PG_V;
be3aecf7
MD
737 pmap->pm_cached |= gd->mi.gd_cpumask;
738 cpu_invltlb();
739 } else if ((pmap->pm_cached & gd->mi.gd_cpumask) == 0) {
740 pmap->pm_cached |= gd->mi.gd_cpumask;
984263bc 741 cpu_invltlb();
a93980ab
MD
742 } else if (dreadful_invltlb) {
743 cpu_invltlb();
984263bc 744 }
c1692ddf 745 return ((unsigned *)gd->gd_GDADDR1);
984263bc
MD
746}
747
748/*
e0e69b7d
MD
749 * pmap_extract:
750 *
4107b0c0 751 * Extract the physical page address associated with the map/VA pair.
e0e69b7d 752 *
4107b0c0 753 * The caller may hold vm_token if it desires non-blocking operation.
984263bc 754 */
6ef943a3 755vm_paddr_t
840de426 756pmap_extract(pmap_t pmap, vm_offset_t va)
984263bc
MD
757{
758 vm_offset_t rtval;
759 vm_offset_t pdirindex;
840de426 760
4107b0c0 761 lwkt_gettoken(&vm_token);
984263bc
MD
762 pdirindex = va >> PDRSHIFT;
763 if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
764 unsigned *pte;
765 if ((rtval & PG_PS) != 0) {
766 rtval &= ~(NBPDR - 1);
767 rtval |= va & (NBPDR - 1);
4107b0c0
MD
768 } else {
769 pte = get_ptbase(pmap) + i386_btop(va);
770 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
984263bc 771 }
4107b0c0
MD
772 } else {
773 rtval = 0;
984263bc 774 }
4107b0c0
MD
775 lwkt_reltoken(&vm_token);
776 return rtval;
f6bf3af1
MD
777}
778
984263bc
MD
779/***************************************************
780 * Low level mapping routines.....
781 ***************************************************/
782
783/*
4107b0c0
MD
784 * Map a wired VM page to a KVA, fully SMP synchronized.
785 *
786 * No requirements, non blocking.
984263bc 787 */
24712b90 788void
6ef943a3 789pmap_kenter(vm_offset_t va, vm_paddr_t pa)
984263bc 790{
840de426 791 unsigned *pte;
0f7a3396
MD
792 unsigned npte;
793 pmap_inval_info info;
984263bc 794
0f7a3396 795 pmap_inval_init(&info);
984263bc
MD
796 npte = pa | PG_RW | PG_V | pgeflag;
797 pte = (unsigned *)vtopte(va);
c2fb025d 798 pmap_inval_interlock(&info, &kernel_pmap, va);
984263bc 799 *pte = npte;
c2fb025d
MD
800 pmap_inval_deinterlock(&info, &kernel_pmap);
801 pmap_inval_done(&info);
984263bc
MD
802}
803
6d1ec6fa 804/*
4107b0c0
MD
805 * Map a wired VM page to a KVA, synchronized on current cpu only.
806 *
807 * No requirements, non blocking.
6d1ec6fa 808 */
24712b90
MD
809void
810pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
811{
812 unsigned *pte;
813 unsigned npte;
814
815 npte = pa | PG_RW | PG_V | pgeflag;
816 pte = (unsigned *)vtopte(va);
817 *pte = npte;
818 cpu_invlpg((void *)va);
819}
820
4107b0c0
MD
821/*
822 * Synchronize a previously entered VA on all cpus.
823 *
824 * No requirements, non blocking.
825 */
24712b90
MD
826void
827pmap_kenter_sync(vm_offset_t va)
828{
829 pmap_inval_info info;
830
831 pmap_inval_init(&info);
c2fb025d
MD
832 pmap_inval_interlock(&info, &kernel_pmap, va);
833 pmap_inval_deinterlock(&info, &kernel_pmap);
834 pmap_inval_done(&info);
24712b90
MD
835}
836
4107b0c0
MD
837/*
838 * Synchronize a previously entered VA on the current cpu only.
839 *
840 * No requirements, non blocking.
841 */
24712b90
MD
842void
843pmap_kenter_sync_quick(vm_offset_t va)
844{
845 cpu_invlpg((void *)va);
846}
847
984263bc 848/*
4107b0c0
MD
849 * Remove a page from the kernel pagetables, fully SMP synchronized.
850 *
851 * No requirements, non blocking.
984263bc 852 */
24712b90 853void
840de426 854pmap_kremove(vm_offset_t va)
984263bc 855{
840de426 856 unsigned *pte;
0f7a3396 857 pmap_inval_info info;
984263bc 858
0f7a3396 859 pmap_inval_init(&info);
984263bc 860 pte = (unsigned *)vtopte(va);
c2fb025d 861 pmap_inval_interlock(&info, &kernel_pmap, va);
984263bc 862 *pte = 0;
c2fb025d
MD
863 pmap_inval_deinterlock(&info, &kernel_pmap);
864 pmap_inval_done(&info);
984263bc
MD
865}
866
4107b0c0
MD
867/*
868 * Remove a page from the kernel pagetables, synchronized on current cpu only.
869 *
870 * No requirements, non blocking.
871 */
24712b90
MD
872void
873pmap_kremove_quick(vm_offset_t va)
874{
875 unsigned *pte;
876 pte = (unsigned *)vtopte(va);
877 *pte = 0;
878 cpu_invlpg((void *)va);
879}
880
984263bc 881/*
4107b0c0
MD
882 * Adjust the permissions of a page in the kernel page table,
883 * synchronized on the current cpu only.
884 *
885 * No requirements, non blocking.
9ad680a3
MD
886 */
887void
888pmap_kmodify_rw(vm_offset_t va)
889{
4107b0c0 890 atomic_set_int(vtopte(va), PG_RW);
9ad680a3
MD
891 cpu_invlpg((void *)va);
892}
893
4107b0c0
MD
894/*
895 * Adjust the permissions of a page in the kernel page table,
896 * synchronized on the current cpu only.
897 *
898 * No requirements, non blocking.
899 */
9ad680a3
MD
900void
901pmap_kmodify_nc(vm_offset_t va)
902{
4107b0c0 903 atomic_set_int(vtopte(va), PG_N);
9ad680a3
MD
904 cpu_invlpg((void *)va);
905}
906
907/*
4107b0c0 908 * Map a range of physical addresses into kernel virtual address space.
984263bc 909 *
4107b0c0 910 * No requirements, non blocking.
984263bc
MD
911 */
912vm_offset_t
8e5e6f1b 913pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot)
984263bc 914{
8e5e6f1b
AH
915 vm_offset_t sva, virt;
916
917 sva = virt = *virtp;
984263bc
MD
918 while (start < end) {
919 pmap_kenter(virt, start);
920 virt += PAGE_SIZE;
921 start += PAGE_SIZE;
922 }
8e5e6f1b
AH
923 *virtp = virt;
924 return (sva);
984263bc
MD
925}
926
984263bc 927/*
4107b0c0
MD
928 * Add a list of wired pages to the kva, fully SMP synchronized.
929 *
930 * No requirements, non blocking.
984263bc
MD
931 */
932void
840de426 933pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
984263bc
MD
934{
935 vm_offset_t end_va;
936
937 end_va = va + count * PAGE_SIZE;
938
939 while (va < end_va) {
940 unsigned *pte;
941
942 pte = (unsigned *)vtopte(va);
943 *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
984263bc 944 cpu_invlpg((void *)va);
984263bc
MD
945 va += PAGE_SIZE;
946 m++;
947 }
948#ifdef SMP
0f7a3396 949 smp_invltlb(); /* XXX */
984263bc
MD
950#endif
951}
952
953/*
4107b0c0 954 * Remove pages from KVA, fully SMP synchronized.
7155fc7d 955 *
4107b0c0 956 * No requirements, non blocking.
984263bc
MD
957 */
958void
840de426 959pmap_qremove(vm_offset_t va, int count)
984263bc
MD
960{
961 vm_offset_t end_va;
962
963 end_va = va + count*PAGE_SIZE;
964
965 while (va < end_va) {
966 unsigned *pte;
967
968 pte = (unsigned *)vtopte(va);
969 *pte = 0;
984263bc 970 cpu_invlpg((void *)va);
984263bc
MD
971 va += PAGE_SIZE;
972 }
973#ifdef SMP
974 smp_invltlb();
975#endif
976}
977
06ecca5a
MD
978/*
979 * This routine works like vm_page_lookup() but also blocks as long as the
980 * page is busy. This routine does not busy the page it returns.
981 *
b12defdc 982 * The caller must hold the object.
06ecca5a 983 */
984263bc 984static vm_page_t
840de426 985pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
984263bc
MD
986{
987 vm_page_t m;
06ecca5a 988
b12defdc
MD
989 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
990 m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp");
17cde63e 991
06ecca5a 992 return(m);
984263bc
MD
993}
994
995/*
263e4574 996 * Create a new thread and optionally associate it with a (new) process.
6ef943a3 997 * NOTE! the new thread's cpu may not equal the current cpu.
263e4574 998 */
7d0bac62
MD
999void
1000pmap_init_thread(thread_t td)
263e4574 1001{
f470d0c8 1002 /* enforce pcb placement */
f470d0c8 1003 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1;
65d6ce10 1004 td->td_savefpu = &td->td_pcb->pcb_save;
7d0bac62 1005 td->td_sp = (char *)td->td_pcb - 16;
263e4574
MD
1006}
1007
1008/*
984263bc
MD
1009 * This routine directly affects the fork perf for a process.
1010 */
1011void
13d13d89 1012pmap_init_proc(struct proc *p)
984263bc 1013{
984263bc
MD
1014}
1015
984263bc
MD
1016/***************************************************
1017 * Page table page management routines.....
1018 ***************************************************/
1019
1020/*
90244566
MD
1021 * This routine unwires page table pages, removing and freeing the page
1022 * table page when the wire count drops to 0.
4107b0c0
MD
1023 *
1024 * The caller must hold vm_token.
1025 * This function can block.
984263bc
MD
1026 */
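/*
 * Note: a page table page carries roughly one wire_count reference per pte
 * entered into it (see pmap_allocpte), so a wire_count of 1 here means the
 * last pte is being removed and the page itself can be reclaimed.
 */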
1027static int
90244566 1028_pmap_unwire_pte(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
840de426 1029{
17cde63e
MD
1030 /*
1031 * Wait until we can busy the page ourselves. We cannot have
1032 * any active flushes if we block.
1033 */
b12defdc 1034 vm_page_busy_wait(m, FALSE, "pmuwpt");
eec2b734 1035 KASSERT(m->queue == PQ_NONE,
90244566 1036 ("_pmap_unwire_pte: %p->queue != PQ_NONE", m));
984263bc 1037
90244566 1038 if (m->wire_count == 1) {
984263bc 1039 /*
be3aecf7
MD
1040 * Unmap the page table page.
1041 *
1042 * NOTE: We must clear pm_cached for all cpus, including
1043 * the current one, when clearing a page directory
1044 * entry.
984263bc 1045 */
c2fb025d 1046 pmap_inval_interlock(info, pmap, -1);
2247fe02 1047 KKASSERT(pmap->pm_pdir[m->pindex]);
984263bc 1048 pmap->pm_pdir[m->pindex] = 0;
be3aecf7 1049 pmap->pm_cached = 0;
c2fb025d 1050 pmap_inval_deinterlock(info, pmap);
eec2b734
MD
1051
1052 KKASSERT(pmap->pm_stats.resident_count > 0);
984263bc 1053 --pmap->pm_stats.resident_count;
984263bc
MD
1054
1055 if (pmap->pm_ptphint == m)
1056 pmap->pm_ptphint = NULL;
1057
1058 /*
eec2b734
MD
1059 * This was our last hold, the page had better be unwired
1060 * after we decrement wire_count.
1061 *
1062 * FUTURE NOTE: shared page directory page could result in
1063 * multiple wire counts.
984263bc 1064 */
90244566 1065 vm_page_unwire(m, 0);
17cde63e 1066 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
eec2b734
MD
1067 vm_page_flash(m);
1068 vm_page_free_zero(m);
984263bc 1069 return 1;
17cde63e 1070 } else {
90244566
MD
1071 KKASSERT(m->wire_count > 1);
1072 if (vm_page_unwire_quick(m))
1073 panic("pmap_unwire_pte: Insufficient wire_count");
b12defdc 1074 vm_page_wakeup(m);
17cde63e 1075 return 0;
984263bc 1076 }
984263bc
MD
1077}
1078
4107b0c0
MD
1079/*
1080 * The caller must hold vm_token.
92ba8d28 1081 *
4107b0c0 1082 * This function can block.
92ba8d28
MD
1083 *
1084 * This function can race the wire_count 2->1 case because the page
1085 * is not busied during the unwire_quick operation. An eventual
1086 * pmap_release() will catch the case.
4107b0c0 1087 */
984263bc 1088static PMAP_INLINE int
90244566 1089pmap_unwire_pte(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
984263bc 1090{
90244566
MD
1091 KKASSERT(m->wire_count > 0);
1092 if (m->wire_count > 1) {
1093 if (vm_page_unwire_quick(m))
1094 panic("pmap_unwire_pte: Insufficient wire_count");
984263bc 1095 return 0;
eec2b734 1096 } else {
90244566 1097 return _pmap_unwire_pte(pmap, m, info);
eec2b734 1098 }
984263bc
MD
1099}
1100
1101/*
4107b0c0 1102 * After removing a (user) page table entry, this routine is used to
984263bc 1103 * conditionally free the page, and manage the hold/wire counts.
5926987a 1104 *
4107b0c0
MD
1105 * The caller must hold vm_token.
1106 * This function can block regardless.
984263bc
MD
1107 */
1108static int
0f7a3396 1109pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
4107b0c0 1110 pmap_inval_info_t info)
984263bc
MD
1111{
1112 unsigned ptepindex;
4107b0c0 1113
b12defdc
MD
1114 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj));
1115
984263bc
MD
1116 if (va >= UPT_MIN_ADDRESS)
1117 return 0;
1118
1119 if (mpte == NULL) {
1120 ptepindex = (va >> PDRSHIFT);
1121 if (pmap->pm_ptphint &&
1122 (pmap->pm_ptphint->pindex == ptepindex)) {
1123 mpte = pmap->pm_ptphint;
1124 } else {
b12defdc 1125 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
984263bc 1126 pmap->pm_ptphint = mpte;
b12defdc 1127 vm_page_wakeup(mpte);
984263bc
MD
1128 }
1129 }
1130
90244566 1131 return pmap_unwire_pte(pmap, mpte, info);
984263bc
MD
1132}
1133
54a764e8 1134/*
fbbaeba3
MD
1135 * Initialize pmap0/vmspace0. This pmap is not added to pmap_list because
1136 * it, and IdlePTD, represent the template used to update all other pmaps.
1137 *
1138 * On architectures where the kernel pmap is not integrated into the user
1139 * process pmap, this pmap represents the process pmap, not the kernel pmap.
1140 * kernel_pmap should be used to directly access the kernel_pmap.
4107b0c0
MD
1141 *
1142 * No requirements.
54a764e8 1143 */
984263bc 1144void
840de426 1145pmap_pinit0(struct pmap *pmap)
984263bc
MD
1146{
1147 pmap->pm_pdir =
e4846942 1148 (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE);
24712b90 1149 pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t) IdlePTD);
984263bc
MD
1150 pmap->pm_count = 1;
1151 pmap->pm_active = 0;
be3aecf7 1152 pmap->pm_cached = 0;
984263bc
MD
1153 pmap->pm_ptphint = NULL;
1154 TAILQ_INIT(&pmap->pm_pvlist);
b12defdc
MD
1155 TAILQ_INIT(&pmap->pm_pvlist_free);
1156 spin_init(&pmap->pm_spin);
1157 lwkt_token_init(&pmap->pm_token, "pmap_tok");
984263bc
MD
1158 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1159}
1160
1161/*
1162 * Initialize a preallocated and zeroed pmap structure,
1163 * such as one in a vmspace structure.
4107b0c0
MD
1164 *
1165 * No requirements.
984263bc
MD
1166 */
1167void
840de426 1168pmap_pinit(struct pmap *pmap)
984263bc
MD
1169{
1170 vm_page_t ptdpg;
1171
1172 /*
1173 * No need to allocate page table space yet but we do need a valid
1174 * page directory table.
1175 */
b5b32410 1176 if (pmap->pm_pdir == NULL) {
984263bc 1177 pmap->pm_pdir =
e4846942 1178 (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE);
b5b32410 1179 }
984263bc
MD
1180
1181 /*
c3834cb2 1182 * Allocate an object for the ptes
984263bc
MD
1183 */
1184 if (pmap->pm_pteobj == NULL)
c3834cb2 1185 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1);
984263bc
MD
1186
1187 /*
c3834cb2
MD
1188 * Allocate the page directory page, unless we already have
1189 * one cached. If we used the cached page the wire_count will
1190 * already be set appropriately.
984263bc 1191 */
c3834cb2
MD
1192 if ((ptdpg = pmap->pm_pdirm) == NULL) {
1193 ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI,
d2d8515b
MD
1194 VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
1195 VM_ALLOC_ZERO);
c3834cb2 1196 pmap->pm_pdirm = ptdpg;
b12defdc
MD
1197 vm_page_flag_clear(ptdpg, PG_MAPPED);
1198 vm_page_wire(ptdpg);
d2d8515b 1199 KKASSERT(ptdpg->valid == VM_PAGE_BITS_ALL);
c3834cb2 1200 pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
b12defdc 1201 vm_page_wakeup(ptdpg);
c3834cb2 1202 }
984263bc 1203 pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
984263bc
MD
1204
1205 /* install self-referential address mapping entry */
1206 *(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1207 VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
1208
1209 pmap->pm_count = 1;
1210 pmap->pm_active = 0;
be3aecf7 1211 pmap->pm_cached = 0;
984263bc
MD
1212 pmap->pm_ptphint = NULL;
1213 TAILQ_INIT(&pmap->pm_pvlist);
b12defdc
MD
1214 TAILQ_INIT(&pmap->pm_pvlist_free);
1215 spin_init(&pmap->pm_spin);
1216 lwkt_token_init(&pmap->pm_token, "pmap_tok");
984263bc 1217 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
eec2b734 1218 pmap->pm_stats.resident_count = 1;
984263bc
MD
1219}
1220
1221/*
c3834cb2
MD
1222 * Clean up a pmap structure so it can be physically freed. This routine
1223 * is called by the vmspace dtor function. A great deal of pmap data is
1224 * left passively mapped to improve vmspace management so we have a bit
1225 * of cleanup work to do here.
4107b0c0
MD
1226 *
1227 * No requirements.
e3161323
MD
1228 */
1229void
1230pmap_puninit(pmap_t pmap)
1231{
c3834cb2
MD
1232 vm_page_t p;
1233
e3161323 1234 KKASSERT(pmap->pm_active == 0);
c3834cb2
MD
1235 if ((p = pmap->pm_pdirm) != NULL) {
1236 KKASSERT(pmap->pm_pdir != NULL);
1237 pmap_kremove((vm_offset_t)pmap->pm_pdir);
b12defdc 1238 vm_page_busy_wait(p, FALSE, "pgpun");
90244566 1239 vm_page_unwire(p, 0);
c3834cb2
MD
1240 vm_page_free_zero(p);
1241 pmap->pm_pdirm = NULL;
1242 }
e3161323
MD
1243 if (pmap->pm_pdir) {
1244 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pdir, PAGE_SIZE);
1245 pmap->pm_pdir = NULL;
1246 }
1247 if (pmap->pm_pteobj) {
1248 vm_object_deallocate(pmap->pm_pteobj);
1249 pmap->pm_pteobj = NULL;
1250 }
1251}
1252
1253/*
984263bc
MD
1254 * Wire in kernel global address entries. To avoid a race condition
1255 * between pmap initialization and pmap_growkernel, this procedure
54a764e8
MD
1256 * adds the pmap to the master list (which growkernel scans to update),
1257 * then copies the template.
4107b0c0
MD
1258 *
1259 * No requirements.
984263bc
MD
1260 */
1261void
840de426 1262pmap_pinit2(struct pmap *pmap)
984263bc 1263{
b12defdc
MD
1264 /*
1265 * XXX copies current process, does not fill in MPPTDI
1266 */
1267 spin_lock(&pmap_spin);
54a764e8 1268 TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode);
984263bc 1269 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
b12defdc 1270 spin_unlock(&pmap_spin);
984263bc
MD
1271}
1272
344ad853 1273/*
eec2b734 1274 * Attempt to release and free a vm_page in a pmap. Returns 1 on success,
344ad853 1275 * 0 on failure (if the procedure had to sleep).
c3834cb2
MD
1276 *
1277 * When asked to remove the page directory page itself, we actually just
1278 * leave it cached so we do not have to incur the SMP inval overhead of
1279 * removing the kernel mapping. pmap_puninit() will take care of it.
4107b0c0
MD
1280 *
1281 * The caller must hold vm_token.
1282 * This function can block regardless.
344ad853 1283 */
984263bc 1284static int
840de426 1285pmap_release_free_page(struct pmap *pmap, vm_page_t p)
984263bc
MD
1286{
1287 unsigned *pde = (unsigned *) pmap->pm_pdir;
4107b0c0 1288
984263bc
MD
1289 /*
1290 * This code optimizes the case of freeing non-busy
1291 * page-table pages. Those pages are zero now, and
1292 * might as well be placed directly into the zero queue.
1293 */
b12defdc
MD
1294 if (vm_page_busy_try(p, FALSE)) {
1295 vm_page_sleep_busy(p, FALSE, "pmaprl");
984263bc 1296 return 0;
b12defdc 1297 }
984263bc
MD
1298
1299 /*
1300 * Remove the page table page from the processes address space.
1301 */
eec2b734 1302 KKASSERT(pmap->pm_stats.resident_count > 0);
2247fe02
MD
1303 KKASSERT(pde[p->pindex]);
1304 pde[p->pindex] = 0;
eec2b734 1305 --pmap->pm_stats.resident_count;
a93980ab 1306 pmap->pm_cached = 0;
984263bc 1307
90244566
MD
1308 if (p->wire_count != 1) {
1309 panic("pmap_release: freeing wired page table page");
984263bc 1310 }
c3834cb2
MD
1311 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1312 pmap->pm_ptphint = NULL;
1313
984263bc 1314 /*
c3834cb2
MD
1315 * We leave the page directory page cached, wired, and mapped in
1316 * the pmap until the dtor function (pmap_puninit()) gets called.
1317 * However, still clean it up so we can set PG_ZERO.
c1692ddf
MD
1318 *
1319 * The pmap has already been removed from the pmap_list in the
1320 * PTDPTDI case.
984263bc
MD
1321 */
1322 if (p->pindex == PTDPTDI) {
1323 bzero(pde + KPTDI, nkpt * PTESIZE);
9388fcaa 1324 bzero(pde + MPPTDI, (NPDEPG - MPPTDI) * PTESIZE);
c3834cb2
MD
1325 vm_page_flag_set(p, PG_ZERO);
1326 vm_page_wakeup(p);
1327 } else {
92ba8d28
MD
1328 /*
1329 * This case can occur if a pmap_unwire_pte() loses a race
1330 * while the page is unbusied.
1331 */
1332 /*panic("pmap_release: page should already be gone %p", p);*/
1333 vm_page_flag_clear(p, PG_MAPPED);
90244566 1334 vm_page_unwire(p, 0);
c3834cb2 1335 vm_page_free_zero(p);
984263bc 1336 }
984263bc
MD
1337 return 1;
1338}
1339
1340/*
4107b0c0
MD
1341 * This routine is called if the page table page is not mapped correctly.
1342 *
1343 * The caller must hold vm_token.
984263bc
MD
1344 */
1345static vm_page_t
840de426 1346_pmap_allocpte(pmap_t pmap, unsigned ptepindex)
984263bc 1347{
480c83b6 1348 vm_offset_t ptepa;
984263bc
MD
1349 vm_page_t m;
1350
1351 /*
d2d8515b
MD
1352 * Find or fabricate a new pagetable page. Setting VM_ALLOC_ZERO
1353 * will zero any new page and mark it valid.
984263bc
MD
1354 */
1355 m = vm_page_grab(pmap->pm_pteobj, ptepindex,
d2d8515b 1356 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
984263bc
MD
1357
1358 KASSERT(m->queue == PQ_NONE,
1359 ("_pmap_allocpte: %p->queue != PQ_NONE", m));
1360
eec2b734 1361 /*
90244566 1362 * Increment the wire count for the page we will be returning to
eec2b734
MD
1363 * the caller.
1364 */
90244566 1365 vm_page_wire(m);
eec2b734
MD
1366
1367 /*
1368 * It is possible that someone else got in and mapped the page
1369 * directory page while we were blocked; if so, just unbusy and
90244566 1370 * return the wired page.
eec2b734
MD
1371 */
1372 if ((ptepa = pmap->pm_pdir[ptepindex]) != 0) {
1373 KKASSERT((ptepa & PG_FRAME) == VM_PAGE_TO_PHYS(m));
1374 vm_page_wakeup(m);
1375 return(m);
1376 }
1377
984263bc
MD
1378 /*
1379 * Map the pagetable page into the process address space, if
1380 * it isn't already there.
be3aecf7
MD
1381 *
1382 * NOTE: For safety clear pm_cached for all cpus including the
1383 * current one when adding a PDE to the map.
984263bc 1384 */
eec2b734 1385 ++pmap->pm_stats.resident_count;
984263bc
MD
1386
1387 ptepa = VM_PAGE_TO_PHYS(m);
1388 pmap->pm_pdir[ptepindex] =
1389 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
be3aecf7 1390 pmap->pm_cached = 0;
984263bc
MD
1391
1392 /*
1393 * Set the page table hint
1394 */
1395 pmap->pm_ptphint = m;
984263bc
MD
1396 vm_page_flag_set(m, PG_MAPPED);
1397 vm_page_wakeup(m);
1398
1399 return m;
1400}
1401
4107b0c0
MD
1402/*
1403 * Allocate a page table entry for a va.
1404 *
1405 * The caller must hold vm_token.
1406 */
984263bc 1407static vm_page_t
840de426 1408pmap_allocpte(pmap_t pmap, vm_offset_t va)
984263bc
MD
1409{
1410 unsigned ptepindex;
1411 vm_offset_t ptepa;
1412 vm_page_t m;
1413
b12defdc
MD
1414 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj));
1415
984263bc
MD
1416 /*
1417 * Calculate pagetable page index
1418 */
1419 ptepindex = va >> PDRSHIFT;
1420
1421 /*
1422 * Get the page directory entry
1423 */
1424 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1425
1426 /*
1427 * This supports switching from a 4MB page to a
1428 * normal 4K page.
1429 */
1430 if (ptepa & PG_PS) {
1431 pmap->pm_pdir[ptepindex] = 0;
1432 ptepa = 0;
0f7a3396 1433 smp_invltlb();
54341a3b 1434 cpu_invltlb();
984263bc
MD
1435 }
1436
1437 /*
1438 * If the page table page is mapped, we just increment the
90244566 1439 * wire count, and activate it.
984263bc
MD
1440 */
1441 if (ptepa) {
1442 /*
1443 * In order to get the page table page, try the
1444 * hint first.
1445 */
1446 if (pmap->pm_ptphint &&
1447 (pmap->pm_ptphint->pindex == ptepindex)) {
1448 m = pmap->pm_ptphint;
1449 } else {
b12defdc 1450 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
984263bc 1451 pmap->pm_ptphint = m;
b12defdc 1452 vm_page_wakeup(m);
984263bc 1453 }
90244566 1454 vm_page_wire_quick(m);
984263bc
MD
1455 return m;
1456 }
1457 /*
1458 * Here if the pte page isn't mapped, or if it has been deallocated.
1459 */
1460 return _pmap_allocpte(pmap, ptepindex);
1461}
1462
1463
1464/***************************************************
1f804340 1465 * Pmap allocation/deallocation routines.
984263bc
MD
1466 ***************************************************/
1467
1468/*
1469 * Release any resources held by the given physical map.
1470 * Called when a pmap initialized by pmap_pinit is being released.
1471 * Should only be called if the map contains no valid mappings.
4107b0c0 1472 *
b12defdc 1473 * Caller must hold pmap->pm_token
984263bc 1474 */
1f804340
MD
1475static int pmap_release_callback(struct vm_page *p, void *data);
1476
984263bc 1477void
840de426 1478pmap_release(struct pmap *pmap)
984263bc 1479{
984263bc 1480 vm_object_t object = pmap->pm_pteobj;
1f804340 1481 struct rb_vm_page_scan_info info;
984263bc 1482
4107b0c0
MD
1483 KASSERT(pmap->pm_active == 0,
1484 ("pmap still active! %08x", pmap->pm_active));
984263bc
MD
1485#if defined(DIAGNOSTIC)
1486 if (object->ref_count != 1)
1487 panic("pmap_release: pteobj reference count != 1");
1488#endif
1489
1f804340
MD
1490 info.pmap = pmap;
1491 info.object = object;
b12defdc
MD
1492
1493 spin_lock(&pmap_spin);
54a764e8 1494 TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode);
b12defdc 1495 spin_unlock(&pmap_spin);
1f804340 1496
b12defdc 1497 vm_object_hold(object);
1f804340 1498 do {
1f804340
MD
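		/*
		 * Free the page table pages, deferring the page directory
		 * page (PTDPTDI) until last.  If the object generation
		 * changes mid-scan (we blocked), the callback flags an
		 * error and the scan is simply retried.
		 */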
1499 info.error = 0;
1500 info.mpte = NULL;
1501 info.limit = object->generation;
1502
1503 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
1504 pmap_release_callback, &info);
1505 if (info.error == 0 && info.mpte) {
1506 if (!pmap_release_free_page(pmap, info.mpte))
1507 info.error = 1;
984263bc 1508 }
1f804340 1509 } while (info.error);
2f2d9e58 1510 vm_object_drop(object);
b12defdc
MD
1511
1512 pmap->pm_cached = 0;
1f804340
MD
1513}
1514
4107b0c0
MD
1515/*
1516 * The caller must hold vm_token.
1517 */
1f804340
MD
1518static int
1519pmap_release_callback(struct vm_page *p, void *data)
1520{
1521 struct rb_vm_page_scan_info *info = data;
1522
1523 if (p->pindex == PTDPTDI) {
1524 info->mpte = p;
1525 return(0);
344ad853 1526 }
1f804340
MD
1527 if (!pmap_release_free_page(info->pmap, p)) {
1528 info->error = 1;
1529 return(-1);
1530 }
1531 if (info->object->generation != info->limit) {
1532 info->error = 1;
1533 return(-1);
1534 }
1535 return(0);
984263bc 1536}
984263bc
MD
1537
1538/*
0e5797fe 1539 * Grow the number of kernel page table entries, if needed.
4107b0c0
MD
1540 *
1541 * No requirements.
984263bc
MD
1542 */
1543void
a8cf2878 1544pmap_growkernel(vm_offset_t kstart, vm_offset_t kend)
984263bc 1545{
a8cf2878 1546 vm_offset_t addr = kend;
54a764e8 1547 struct pmap *pmap;
984263bc
MD
1548 vm_offset_t ptppaddr;
1549 vm_page_t nkpg;
1550 pd_entry_t newpdir;
1551
b12defdc 1552 vm_object_hold(kptobj);
984263bc
MD
1553 if (kernel_vm_end == 0) {
1554 kernel_vm_end = KERNBASE;
1555 nkpt = 0;
1556 while (pdir_pde(PTD, kernel_vm_end)) {
4107b0c0
MD
1557 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1558 ~(PAGE_SIZE * NPTEPG - 1);
984263bc
MD
1559 nkpt++;
1560 }
1561 }
1562 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1563 while (kernel_vm_end < addr) {
1564 if (pdir_pde(PTD, kernel_vm_end)) {
4107b0c0
MD
1565 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1566 ~(PAGE_SIZE * NPTEPG - 1);
984263bc
MD
1567 continue;
1568 }
1569
1570 /*
1571 * This index is bogus, but out of the way
1572 */
4107b0c0
MD
1573 nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_NORMAL |
1574 VM_ALLOC_SYSTEM |
1575 VM_ALLOC_INTERRUPT);
dc1fd4b3 1576 if (nkpg == NULL)
984263bc
MD
1577 panic("pmap_growkernel: no memory to grow kernel");
1578
984263bc
MD
1579 vm_page_wire(nkpg);
1580 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1581 pmap_zero_page(ptppaddr);
1582 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
1583 pdir_pde(PTD, kernel_vm_end) = newpdir;
fbbaeba3 1584 *pmap_pde(&kernel_pmap, kernel_vm_end) = newpdir;
0e5797fe
MD
1585 nkpt++;
1586
1587 /*
54a764e8 1588 * This update must be interlocked with pmap_pinit2.
0e5797fe 1589 */
b12defdc 1590 spin_lock(&pmap_spin);
54a764e8
MD
1591 TAILQ_FOREACH(pmap, &pmap_list, pm_pmnode) {
1592 *pmap_pde(pmap, kernel_vm_end) = newpdir;
1593 }
b12defdc 1594 spin_unlock(&pmap_spin);
54a764e8
MD
1595 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1596 ~(PAGE_SIZE * NPTEPG - 1);
984263bc 1597 }
b12defdc 1598 vm_object_drop(kptobj);
984263bc
MD
1599}
1600
1601/*
4107b0c0
MD
1602 * Retire the given physical map from service.
1603 *
1604 * Should only be called if the map contains no valid mappings.
1605 *
1606 * No requirements.
984263bc
MD
1607 */
1608void
840de426 1609pmap_destroy(pmap_t pmap)
984263bc 1610{
984263bc
MD
1611 if (pmap == NULL)
1612 return;
1613
4107b0c0
MD
1614 lwkt_gettoken(&vm_token);
1615 if (--pmap->pm_count == 0) {
984263bc
MD
1616 pmap_release(pmap);
1617 panic("destroying a pmap is not yet implemented");
1618 }
4107b0c0 1619 lwkt_reltoken(&vm_token);
984263bc
MD
1620}
1621
1622/*
4107b0c0
MD
1623 * Add a reference to the specified pmap.
1624 *
1625 * No requirements.
984263bc
MD
1626 */
1627void
840de426 1628pmap_reference(pmap_t pmap)
984263bc 1629{
4107b0c0
MD
1630 if (pmap) {
1631 lwkt_gettoken(&vm_token);
1632 ++pmap->pm_count;
1633 lwkt_reltoken(&vm_token);
984263bc
MD
1634 }
1635}
1636
1637/***************************************************
4107b0c0 1638 * page management routines.
984263bc
MD
1639 ***************************************************/
1640
1641/*
8a8d5d85
MD
1642 * free the pv_entry back to the free list. This function may be
1643 * called from an interrupt.
4107b0c0
MD
1644 *
1645 * The caller must hold vm_token.
984263bc
MD
1646 */
1647static PMAP_INLINE void
840de426 1648free_pv_entry(pv_entry_t pv)
984263bc 1649{
2bb9cc6f
MD
1650 struct mdglobaldata *gd;
1651
5926987a
MD
1652#ifdef PMAP_DEBUG
1653 KKASSERT(pv->pv_m != NULL);
1654 pv->pv_m = NULL;
1655#endif
2bb9cc6f 1656 gd = mdcpu;
984263bc 1657 pv_entry_count--;
2bb9cc6f
MD
1658 if (gd->gd_freepv == NULL)
1659 gd->gd_freepv = pv;
1660 else
1661 zfree(pvzone, pv);
984263bc
MD
1662}
1663
1664/*
1665 * get a new pv_entry, allocating a block from the system
2bb9cc6f
MD
1666 * when needed. This function may be called from an interrupt thread.
1667 *
1668 * THIS FUNCTION CAN BLOCK ON THE ZALLOC TOKEN, serialization of other
1669 * tokens (aka vm_token) to be temporarily lost.
4107b0c0
MD
1670 *
1671 * The caller must hold vm_token.
984263bc
MD
1672 */
1673static pv_entry_t
1674get_pv_entry(void)
1675{
2bb9cc6f
MD
1676 struct mdglobaldata *gd;
1677 pv_entry_t pv;
1678
984263bc
MD
1679 pv_entry_count++;
1680 if (pv_entry_high_water &&
20479584
MD
1681 (pv_entry_count > pv_entry_high_water) &&
1682 (pmap_pagedaemon_waken == 0)) {
984263bc
MD
1683 pmap_pagedaemon_waken = 1;
1684 wakeup (&vm_pages_needed);
1685 }
2bb9cc6f
MD
1686 gd = mdcpu;
1687 if ((pv = gd->gd_freepv) != NULL)
1688 gd->gd_freepv = NULL;
1689 else
1690 pv = zalloc(pvzone);
1691 return pv;
984263bc
MD
1692}
1693
1694/*
1695 * This routine is very drastic, but can save the system
1696 * in a pinch.
4107b0c0
MD
1697 *
1698 * No requirements.
984263bc
MD
1699 */
1700void
840de426 1701pmap_collect(void)
984263bc
MD
1702{
1703 int i;
1704 vm_page_t m;
1705 static int warningdone=0;
1706
1707 if (pmap_pagedaemon_waken == 0)
1708 return;
4107b0c0 1709 lwkt_gettoken(&vm_token);
20479584 1710 pmap_pagedaemon_waken = 0;
984263bc
MD
1711
1712 if (warningdone < 5) {
948209ce
MD
1713 kprintf("pmap_collect: collecting pv entries -- "
1714 "suggest increasing PMAP_SHPGPERPROC\n");
984263bc
MD
1715 warningdone++;
1716 }
1717
b12defdc 1718 for (i = 0; i < vm_page_array_size; i++) {
984263bc 1719 m = &vm_page_array[i];
b12defdc 1720 if (m->wire_count || m->hold_count)
984263bc 1721 continue;
b12defdc
MD
1722 if (vm_page_busy_try(m, TRUE) == 0) {
1723 if (m->wire_count == 0 && m->hold_count == 0) {
1724 pmap_remove_all(m);
1725 }
1726 vm_page_wakeup(m);
4107b0c0 1727 }
984263bc 1728 }
4107b0c0 1729 lwkt_reltoken(&vm_token);
984263bc
MD
1730}
1731
1732
1733/*
1734 * If it is the first entry on the list, it is actually
1735 * in the header and we must copy the following entry up
1736 * to the header. Otherwise we must search the list for
1737 * the entry. In either case we free the now unused entry.
4107b0c0
MD
1738 *
1739 * The caller must hold vm_token.
984263bc 1740 */
984263bc 1741static int
0f7a3396 1742pmap_remove_entry(struct pmap *pmap, vm_page_t m,
4107b0c0 1743 vm_offset_t va, pmap_inval_info_t info)
984263bc
MD
1744{
1745 pv_entry_t pv;
1746 int rtval;
984263bc 1747
4107b0c0 1748 ASSERT_LWKT_TOKEN_HELD(&vm_token);
984263bc
MD
1749 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1750 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1751 if (pmap == pv->pv_pmap && va == pv->pv_va)
1752 break;
1753 }
1754 } else {
1755 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
5926987a
MD
1756#ifdef PMAP_DEBUG
1757 KKASSERT(pv->pv_pmap == pmap);
1758#endif
1759 if (va == pv->pv_va)
984263bc
MD
1760 break;
1761 }
1762 }
5926987a 1763 KKASSERT(pv);
984263bc
MD
1764
1765 rtval = 0;
5926987a
MD
1766 test_m_maps_pv(m, pv);
1767 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1768 m->md.pv_list_count--;
cef01e15
MD
1769 if (m->object)
1770 atomic_add_int(&m->object->agg_pv_list_count, -1);
5926987a
MD
1771 if (TAILQ_EMPTY(&m->md.pv_list))
1772 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1773 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1774 ++pmap->pm_generation;
b12defdc 1775 vm_object_hold(pmap->pm_pteobj);
5926987a 1776 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info);
b12defdc 1777 vm_object_drop(pmap->pm_pteobj);
5926987a 1778 free_pv_entry(pv);
b12defdc 1779
984263bc
MD
1780 return rtval;
1781}
1782
1783/*
4107b0c0
MD
1784 * Create a pv entry for page m at (pmap, va) using the pre-allocated pv.
1785 *
1786 * The caller must hold vm_token.
984263bc
MD
1787 */
1788static void
2bb9cc6f
MD
1789pmap_insert_entry(pmap_t pmap, pv_entry_t pv, vm_offset_t va,
1790 vm_page_t mpte, vm_page_t m)
984263bc 1791{
5926987a
MD
1792#ifdef PMAP_DEBUG
1793 KKASSERT(pv->pv_m == NULL);
1794 pv->pv_m = m;
1795#endif
984263bc
MD
1796 pv->pv_va = va;
1797 pv->pv_pmap = pmap;
1798 pv->pv_ptem = mpte;
1799
1800 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1801 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
5926987a 1802 ++pmap->pm_generation;
984263bc 1803 m->md.pv_list_count++;
cef01e15
MD
1804 if (m->object)
1805 atomic_add_int(&m->object->agg_pv_list_count, 1);
984263bc
MD
1806}
1807
1808/*
5926987a
MD
1809 * pmap_remove_pte: unmap a single page in a process pmap, updating pv and page state.
1810 *
4107b0c0
MD
1811 * The caller must hold vm_token.
1812 *
1813 * WARNING! As with most other pmap functions this one can block, so
1814 * callers using temporary page table mappings must reload
1815 * them.
984263bc
MD
1816 */
1817static int
0f7a3396 1818pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va,
5926987a 1819 pmap_inval_info_t info)
984263bc
MD
1820{
1821 unsigned oldpte;
1822 vm_page_t m;
1823
5926987a 1824 ptbase_assert(pmap);
c2fb025d 1825 pmap_inval_interlock(info, pmap, va);
5926987a 1826 ptbase_assert(pmap);
984263bc
MD
1827 oldpte = loadandclear(ptq);
1828 if (oldpte & PG_W)
1829 pmap->pm_stats.wired_count -= 1;
c2fb025d 1830 pmap_inval_deinterlock(info, pmap);
90244566 1831 KKASSERT(oldpte & PG_V);
984263bc
MD
1832 /*
1833 * Machines that don't support invlpg also don't support
0f7a3396
MD
1834 * PG_G. XXX PG_G is disabled for SMP so don't worry about
1835 * the SMP case.
984263bc
MD
1836 */
1837 if (oldpte & PG_G)
41a01a4d 1838 cpu_invlpg((void *)va);
eec2b734
MD
1839 KKASSERT(pmap->pm_stats.resident_count > 0);
1840 --pmap->pm_stats.resident_count;
984263bc
MD
1841 if (oldpte & PG_MANAGED) {
1842 m = PHYS_TO_VM_PAGE(oldpte);
1843 if (oldpte & PG_M) {
1844#if defined(PMAP_DIAGNOSTIC)
1845 if (pmap_nw_modified((pt_entry_t) oldpte)) {
d557216f
MD
1846 kprintf("pmap_remove: modified page not "
1847 "writable: va: %p, pte: 0x%lx\n",
1848 (void *)va, (long)oldpte);
984263bc
MD
1849 }
1850#endif
1851 if (pmap_track_modified(va))
1852 vm_page_dirty(m);
1853 }
1854 if (oldpte & PG_A)
1855 vm_page_flag_set(m, PG_REFERENCED);
0f7a3396 1856 return pmap_remove_entry(pmap, m, va, info);
984263bc 1857 } else {
0f7a3396 1858 return pmap_unuse_pt(pmap, va, NULL, info);
984263bc
MD
1859 }
1860
1861 return 0;
1862}
1863
1864/*
5926987a 1865 * Remove a single page from a process address space.
e0e69b7d 1866 *
4107b0c0 1867 * The caller must hold vm_token.
984263bc
MD
1868 */
1869static void
0f7a3396 1870pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info)
984263bc 1871{
840de426 1872 unsigned *ptq;
984263bc
MD
1873
1874 /*
90244566 1875 * If there is no pte for this address, just skip it!!! Otherwise
e0e69b7d 1876 * get a local va for this pmap's mappings and remove the entry.
984263bc 1877 */
e0e69b7d
MD
1878 if (*pmap_pde(pmap, va) != 0) {
1879 ptq = get_ptbase(pmap) + i386_btop(va);
1880 if (*ptq) {
0f7a3396 1881 pmap_remove_pte(pmap, ptq, va, info);
5926987a 1882 /* ptq invalid */
e0e69b7d 1883 }
984263bc 1884 }
984263bc
MD
1885}
1886
1887/*
4107b0c0 1888 * Remove the given range of addresses from the specified map.
984263bc 1889 *
4107b0c0
MD
1890 * It is assumed that the start and end are properly rounded to the page
1891 * size.
e0e69b7d 1892 *
4107b0c0 1893 * No requirements.
984263bc
MD
1894 */
1895void
840de426 1896pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
984263bc 1897{
840de426 1898 unsigned *ptbase;
984263bc
MD
1899 vm_offset_t pdnxt;
1900 vm_offset_t ptpaddr;
1901 vm_offset_t sindex, eindex;
0f7a3396 1902 struct pmap_inval_info info;
984263bc
MD
1903
1904 if (pmap == NULL)
1905 return;
1906
b12defdc 1907 vm_object_hold(pmap->pm_pteobj);
4107b0c0
MD
1908 lwkt_gettoken(&vm_token);
1909 if (pmap->pm_stats.resident_count == 0) {
1910 lwkt_reltoken(&vm_token);
b12defdc 1911 vm_object_drop(pmap->pm_pteobj);
984263bc 1912 return;
4107b0c0 1913 }
984263bc 1914
0f7a3396
MD
1915 pmap_inval_init(&info);
1916
984263bc
MD
1917 /*
1918 * Special handling for removing a single page, a very
1919 * common operation for which we can short circuit a
1920 * fair amount of code.
1921 */
1922 if (((sva + PAGE_SIZE) == eva) &&
1923 (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
0f7a3396 1924 pmap_remove_page(pmap, sva, &info);
c2fb025d 1925 pmap_inval_done(&info);
4107b0c0 1926 lwkt_reltoken(&vm_token);
b12defdc 1927 vm_object_drop(pmap->pm_pteobj);
984263bc
MD
1928 return;
1929 }
1930
984263bc
MD
1931 /*
1932 * Get a local virtual address for the mappings that are being
1933 * worked with.
1934 */
984263bc
MD
1935 sindex = i386_btop(sva);
1936 eindex = i386_btop(eva);
1937
1938 for (; sindex < eindex; sindex = pdnxt) {
1939 unsigned pdirindex;
1940
1941 /*
1942 * Calculate index for next page table.
1943 */
1944 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1945 if (pmap->pm_stats.resident_count == 0)
1946 break;
1947
1948 pdirindex = sindex / NPDEPG;
1949 if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
c2fb025d 1950 pmap_inval_interlock(&info, pmap, -1);
984263bc
MD
1951 pmap->pm_pdir[pdirindex] = 0;
1952 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
be3aecf7 1953 pmap->pm_cached = 0;
c2fb025d 1954 pmap_inval_deinterlock(&info, pmap);
984263bc
MD
1955 continue;
1956 }
1957
1958 /*
1959 * Weed out invalid mappings. Note: we assume that the page
1960 * directory table is always allocated, and in kernel virtual.
1961 */
1962 if (ptpaddr == 0)
1963 continue;
1964
1965 /*
1966 * Limit our scan to either the end of the va represented
1967 * by the current page table page, or to the end of the
1968 * range being removed.
1969 */
1970 if (pdnxt > eindex) {
1971 pdnxt = eindex;
1972 }
1973
8790d7d8 1974 /*
5926987a
MD
1975 * NOTE: pmap_remove_pte() can block and wipe the temporary
1976 * ptbase.
8790d7d8 1977 */
0f7a3396 1978 for (; sindex != pdnxt; sindex++) {
984263bc 1979 vm_offset_t va;
8790d7d8
MD
1980
1981 ptbase = get_ptbase(pmap);
0f7a3396 1982 if (ptbase[sindex] == 0)
984263bc 1983 continue;
984263bc 1984 va = i386_ptob(sindex);
0f7a3396 1985 if (pmap_remove_pte(pmap, ptbase + sindex, va, &info))
984263bc
MD
1986 break;
1987 }
1988 }
c2fb025d 1989 pmap_inval_done(&info);
4107b0c0 1990 lwkt_reltoken(&vm_token);
b12defdc 1991 vm_object_drop(pmap->pm_pteobj);
984263bc
MD
1992}
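/*
 * A minimal sketch; the example_* helper is hypothetical and simply shows
 * that removing a single page-aligned mapping takes the one-page short
 * circuit in pmap_remove() above, while a larger range walks the page
 * directory one page table page at a time.
 */
static void
example_remove_one_page(pmap_t pmap, vm_offset_t va)
{
	va = trunc_page(va);
	pmap_remove(pmap, va, va + PAGE_SIZE);
}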
1993
1994/*
4107b0c0
MD
1995 * Removes this physical page from all physical maps in which it resides.
1996 * Reflects back modify bits to the pager.
984263bc 1997 *
4107b0c0 1998 * No requirements.
984263bc 1999 */
984263bc 2000static void
840de426 2001pmap_remove_all(vm_page_t m)
984263bc 2002{
0f7a3396 2003 struct pmap_inval_info info;
840de426 2004 unsigned *pte, tpte;
0f7a3396 2005 pv_entry_t pv;
984263bc 2006
bee81bdd
SS
2007 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2008 return;
984263bc 2009
0f7a3396 2010 pmap_inval_init(&info);
984263bc 2011 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
eec2b734
MD
2012 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0);
2013 --pv->pv_pmap->pm_stats.resident_count;
984263bc
MD
2014
2015 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
c2fb025d 2016 pmap_inval_interlock(&info, pv->pv_pmap, pv->pv_va);
984263bc
MD
2017 tpte = loadandclear(pte);
2018 if (tpte & PG_W)
2019 pv->pv_pmap->pm_stats.wired_count--;
c2fb025d 2020 pmap_inval_deinterlock(&info, pv->pv_pmap);
984263bc
MD
2021 if (tpte & PG_A)
2022 vm_page_flag_set(m, PG_REFERENCED);
c2fb025d
MD
2023#ifdef PMAP_DEBUG
2024 KKASSERT(PHYS_TO_VM_PAGE(tpte) == m);
2025#endif
984263bc
MD
2026
2027 /*
2028 * Update the vm_page_t clean and reference bits.
2029 */
2030 if (tpte & PG_M) {
2031#if defined(PMAP_DIAGNOSTIC)
2032 if (pmap_nw_modified((pt_entry_t) tpte)) {
d557216f
MD
2033 kprintf("pmap_remove_all: modified page "
2034 "not writable: va: %p, pte: 0x%lx\n",
2035 (void *)pv->pv_va, (long)tpte);
984263bc
MD
2036 }
2037#endif
2038 if (pmap_track_modified(pv->pv_va))
2039 vm_page_dirty(m);
2040 }
5926987a
MD
2041#ifdef PMAP_DEBUG
2042 KKASSERT(pv->pv_m == m);
2043#endif
2bb9cc6f 2044 KKASSERT(pv == TAILQ_FIRST(&m->md.pv_list));
984263bc 2045 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
8790d7d8
MD
2046 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2047 ++pv->pv_pmap->pm_generation;
984263bc 2048 m->md.pv_list_count--;
cef01e15
MD
2049 if (m->object)
2050 atomic_add_int(&m->object->agg_pv_list_count, -1);
17cde63e
MD
2051 if (TAILQ_EMPTY(&m->md.pv_list))
2052 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
b12defdc 2053 vm_object_hold(pv->pv_pmap->pm_pteobj);
0f7a3396 2054 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info);
b12defdc 2055 vm_object_drop(pv->pv_pmap->pm_pteobj);
984263bc
MD
2056 free_pv_entry(pv);
2057 }
17cde63e 2058 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0);
c2fb025d 2059 pmap_inval_done(&info);
984263bc
MD
2060}
2061
2062/*
4107b0c0
MD
2063 * Set the physical protection on the specified range of this map
2064 * as requested.
e0e69b7d 2065 *
4107b0c0 2066 * No requirements.
984263bc
MD
2067 */
2068void
2069pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
2070{
840de426 2071 unsigned *ptbase;
984263bc
MD
2072 vm_offset_t pdnxt, ptpaddr;
2073 vm_pindex_t sindex, eindex;
0f7a3396 2074 pmap_inval_info info;
984263bc
MD
2075
2076 if (pmap == NULL)
2077 return;
2078
2079 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2080 pmap_remove(pmap, sva, eva);
2081 return;
2082 }
2083
2084 if (prot & VM_PROT_WRITE)
2085 return;
2086
4107b0c0 2087 lwkt_gettoken(&vm_token);
0f7a3396 2088 pmap_inval_init(&info);
984263bc
MD
2089
2090 ptbase = get_ptbase(pmap);
2091
2092 sindex = i386_btop(sva);
2093 eindex = i386_btop(eva);
2094
2095 for (; sindex < eindex; sindex = pdnxt) {
984263bc
MD
2096 unsigned pdirindex;
2097
2098 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
2099
2100 pdirindex = sindex / NPDEPG;
2101 if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
c2fb025d 2102 pmap_inval_interlock(&info, pmap, -1);
55f2596a 2103 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
984263bc 2104 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
c2fb025d 2105 pmap_inval_deinterlock(&info, pmap);
984263bc
MD
2106 continue;
2107 }
2108
2109 /*
2110 * Weed out invalid mappings. Note: we assume that the page
2111 * directory table is always allocated, and in kernel virtual.
2112 */
2113 if (ptpaddr == 0)
2114 continue;
2115
2116 if (pdnxt > eindex) {
2117 pdnxt = eindex;
2118 }
2119
2120 for (; sindex != pdnxt; sindex++) {
984263bc 2121 unsigned pbits;
c2fb025d 2122 unsigned cbits;
984263bc
MD
2123 vm_page_t m;
2124
17cde63e 2125 /*
d5b2d319 2126 * XXX non-optimal.
17cde63e 2127 */
c2fb025d
MD
2128 pmap_inval_interlock(&info, pmap, i386_ptob(sindex));
2129again:
984263bc 2130 pbits = ptbase[sindex];
c2fb025d 2131 cbits = pbits;
984263bc
MD
2132
2133 if (pbits & PG_MANAGED) {
2134 m = NULL;
2135 if (pbits & PG_A) {
2136 m = PHYS_TO_VM_PAGE(pbits);
2137 vm_page_flag_set(m, PG_REFERENCED);
c2fb025d 2138 cbits &= ~PG_A;
984263bc
MD
2139 }
2140 if (pbits & PG_M) {
2141 if (pmap_track_modified(i386_ptob(sindex))) {
2142 if (m == NULL)
2143 m = PHYS_TO_VM_PAGE(pbits);
2144 vm_page_dirty(m);
c2fb025d 2145 cbits &= ~PG_M;
984263bc
MD
2146 }
2147 }
2148 }
c2fb025d
MD
2149 cbits &= ~PG_RW;
2150 if (pbits != cbits &&
2151 !atomic_cmpset_int(ptbase + sindex, pbits, cbits)) {
2152 goto again;
984263bc 2153 }
c2fb025d 2154 pmap_inval_deinterlock(&info, pmap);
984263bc
MD
2155 }
2156 }
c2fb025d 2157 pmap_inval_done(&info);
4107b0c0 2158 lwkt_reltoken(&vm_token);
984263bc
MD
2159}
2160
2161/*
4107b0c0
MD
2162 * Insert the given physical page (p) at the specified virtual address (v)
2163 * in the target physical map with the protection requested.
984263bc 2164 *
4107b0c0
MD
2165 * If specified, the page will be wired down, meaning that the related pte
2166 * cannot be reclaimed.
984263bc 2167 *
4107b0c0 2168 * No requirements.
984263bc
MD
2169 */
2170void
2171pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2172 boolean_t wired)
2173{
6ef943a3 2174 vm_paddr_t pa;
840de426 2175 unsigned *pte;
6ef943a3 2176 vm_paddr_t opa;
984263bc
MD
2177 vm_offset_t origpte, newpte;
2178 vm_page_t mpte;
0f7a3396 2179 pmap_inval_info info;
2bb9cc6f 2180 pv_entry_t pv;
984263bc
MD
2181
2182 if (pmap == NULL)
2183 return;
2184
2185 va &= PG_FRAME;
2186#ifdef PMAP_DIAGNOSTIC
c439ad8f 2187 if (va >= KvaEnd)
984263bc 2188 panic("pmap_enter: toobig");
d557216f
MD
2189 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) {
2190 panic("pmap_enter: invalid to pmap_enter page "
2191 "table pages (va: %p)", (void *)va);
2192 }
984263bc 2193#endif
fbbaeba3
MD
2194 if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) {
2195 kprintf("Warning: pmap_enter called on UVA with kernel_pmap\n");
7ce2998e 2196 print_backtrace(-1);
fbbaeba3
MD
2197 }
2198 if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) {
2199 kprintf("Warning: pmap_enter called on KVA without kernel_pmap\n");
7ce2998e 2200 print_backtrace(-1);
fbbaeba3 2201 }
984263bc 2202
b12defdc 2203 vm_object_hold(pmap->pm_pteobj);
4107b0c0
MD
2204 lwkt_gettoken(&vm_token);
2205
984263bc 2206 /*
2bb9cc6f
MD
2207 * This can block; get it before we do anything important.
2208 */
2209 if (pmap_initialized &&
2210 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
2211 pv = get_pv_entry();
2212 } else {
2213 pv = NULL;
2214 }
2215
2216 /*
984263bc
MD
2217 * In the case that a page table page is not
2218 * resident, we are creating it here.
2219 */
17cde63e 2220 if (va < UPT_MIN_ADDRESS)
984263bc 2221 mpte = pmap_allocpte(pmap, va);
17cde63e
MD
2222 else
2223 mpte = NULL;
984263bc 2224
b12defdc
MD
2225 if ((prot & VM_PROT_NOSYNC) == 0)
2226 pmap_inval_init(&info);
984263bc
MD
2227 pte = pmap_pte(pmap, va);
2228
2229 /*
2230 * Page Directory table entry not valid, we need a new PT page
2231 */
2232 if (pte == NULL) {
ed20d0e3 2233 panic("pmap_enter: invalid page directory pdir=0x%lx, va=%p",
d557216f 2234 (long)pmap->pm_pdir[PTDPTDI], (void *)va);
984263bc
MD
2235 }
2236
2237 pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
2238 origpte = *(vm_offset_t *)pte;
2239 opa = origpte & PG_FRAME;
2240
2241 if (origpte & PG_PS)
2242 panic("pmap_enter: attempted pmap_enter on 4MB page");
2243
2244 /*
2245 * Mapping has not changed, must be protection or wiring change.
2246 */
2247 if (origpte && (opa == pa)) {
2248 /*
2249 * Wiring change, just update stats. We don't worry about
2250 * wiring PT pages as they remain resident as long as there
2251 * are valid mappings in them. Hence, if a user page is wired,
2252 * the PT page will be also.
2253 */
2254 if (wired && ((origpte & PG_W) == 0))
2255 pmap->pm_stats.wired_count++;
2256 else if (!wired && (origpte & PG_W))
2257 pmap->pm_stats.wired_count--;
2258
2259#if defined(PMAP_DIAGNOSTIC)
2260 if (pmap_nw_modified((pt_entry_t) origpte)) {
d557216f
MD
2261 kprintf("pmap_enter: modified page not "
2262 "writable: va: %p, pte: 0x%lx\n",
2263 (void *)va, (long )origpte);
984263bc
MD
2264 }
2265#endif
2266
2267 /*
984263bc
MD
2268 * We might be turning off write access to the page,
2269 * so we go ahead and sense modify status.
2270 */
2271 if (origpte & PG_MANAGED) {
2272 if ((origpte & PG_M) && pmap_track_modified(va)) {
2273 vm_page_t om;
2274 om = PHYS_TO_VM_PAGE(opa);
2275 vm_page_dirty(om);
2276 }
2277 pa |= PG_MANAGED;
17cde63e 2278 KKASSERT(m->flags & PG_MAPPED);
984263bc
MD
2279 }
2280 goto validate;
2281 }
2282 /*
2283 * Mapping has changed, invalidate old range and fall through to
2284 * handle validating new mapping.
5926987a
MD
2285 *
2286 * Since we have a ref on the page directory page pmap_pte()
2287 * will always return non-NULL.
2288 *
2289 * NOTE: pmap_remove_pte() can block and cause the temporary ptbase
2290 * to get wiped; reload the ptbase. I'm not sure if it is
2291 * also possible to race another pmap_enter() but check for
2292 * that case too.
984263bc 2293 */
5926987a 2294 while (opa) {
984263bc 2295 int err;
5926987a
MD
2296
2297 KKASSERT((origpte & PG_FRAME) ==
2298 (*(vm_offset_t *)pte & PG_FRAME));
0f7a3396 2299 err = pmap_remove_pte(pmap, pte, va, &info);
984263bc 2300 if (err)
d557216f 2301 panic("pmap_enter: pte vanished, va: %p", (void *)va);
5926987a
MD
2302 pte = pmap_pte(pmap, va);
2303 origpte = *(vm_offset_t *)pte;
2304 opa = origpte & PG_FRAME;
2305 if (opa) {
2306 kprintf("pmap_enter: Warning, raced pmap %p va %p\n",
2307 pmap, (void *)va);
2308 }
984263bc
MD
2309 }
2310
2311 /*
2312 * Enter on the PV list if part of our managed memory. Note that we
2313 * raise IPL while manipulating pv_table since pmap_enter can be
2314 * called at interrupt time.
2315 */
2316 if (pmap_initialized &&
2317 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
2bb9cc6f
MD
2318 pmap_insert_entry(pmap, pv, va, mpte, m);
2319 pv = NULL;
5926987a 2320 ptbase_assert(pmap);
984263bc 2321 pa |= PG_MANAGED;
17cde63e 2322 vm_page_flag_set(m, PG_MAPPED);
984263bc
MD
2323 }
2324
2325 /*
2326 * Increment counters
2327 */
eec2b734 2328 ++pmap->pm_stats.resident_count;
984263bc
MD
2329 if (wired)
2330 pmap->pm_stats.wired_count++;
5926987a 2331 KKASSERT(*pte == 0);
984263bc
MD
2332
2333validate:
2334 /*
2335 * Now validate mapping with desired protection/wiring.
2336 */
5926987a 2337 ptbase_assert(pmap);
984263bc
MD
2338 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2339
2340 if (wired)
2341 newpte |= PG_W;
2342 if (va < UPT_MIN_ADDRESS)
2343 newpte |= PG_U;
fbbaeba3 2344 if (pmap == &kernel_pmap)
984263bc
MD
2345 newpte |= pgeflag;
2346
2347 /*
2bb9cc6f
MD
2348 * If the mapping or permission bits are different, we need
2349 * to update the pte. If the pte is already present we have
2350 * to get rid of the extra wire-count on mpte we had obtained
2351 * above.
984263bc
MD
2352 */
2353 if ((origpte & ~(PG_M|PG_A)) != newpte) {
b12defdc
MD
2354 if (prot & VM_PROT_NOSYNC)
2355 cpu_invlpg((void *)va);
2356 else
2357 pmap_inval_interlock(&info, pmap, va);
5926987a 2358 ptbase_assert(pmap);
2bb9cc6f
MD
2359
2360 if (*pte) {
2361 KKASSERT((*pte & PG_FRAME) == (newpte & PG_FRAME));
2362 if (vm_page_unwire_quick(mpte))
2363 panic("pmap_enter: Insufficient wire_count");
2364 }
2365
984263bc 2366 *pte = newpte | PG_A;
b12defdc
MD
2367 if ((prot & VM_PROT_NOSYNC) == 0)
2368 pmap_inval_deinterlock(&info, pmap);
17cde63e
MD
2369 if (newpte & PG_RW)
2370 vm_page_flag_set(m, PG_WRITEABLE);
984263bc 2371 }
c695044a 2372 KKASSERT((newpte & PG_MANAGED) == 0 || (m->flags & PG_MAPPED));
b12defdc
MD
2373 if ((prot & VM_PROT_NOSYNC) == 0)
2374 pmap_inval_done(&info);
2bb9cc6f
MD
2375 if (pv)
2376 free_pv_entry(pv);
4107b0c0 2377 lwkt_reltoken(&vm_token);
b12defdc 2378 vm_object_drop(pmap->pm_pteobj);
984263bc
MD
2379}
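/*
 * A minimal usage sketch based only on the signature above; the example_*
 * helper is hypothetical.  Entering a managed page read/write and unwired
 * creates the pv entry and pte, while re-entering the same (va, m) pair
 * with a different protection or wiring takes the "mapping has not
 * changed" path and only adjusts the pte bits and wired_count.
 */
static void
example_enter_rw(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE, FALSE);
}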
2380
2381/*
17cde63e
MD
2382 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired.
2383 * This code also assumes that the pmap has no pre-existing entry for this
2384 * VA.
2385 *
2386 * This code currently may only be used on user pmaps, not kernel_pmap.
4107b0c0
MD
2387 *
2388 * No requirements.
984263bc 2389 */
1b9d3514 2390void
17cde63e 2391pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
984263bc
MD
2392{
2393 unsigned *pte;
6ef943a3 2394 vm_paddr_t pa;
17cde63e
MD
2395 vm_page_t mpte;
2396 unsigned ptepindex;
2397 vm_offset_t ptepa;
0f7a3396 2398 pmap_inval_info info;
2bb9cc6f 2399 pv_entry_t pv;
0f7a3396 2400
b12defdc 2401 vm_object_hold(pmap->pm_pteobj);
4107b0c0 2402 lwkt_gettoken(&vm_token);
2bb9cc6f
MD
2403
2404 /*
2405 * This can block; get it before we do anything important.
2406 */
2407 if (pmap_initialized &&
2408 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
2409 pv = get_pv_entry();
2410 } else {
2411 pv = NULL;
2412 }
2413
0f7a3396 2414 pmap_inval_init(&info);
984263bc 2415
fbbaeba3
MD
2416 if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) {
2417 kprintf("Warning: pmap_enter_quick called on UVA with kernel_pmap\n");
7ce2998e 2418 print_backtrace(-1);
fbbaeba3
MD
2419 }
2420 if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) {
2421 kprintf("Warning: pmap_enter_quick called on KVA without kernel_pmap\n");
7ce2998e 2422 print_backtrace(-1);
fbbaeba3
MD
2423 }
2424
17cde63e
MD
2425 KKASSERT(va < UPT_MIN_ADDRESS); /* assert used on user pmaps only */
2426
984263bc 2427 /*
17cde63e
MD
2428 * Calculate the page table page (mpte), allocating it if necessary.
2429 *
2430 * A held page table page (mpte), or NULL, is passed onto the
2431 * section following.
984263bc
MD
2432 */
2433 if (va < UPT_MIN_ADDRESS) {
984263bc
MD
2434 /*
2435 * Calculate pagetable page index
2436 */
2437 ptepindex = va >> PDRSHIFT;
17cde63e
MD
2438
2439 do {
984263bc
MD
2440 /*
2441 * Get the page directory entry
2442 */
2443 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2444
2445 /*
2446 * If the page table page is mapped, we just increment
90244566 2447 * the wire count, and activate it.
984263bc
MD
2448 */
2449 if (ptepa) {
2450 if (ptepa & PG_PS)
2451 panic("pmap_enter_quick: unexpected mapping into 4MB page");
2452 if (pmap->pm_ptphint &&
17cde63e 2453 (pmap->pm_ptphint->pindex == ptepindex)) {
984263bc 2454 mpte = pmap->pm_ptphint;
2bb9cc6f 2455 vm_page_wire_quick(mpte);
984263bc 2456 } else {
2bb9cc6f
MD
2457 mpte = pmap_page_lookup(pmap->pm_pteobj,
2458 ptepindex);
984263bc 2459 pmap->pm_ptphint = mpte;
2bb9cc6f 2460 vm_page_wire_quick(mpte);
b12defdc 2461 vm_page_wakeup(mpte);
984263bc 2462 }
984263bc
MD
2463 } else {
2464 mpte = _pmap_allocpte(pmap, ptepindex);
2465 }
17cde63e 2466 } while (mpte == NULL);
984263bc
MD
2467 } else {
2468 mpte = NULL;
17cde63e 2469 /* this code path is not yet used */
984263bc
MD
2470 }
2471
2472 /*
17cde63e
MD
2473 * With a valid (and held) page directory page, we can just use
2474 * vtopte() to get to the pte. If the pte is already present
2475 * we do not disturb it.
984263bc
MD
2476 */
2477 pte = (unsigned *)vtopte(va);
17cde63e 2478 if (*pte & PG_V) {
984263bc 2479 if (mpte)
90244566 2480 pmap_unwire_pte(pmap, mpte, &info);
17cde63e
MD
2481 pa = VM_PAGE_TO_PHYS(m);
2482 KKASSERT(((*pte ^ pa) & PG_FRAME) == 0);
c2fb025d 2483 pmap_inval_done(&info);
4107b0c0 2484 lwkt_reltoken(&vm_token);
b12defdc 2485 vm_object_drop(pmap->pm_pteobj);
2bb9cc6f
MD
2486 if (pv)
2487 free_pv_entry(pv);
17cde63e 2488 return;
984263bc
MD
2489 }
2490
2491 /*
17cde63e 2492 * Enter on the PV list if part of our managed memory
984263bc 2493 */
2bb9cc6f
MD
2494 if (pmap_initialized &&
2495 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
2496 pmap_insert_entry(pmap, pv, va, mpte, m);
2497 pv = NULL;
17cde63e
MD
2498 vm_page_flag_set(m, PG_MAPPED);
2499 }
984263bc
MD
2500
2501 /*
2502 * Increment counters
2503 */
eec2b734 2504 ++pmap->pm_stats.resident_count;
984263bc
MD
2505
2506 pa = VM_PAGE_TO_PHYS(m);
2507
2508 /*
2509 * Now validate mapping with RO protection
2510 */
2511 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
2512 *pte = pa | PG_V | PG_U;
2513 else
2514 *pte = pa | PG_V | PG_U | PG_MANAGED;
17cde63e 2515/* pmap_inval_add(&info, pmap, va); shouldn't be needed inval->valid */
c2fb025d 2516 pmap_inval_done(&info);
2bb9cc6f
MD
2517 if (pv)
2518 free_pv_entry(pv);
4107b0c0 2519 lwkt_reltoken(&vm_token);
b12defdc 2520 vm_object_drop(pmap->pm_pteobj);
984263bc
MD
2521}
2522
2523/*
2524 * Make a temporary mapping for a physical address. This is only intended
2525 * to be used for panic dumps.
4107b0c0 2526 *
fb8345e6
MD
2527 * The caller is responsible for calling smp_invltlb().
2528 *
4107b0c0 2529 * No requirements.
984263bc
MD
2530 */
2531void *
8e5ea5f7 2532pmap_kenter_temporary(vm_paddr_t pa, long i)
984263bc 2533{
fb8345e6 2534 pmap_kenter_quick((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa);
984263bc
MD
2535 return ((void *)crashdumpmap);
2536}
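/*
 * A minimal dump-loop sketch; the example_* helper is hypothetical and
 * assumes smp_invltlb()/cpu_invltlb() are the appropriate TLB flushes for
 * the SMP and UP configurations.  Each physical page is mapped through
 * crashdumpmap and the TLB is flushed once, as required by the comment
 * above.
 */
static void *
example_map_dump_window(vm_paddr_t pa_base, long npages)
{
	void *base = NULL;
	long i;

	for (i = 0; i < npages; ++i)
		base = pmap_kenter_temporary(pa_base + i * PAGE_SIZE, i);
#ifdef SMP
	smp_invltlb();
#else
	cpu_invltlb();
#endif
	return (base);		/* base of the crashdumpmap window */
}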
2537
2538#define MAX_INIT_PT (96)
06ecca5a 2539
984263bc 2540/*
06ecca5a
MD
2541 * This routine preloads the ptes for a given object into the specified pmap.
2542 * This eliminates the blast of soft faults on process startup and
2543 * immediately after an mmap.
4107b0c0
MD
2544 *
2545 * No requirements.
984263bc 2546 */
1f804340
MD
2547static int pmap_object_init_pt_callback(vm_page_t p, void *data);
2548
984263bc 2549void
083a7402
MD
2550pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
2551 vm_object_t object, vm_pindex_t pindex,
2552 vm_size_t size, int limit)
984263bc 2553{
1f804340 2554 struct rb_vm_page_scan_info info;
287ebb09 2555 struct lwp *lp;
984263bc 2556 int psize;
984263bc 2557
54a764e8
MD
2558 /*
2559 * We can't preinit if read access isn't set or there is no pmap
2560 * or object.
2561 */
083a7402 2562 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL)
984263bc
MD
2563 return;
2564
54a764e8
MD
2565 /*
2566 * We can't preinit if the pmap is not the current pmap
2567 */
287ebb09
MD
2568 lp = curthread->td_lwp;
2569 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace))
54a764e8
MD
2570 return;
2571
984263bc
MD
2572 psize = i386_btop(size);
2573
2574 if ((object->type != OBJT_VNODE) ||
2575 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
2576 (object->resident_page_count > MAX_INIT_PT))) {
2577 return;
2578 }
2579
2580 if (psize + pindex > object->size) {
2581 if (object->size < pindex)
2582 return;
2583 psize = object->size - pindex;
2584 }
2585
1f804340
MD
2586 if (psize == 0)
2587 return;
06ecca5a 2588
984263bc 2589 /*
1f804340
MD
2590 * Use a red-black scan to traverse the requested range and load
2591 * any valid pages found into the pmap.
06ecca5a 2592 *
9acd5bbb
MD
2593 * We cannot safely scan the object's memq unless we are in a
2594 * critical section since interrupts can remove pages from objects.
984263bc 2595 */
1f804340
MD
2596 info.start_pindex = pindex;
2597 info.end_pindex = pindex + psize - 1;
2598 info.limit = limit;
2599 info.mpte = NULL;
2600 info.addr = addr;
2601 info.pmap = pmap;
2602
2f2d9e58 2603 vm_object_hold(object);
1f804340
MD
2604 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
2605 pmap_object_init_pt_callback, &info);
2f2d9e58 2606 vm_object_drop(object);
1f804340 2607}
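/*
 * A minimal caller sketch; the example_* helper is hypothetical.  It
 * preloads the resident pages of a vnode-backed object into the current
 * process pmap right after an mmap; pmap_object_init_pt() itself skips
 * the work when the object is too large for a partial prefault or the
 * pmap is not the current one.
 */
static void
example_preload(pmap_t pmap, vm_offset_t addr, vm_object_t object,
		vm_pindex_t pindex, vm_size_t size)
{
	pmap_object_init_pt(pmap, addr, VM_PROT_READ, object, pindex,
			    size, MAP_PREFAULT_PARTIAL);
}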
06ecca5a 2608
4107b0c0
MD
2609/*
2610 * The caller must hold vm_token.
2611 */
1f804340
MD
2612static
2613int
2614pmap_object_init_pt_callback(vm_page_t p, void *data)
2615{
2616 struct rb_vm_page_scan_info *info = data;
2617 vm_pindex_t rel_index;
2618 /*
2619 * Don't allow a madvise to blow away our really
2620 * free pages by allocating pv entries.
2621 */
2622 if ((info->limit & MAP_PREFAULT_MADVISE) &&
2623 vmstats.v_free_count < vmstats.v_free_reserved) {
2624 return(-1);
984263bc 2625 }
0d987a03
MD
2626
2627 /*
2628 * Ignore list markers and ignore pages we cannot instantly
2629 * busy (while holding the object token).
2630 */
2631 if (p->flags & PG_MARKER)
2632 return 0;
b12defdc
MD
2633 if (vm_page_busy_try(p, TRUE))
2634 return 0;
1f804340 2635 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
b12defdc 2636 (p->flags & PG_FICTITIOUS) == 0) {
1f804340
MD
2637 if ((p->queue - p->pc) == PQ_CACHE)
2638 vm_page_deactivate(p);
1f804340 2639 rel_index = p->pindex - info->start_pindex;
17cde63e
MD
2640 pmap_enter_quick(info->pmap,
2641 info->addr + i386_ptob(rel_index), p);
1f804340 2642 }
b12defdc 2643 vm_page_wakeup(p);
1f804340 2644 return(0);
984263bc
MD
2645}
2646
2647/*
1b9d3514
MD
2648 * Return TRUE if the pmap is in shape to trivially
2649 * pre-fault the specified address.
2650 *
2651 * Returns FALSE if it would be non-trivial or if a
2652 * pte is already loaded into the slot.
4107b0c0
MD
2653 *
2654 * No requirements.
984263bc 2655 */
1b9d3514
MD
2656int
2657pmap_prefault_ok(pmap_t pmap, vm_offset_t addr)
984263bc 2658{
1b9d3514 2659 unsigned *pte;
4107b0c0 2660 int ret;
984263bc 2661
4107b0c0
MD
2662 lwkt_gettoken(&vm_token);
2663 if ((*pmap_pde(pmap, addr)) == 0) {
2664 ret = 0;
2665 } else {
2666 pte = (unsigned *) vtopte(addr);
2667 ret = (*pte) ? 0 : 1;
2668 }
2669 lwkt_reltoken(&vm_token);
2670 return(ret);
984263bc
MD
2671}
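/*
 * A minimal prefault sketch; the example_* helper is hypothetical.
 * pmap_enter_quick() is only issued when pmap_prefault_ok() reports that
 * the page directory entry exists and the pte slot is still empty.
 */
static void
example_prefault(pmap_t pmap, vm_offset_t addr, vm_page_t m)
{
	if (pmap_prefault_ok(pmap, addr))
		pmap_enter_quick(pmap, addr, m);
}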
2672
2673/*
4107b0c0
MD
2674 * Change the wiring attribute for a map/virtual-address pair. The mapping
2675 * must already exist.
2676 *
2677 * No requirements.
984263bc
MD
2678 */
2679void
840de426 2680pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
984263bc 2681{
840de426 2682 unsigned *pte;
984263bc
MD
2683
2684 if (pmap == NULL)
2685 return;
2686
4107b0c0 2687 lwkt_gettoken(&vm_token);
984263bc
MD
2688 pte = pmap_pte(pmap, va);
2689
2690 if (wired && !pmap_pte_w(pte))
2691 pmap->pm_stats.wired_count++;
2692 else if (!wired && pmap_pte_w(pte))
2693 pmap->pm_stats.wired_count--;
2694
2695 /*
2696 * Wiring is not a hardware characteristic so there is no need to
0f7a3396
MD
2697 * invalidate TLB. However, in an SMP environment we must use
2698 * a locked bus cycle to update the pte (if we are not using
2699 * the pmap_inval_*() API that is)... it's ok to do this for simple
2700 * wiring changes.
984263bc 2701 */
0f7a3396
MD
2702#ifdef SMP
2703 if (wired)
2704 atomic_set_int(pte, PG_W);
2705 else
2706 atomic_clear_int(pte, PG_W);
2707#else
2708 if (wired)
2709 atomic_set_int_nonlocked(pte, PG_W);
2710 else
2711 atomic_clear_int_nonlocked(pte, PG_W);
2712#endif
4107b0c0 2713 lwkt_reltoken(&vm_token);
984263bc
MD
2714}
2715
984263bc 2716/*
4107b0c0
MD
2717 * Copy the range specified by src_addr/len from the source map to the
2718 * range dst_addr/len in the destination map.
2719 *
2720 * This routine is only advisory and need not do anything.
984263bc 2721 *
4107b0c0 2722 * No requirements.
984263bc 2723 */
984263bc 2724void
840de426 2725pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
4107b0c0 2726 vm_size_t len, vm_offset_t src_addr)
984263bc 2727{
4107b0c0 2728 /* does nothing */
984263bc
MD
2729}
2730
2731/*
4107b0c0
MD
2732 * Zero the specified PA by mapping the page into KVM and clearing its
2733 * contents.
e0e69b7d 2734 *
4107b0c0 2735 * No requirements.
984263bc
MD
2736 */
2737void
6ef943a3 2738pmap_zero_page(vm_paddr_t phys)
984263bc 2739{
85100692 2740 struct mdglobaldata *gd = mdcpu;
17a9f566 2741
e0e69b7d 2742 crit_enter();
85100692
MD
2743 if (*(int *)gd->gd_CMAP3)
2744 panic("pmap_zero_page: CMAP3 busy");
85100692 2745 *(int *)gd->gd_CMAP3 =
17a9f566 2746 PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
85100692 2747 cpu_invlpg(gd->gd_CADDR3);
1fa15583 2748 bzero(gd->gd_CADDR3, PAGE_SIZE);
85100692 2749 *(int *) gd->gd_CMAP3 = 0;
e0e69b7d 2750 crit_exit();
8100156a
MD
2751}
2752
2753/*
4107b0c0 2754 * Assert that a page is empty; panic if it isn't.
8100156a 2755 *
4107b0c0 2756 * No requirements.
8100156a
MD
2757 */
2758void
2759pmap_page_assertzero(vm_paddr_t phys)
2760{
2761 struct mdglobaldata *gd = mdcpu;
2762 int i;
2763
2764 crit_enter();
2765 if (*(int *)gd->gd_CMAP3)
2766 panic("pmap_page_assertzero: CMAP3 busy");
2767 *(int *)gd->gd_CMAP3 =
2768 PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2769 cpu_invlpg(gd->gd_CADDR3);
2770 for (i = 0; i < PAGE_SIZE; i += 4) {
2771 if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) {
ed20d0e3 2772 panic("pmap_page_assertzero() @ %p not zero!",
8100156a
MD
2773 (void *)gd->gd_CADDR3);
2774 }
2775 }
2776 *(int *) gd->gd_CMAP3 = 0;
2777 crit_exit();
984263bc
MD
2778}
2779
2780/*
4107b0c0
MD
2781 * Zero part of a physical page by mapping it into memory and clearing
2782 * its contents with bzero.
e0e69b7d 2783 *
4107b0c0 2784 * off and size may not cover an area beyond a single hardware page.
984263bc 2785 *
4107b0c0 2786 * No requirements.
984263bc
MD
2787 */
2788void
6ef943a3 2789pmap_zero_page_area(vm_paddr_t phys, int off, int size)
984263bc 2790{
85100692 2791 struct mdglobaldata *gd = mdcpu;
17a9f566 2792
e0e69b7d 2793 crit_enter();
85100692
MD
2794 if (*(int *) gd->gd_CMAP3)
2795 panic("pmap_zero_page: CMAP3 busy");
85100692
MD
2796 *(int *) gd->gd_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2797 cpu_invlpg(gd->gd_CADDR3);
1fa15583 2798 bzero((char *)gd->gd_CADDR3 + off, size);
85100692 2799 *(int *) gd->gd_CMAP3 = 0;
e0e69b7d 2800 crit_exit();
984263bc
MD
2801}
2802
2803/*
4107b0c0
MD
2804 * Copy the physical page from the source PA to the target PA.
2805 * This function may be called from an interrupt. No locking
2806 * is required.
e0e69b7d 2807 *
4107b0c0 2808 * No requirements.
984263bc
MD
2809 */
2810void
6ef943a3 2811pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
984263bc 2812{
85100692 2813 struct mdglobaldata *gd = mdcpu;
17a9f566 2814
e0e69b7d 2815 crit_enter();
85100692
MD
2816 if (*(int *) gd->gd_CMAP1)
2817 panic("pmap_copy_page: CMAP1 busy");
2818 if (*(int *) gd->gd_CMAP2)
2819 panic("pmap_copy_page: CMAP2 busy");
984263bc 2820
85100692
MD
2821 *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2822 *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
984263bc 2823
85100692
MD
2824 cpu_invlpg(gd->gd_CADDR1);
2825 cpu_invlpg(gd->gd_CADDR2);
984263bc 2826
85100692 2827 bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE);
984263bc 2828
85100692
MD
2829 *(int *) gd->gd_CMAP1 = 0;
2830 *(int *) gd->gd_CMAP2 = 0;
e0e69b7d 2831 crit_exit();
984263bc
MD
2832}
2833
f6bf3af1 2834/*
4107b0c0
MD
2835 * Copy a portion of the physical page from the source PA to the target PA.
2836 * This function may be called from an interrupt. No locking
2837 * is required.
f6bf3af1 2838 *
4107b0c0 2839 * No requirements.
f6bf3af1
MD
2840 */
2841void
2842pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
2843{
2844 struct mdglobaldata *gd = mdcpu;
2845
2846 crit_enter();
2847 if (*(int *) gd->gd_CMAP1)
2848 panic("pmap_copy_page: CMAP1 busy");
2849 if (*(int *) gd->gd_CMAP2)
2850 panic("pmap_copy_page: CMAP2 busy");
2851
2852 *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2853 *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2854
2855 cpu_invlpg(gd->gd_CADDR1);
2856 cpu_invlpg(gd->gd_CADDR2);
2857
2858 bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK),
2859 (char *)gd->gd_CADDR2 + (dst & PAGE_MASK),
2860 bytes);
2861
2862 *(int *) gd->gd_CMAP1 = 0;
2863 *(int *) gd->gd_CMAP2 = 0;
2864 crit_exit();
2865}
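/*
 * A minimal sketch; the example_* helper and the 512-byte length are
 * hypothetical.  pmap_copy_page_frag() honors sub-page offsets in both
 * the source and destination physical addresses via PAGE_MASK.
 */
static void
example_copy_fragment(vm_paddr_t src_pa, vm_paddr_t dst_pa)
{
	pmap_copy_page_frag(src_pa, dst_pa, 512);
}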
2866
984263bc
MD
2867/*
2868 * Returns true if the pmap's pv is one of the first
2869 * 16 pvs linked to from this page. This count may
2870 * be changed upwards or downwards in the future; it
2871 * is only necessary that true be returned for a small
2872 * subset of pmaps for proper page aging.
4107b0c0
MD
2873 *
2874 * No requirements.
984263bc
MD
2875 */
2876boolean_t
840de426 2877pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
984263bc
MD
2878{
2879 pv_entry_t pv;
2880 int loops = 0;
984263bc
MD
2881
2882 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2883 return FALSE;
2884
4107b0c0 2885 lwkt_gettoken(&vm_token);
984263bc
MD
2886 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2887 if (pv->pv_pmap == pmap) {
11502947 2888 lwkt_reltoken(&vm_token);
984263bc
MD
2889 return TRUE;
2890 }
2891 loops++;
2892 if (loops >= 16)
2893 break;
2894 }
4107b0c0 2895 lwkt_reltoken(&vm_token);
984263bc
MD
2896 return (FALSE);
2897}
2898
984263bc
MD
2899/*
2900 * Remove all pages from the specified address space;
2901 * this aids process exit speeds. Also, this code
2902 * is special-cased for the current process only, but
2903 * can have the more generic (and slightly slower)
2904 * mode enabled. This is much faster than pmap_remove
2905 * in the case of running down an entire address space.
4107b0c0
MD
2906 *
2907 * No requirements.
984263bc
MD
2908 */
2909void
840de426 2910pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
984263bc 2911{
287ebb09 2912 struct lwp *lp;
984263bc
MD
2913 unsigned *pte, tpte;
2914 pv_entry_t pv, npv;
984263bc 2915 vm_page_t m;
0f7a3396 2916 pmap_inval_info info;
4a22e893 2917 int iscurrentpmap;
8790d7d8 2918 int32_t save_generation;
984263bc 2919
287ebb09
MD
2920 lp = curthread->td_lwp;
2921 if (lp && pmap == vmspace_pmap(lp->lwp_vmspace))
4a22e893
MD
2922 iscurrentpmap = 1;
2923 else
2924 iscurrentpmap = 0;
984263bc 2925
b12defdc
MD
2926 if (pmap->pm_pteobj)
2927 vm_object_hold(pmap->pm_pteobj);
4107b0c0 2928 lwkt_gettoken(&vm_token);
0f7a3396 2929 pmap_inval_init(&info);
b12defdc 2930
4a22e893 2931 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
984263bc
MD
2932 if (pv->pv_va >= eva || pv->pv_va < sva) {
2933 npv = TAILQ_NEXT(pv, pv_plist);
2934 continue;
2935 }
2936
8790d7d8
MD
2937 KKASSERT(pmap == pv->pv_pmap);
2938
4a22e893
MD
2939 if (iscurrentpmap)
2940 pte = (unsigned *)vtopte(pv->pv_va);
2941 else
8790d7d8 2942 pte = pmap_pte_quick(pmap, pv->pv_va);
5926987a 2943 KKASSERT(*pte);
c2fb025d 2944 pmap_inval_interlock(&info, pmap, pv->pv_va);
984263bc 2945
4a22e893
MD
2946 /*
2947 * We cannot remove wired pages from a process' mapping
2948 * at this time
2949 */
17cde63e 2950 if (*pte & PG_W) {
c2fb025d 2951 pmap_inval_deinterlock(&info, pmap);
984263bc
MD
2952 npv = TAILQ_NEXT(pv, pv_plist);
2953 continue;
2954 }
2247fe02 2955 KKASSERT(*pte);
17cde63e 2956 tpte = loadandclear(pte);
c2fb025d 2957 pmap_inval_deinterlock(&info, pmap);
984263bc
MD
2958
2959 m = PHYS_TO_VM_PAGE(tpte);
5926987a 2960 test_m_maps_pv(m, pv);
984263bc
MD
2961
2962 KASSERT(m < &vm_page_array[vm_page_array_size],
2963 ("pmap_remove_pages: bad tpte %x", tpte));
2964
eec2b734
MD
2965 KKASSERT(pmap->pm_stats.resident_count > 0);
2966 --pmap->pm_stats.resident_count;
984263bc
MD
2967
2968 /*
2969 * Update the vm_page_t clean and reference bits.
2970 */
2971 if (tpte & PG_M) {
2972 vm_page_dirty(m);
2973 }
2974
984263bc 2975 npv = TAILQ_NEXT(pv, pv_plist);
5926987a
MD
2976#ifdef PMAP_DEBUG
2977 KKASSERT(pv->pv_m == m);
2978 KKASSERT(pv->pv_pmap == pmap);
2979#endif
8790d7d8
MD
2980 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
2981 save_generation = ++pmap->pm_generation;
984263bc
MD
2982
2983 m->md.pv_list_count--;
cef01e15
MD
2984 if (m->object)
2985 atomic_add_int(&m->object->agg_pv_list_count, -1);
984263bc 2986 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
17cde63e 2987 if (TAILQ_EMPTY(&m->md.pv_list))
984263bc 2988 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
984263bc 2989
8790d7d8 2990 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem, &info);
984263bc 2991 free_pv_entry(pv);
8790d7d8
MD
2992
2993 /*
2994 * Restart the scan if we blocked during the unuse or free
2995 * calls and other removals were made.
2996 */
2997 if (save_generation != pmap->pm_generation) {
2998 kprintf("Warning: pmap_remove_pages race-A avoided\n");
5926987a 2999 npv = TAILQ_FIRST(&pmap->pm_pvlist);
8790d7d8 3000 }
984263bc 3001 }
c2fb025d 3002 pmap_inval_done(&info);
4107b0c0 3003 lwkt_reltoken(&vm_token);
b12defdc
MD
3004 if (pmap->pm_pteobj)
3005 vm_object_drop(pmap->pm_pteobj);
984263bc
MD
3006}
3007
3008/*
3009 * pmap_testbit tests bits in ptes.
5e8d0349 3010 * Note that the testbit/clearbit routines are inline,
984263bc 3011 * and a lot of things compile-time evaluate.
4107b0c0
MD
3012 *
3013 * The caller must hold vm_token.
984263bc
MD
3014 */
3015static boolean_t
840de426 3016pmap_testbit(vm_page_t m, int bit)
984263bc
MD
3017{
3018 pv_entry_t pv;
3019 unsigned *pte;
984263bc
MD
3020
3021 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3022 return FALSE;
3023
3024 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
3025 return FALSE;
3026
984263bc
MD
3027 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3028 /*
3029 * If the bit being tested is the modified or accessed bit,
3030 * skip mappings (such as those in the clean submap) whose
3031 * modified state is never tracked.
3032 */
3033 if (bit & (PG_A|PG_M)) {
3034 if (!pmap_track_modified(pv->pv_va))
3035 continue;
3036 }
3037
3038#if defined(PMAP_DIAGNOSTIC)
3039 if (!pv->pv_pmap) {
d557216f
MD
3040 kprintf("Null pmap (tb) at va: %p\n",
3041 (void *)pv->pv_va);
984263bc
MD
3042 continue;
3043 }
3044#endif
3045 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
74b9d1ec 3046 if (*pte & bit) {
984263bc 3047 return TRUE;
74b9d1ec 3048 }
984263bc 3049 }
984263bc
MD
3050 return (FALSE);
3051}
3052
3053/*
4107b0c0
MD
3054 * This routine is used to modify bits in ptes
3055 *
3056 * The caller must hold vm_token.
984263bc
MD
3057 */
3058static __inline void
5e8d0349 3059pmap_clearbit(vm_page_t m, int bit)
984263bc 3060{
0f7a3396 3061 struct pmap_inval_info info;
840de426
MD
3062 pv_entry_t pv;
3063 unsigned *pte;
5e8d0349 3064 unsigned pbits;
984263bc
MD
3065
3066 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3067 return;
3068
0f7a3396 3069 pmap_inval_init(&info);
984263bc
MD
3070
3071 /*
3072 * Loop over all current mappings, setting/clearing as appropriate.
3073 * If setting RO, do we need to clear the VAC?
3074 */
3075 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3076 /*
3077 * don't write protect pager mappings
3078 */
5e8d0349 3079 if (bit == PG_RW) {
984263bc
MD
3080 if (!pmap_track_modified(pv->pv_va))
3081 continue;
3082 }
3083
3084#if defined(PMAP_DIAGNOSTIC)
3085 if (!pv->pv_pmap) {
d557216f
MD
3086 kprintf("Null pmap (cb) at va: %p\n",
3087 (void *)pv->pv_va);
984263bc
MD
3088 continue;
3089 }
3090#endif
3091
0f7a3396
MD
3092 /*
3093 * Careful here. We can use a locked bus instruction to
3094 * clear PG_A or PG_M safely but we need to synchronize
3095 * with the target cpus when we mess with PG_RW.
70fc5283
MD
3096 *
3097 * We do not have to force synchronization when clearing
3098 * PG_M even for PTEs generated via virtual memory maps,
3099 * because the virtual kernel will invalidate the pmap
3100 * entry when/if it needs to resynchronize the Modify bit.
0f7a3396 3101 */
70fc5283 3102 if (bit & PG_RW)
c2fb025d 3103 pmap_inval_interlock(&info, pv->pv_pmap, pv->pv_va);
17cde63e
MD
3104 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3105again:
5e8d0349
MD
3106 pbits = *pte;
3107 if (pbits & bit) {
3108 if (bit == PG_RW) {
17cde63e 3109 if (pbits & PG_M) {
5e8d0349 3110 vm_page_dirty(m);
17cde63e
MD
3111 atomic_clear_int(pte, PG_M|PG_RW);
3112 } else {
3113 /*
3114 * The cpu may be trying to set PG_M
3115 * simultaneously with our clearing
3116 * of PG_RW.
3117 */
3118 if (!atomic_cmpset_int(pte, pbits,
3119 pbits & ~PG_RW))
3120 goto again;
3121 }
5e8d0349
MD
3122 } else if (bit == PG_M) {
3123 /*
70fc5283
MD
3124 * We could also clear PG_RW here to force
3125 * a fault on write to redetect PG_M for
3126 * virtual kernels, but it isn't necessary
3127 * since virtual kernels invalidate the pte
3128 * when they clear the VPTE_M bit in their
3129 * virtual page tables.
5e8d0349 3130 */
70fc5283 3131 atomic_clear_int(pte, PG_M);
5e8d0349
MD
3132 } else {
3133 atomic_clear_int(pte, bit);
984263bc
MD
3134 }
3135 }
c2fb025d
MD
3136 if (bit & PG_RW)
3137 pmap_inval_deinterlock(&info, pv->pv_pmap);
984263bc 3138 }
c2fb025d 3139 pmap_inval_done(&info);
984263bc
MD
3140}
3141
3142/*
4107b0c0 3143 * Lower the permission for all mappings to a given page.
984263bc 3144 *
4107b0c0 3145 * No requirements.
984263bc
MD
3146 */
3147void
3148pmap_page_protect(vm_page_t m, vm_prot_t prot)
3149{
3150 if ((prot & VM_PROT_WRITE) == 0) {
4107b0c0 3151 lwkt_gettoken(&vm_token);
984263bc 3152 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
5e8d0349 3153 pmap_clearbit(m, PG_RW);
17cde63e 3154 vm_page_flag_clear(m, PG_WRITEABLE);
984263bc
MD
3155 } else {
3156 pmap_remove_all(m);
3157 }
4107b0c0 3158 lwkt_reltoken(&vm_token);
984263bc
MD
3159 }
3160}
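/*
 * A minimal sketch; the example_* helper is hypothetical.  Lowering a
 * page to read-only takes the pmap_clearbit(m, PG_RW) path above, while
 * revoking all access removes every mapping of the page via
 * pmap_remove_all().
 */
static void
example_lower_page(vm_page_t m, int revoke_all)
{
	if (revoke_all)
		pmap_page_protect(m, VM_PROT_NONE);
	else
		pmap_page_protect(m, VM_PROT_READ);
}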
3161
4107b0c0
MD
3162/*
3163 * Return the physical address given a physical page index.
3164 *
3165 * No requirements.
3166 */
6ef943a3 3167vm_paddr_t
cfd17028 3168pmap_phys_address(vm_pindex_t ppn)
984263bc
MD
3169{
3170 return (i386_ptob(ppn));
3171}
3172
3173/*
4107b0c0
MD
3174 * Return a count of reference bits for a page, clearing those bits.
3175 * It is not necessary for every reference bit to be cleared, but it
3176 * is necessary that 0 only be returned when there are truly no
3177 * reference bits set.
984263bc 3178 *
4107b0c0 3179 * No requirements.
984263bc
MD
3180 */
3181int