This is a major revamping of the pageout and low-memory handling code.
[dragonfly.git] / sys / platform / vkernel / platform / pmap.c
1/*
2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
3 * Copyright (c) 1991 Regents of the University of California.
4 * All rights reserved.
5 * Copyright (c) 1994 John S. Dyson
6 * All rights reserved.
7 * Copyright (c) 1994 David Greenman
8 * All rights reserved.
9 * Copyright (c) 2004-2006 Matthew Dillon
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in
20 * the documentation and/or other materials provided with the
21 * distribution.
22 * 3. Neither the name of The DragonFly Project nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific, prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
30 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
33 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
34 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
35 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
36 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
40 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
41 * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.31 2008/08/25 17:01:40 dillon Exp $
42 */
43/*
44 * NOTE: PMAP_INVAL_ADD: In pc32 this function is called prior to adjusting
45 * the PTE in the page table, because a cpu synchronization might be required.
46 * The actual invalidation is delayed until the following call or flush. In
47 * the VKERNEL build this function is called prior to adjusting the PTE and
48 * invalidates the table synchronously (not delayed), and is not SMP safe
49 * as a consequence.
50 */
51
52#include <sys/types.h>
53#include <sys/systm.h>
54#include <sys/kernel.h>
55#include <sys/stat.h>
56#include <sys/mman.h>
57#include <sys/vkernel.h>
58#include <sys/proc.h>
59#include <sys/thread.h>
60#include <sys/user.h>
61#include <sys/vmspace.h>
62
63#include <vm/pmap.h>
64#include <vm/vm_page.h>
65#include <vm/vm_extern.h>
66#include <vm/vm_kern.h>
67#include <vm/vm_object.h>
68#include <vm/vm_zone.h>
69#include <vm/vm_pageout.h>
70
71#include <machine/md_var.h>
72#include <machine/pcb.h>
73#include <machine/pmap_inval.h>
74#include <machine/globaldata.h>
75
76#include <sys/sysref2.h>
77
78#include <assert.h>
79
80struct pmap kernel_pmap;
81
82static struct vm_zone pvzone;
83static struct vm_object pvzone_obj;
84static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list);
85static int pv_entry_count;
86static int pv_entry_max;
87static int pv_entry_high_water;
88static int pmap_pagedaemon_waken;
89static boolean_t pmap_initialized = FALSE;
90static int protection_codes[8];
91
92static void i386_protection_init(void);
93static void pmap_remove_all(vm_page_t m);
94static int pmap_release_free_page(struct pmap *pmap, vm_page_t p);
95
96#define MINPV 2048
97#ifndef PMAP_SHPGPERPROC
98#define PMAP_SHPGPERPROC 200
99#endif
100
101#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
102
103#define pte_prot(m, p) \
104 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
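/*
 * Example (illustrative): with 4K pages and 4MB segments, a VA of
 * 0x00846000 selects pm_pdir[0x00846000 >> PDRSHIFT] = pm_pdir[2],
 * and the pte within that page table sits at index
 * (va & (SEG_SIZE - 1)) >> PAGE_SHIFT = 0x46.
 */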
105
106void
107pmap_init(void)
108{
109 int i;
110 struct pv_entry *pvinit;
111
112 for (i = 0; i < vm_page_array_size; i++) {
113 vm_page_t m;
114
115 m = &vm_page_array[i];
116 TAILQ_INIT(&m->md.pv_list);
117 m->md.pv_list_count = 0;
118 }
119
120 i = vm_page_array_size;
121 if (i < MINPV)
122 i = MINPV;
123 pvinit = (struct pv_entry *)kmem_alloc(&kernel_map, i*sizeof(*pvinit));
124 zbootinit(&pvzone, "PV ENTRY", sizeof(*pvinit), pvinit, i);
125 pmap_initialized = TRUE;
126}
127
128void
129pmap_init2(void)
130{
131 int shpgperproc = PMAP_SHPGPERPROC;
132
133 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
134 pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
135 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
136 pv_entry_high_water = 9 * (pv_entry_max / 10);
137 zinitna(&pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
138}
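/*
 * Example (illustrative arithmetic): with the default PMAP_SHPGPERPROC
 * of 200, maxproc = 512 and vm_page_array_size = 65536 pages,
 * pv_entry_max defaults to 200 * 512 + 65536 = 167936 and
 * pv_entry_high_water to 9 * (167936 / 10) = 151137.
 */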
139
140/*
141 * Bootstrap the kernel_pmap so it can be used with pmap_enter().
142 *
143 * NOTE! pm_pdir for the kernel pmap is offset so VA's translate
144 * directly into PTD indexes (PTA is also offset for the same reason).
145 * This is necessary because, for now, KVA is not mapped at address 0.
146 *
147 * Page table pages are not managed like they are in normal pmaps, so
148 * no pteobj is needed.
149 */
150void
151pmap_bootstrap(void)
152{
153 vm_pindex_t i = (vm_offset_t)KernelPTD >> PAGE_SHIFT;
154
155 kernel_pmap.pm_pdir = KernelPTD - (KvaStart >> SEG_SHIFT);
156 kernel_pmap.pm_pdirpte = KernelPTA[i];
157 kernel_pmap.pm_count = 1;
158 kernel_pmap.pm_active = (cpumask_t)-1;
159 TAILQ_INIT(&kernel_pmap.pm_pvlist);
160 i386_protection_init();
161}
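/*
 * Example (illustrative, assumes SEG_SHIFT == 22): if KvaStart were
 * 0xc0000000, the bias subtracted above is 0x300 page directory
 * entries, so pm_pdir[va >> SEG_SHIFT] resolves a kernel VA directly
 * without first subtracting KvaStart.
 */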
162
163/*
164 * Initialize pmap0/vmspace0. Since process 0 never enters user mode we
165 * just dummy it up so it works well enough for fork().
166 *
167 * In DragonFly, process pmaps may only be used to manipulate user address
168 * space, never kernel address space.
169 */
170void
171pmap_pinit0(struct pmap *pmap)
172{
173 pmap_pinit(pmap);
174}
175
176/************************************************************************
177 * Procedures to manage whole physical maps *
178 ************************************************************************
179 *
180 * Initialize a preallocated and zeroed pmap structure,
181 * such as one in a vmspace structure.
182 */
183void
184pmap_pinit(struct pmap *pmap)
185{
186 vm_page_t ptdpg;
187 int npages;
188
189 /*
190 * No need to allocate page table space yet but we do need a valid
191 * page directory table.
192 */
193 if (pmap->pm_pdir == NULL) {
194 pmap->pm_pdir =
195 (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE);
196 }
197
198 /*
199 * allocate object for the pte array and page directory
200 */
201 npages = VPTE_PAGETABLE_SIZE +
202 (VM_MAX_USER_ADDRESS / PAGE_SIZE) * sizeof(vpte_t);
203 npages = (npages + PAGE_MASK) / PAGE_SIZE;
204
205 if (pmap->pm_pteobj == NULL)
206 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, npages);
207 pmap->pm_pdindex = npages - 1;
208
209 /*
210 * allocate the page directory page
211 */
212 ptdpg = vm_page_grab(pmap->pm_pteobj, pmap->pm_pdindex,
213 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
214
215 ptdpg->wire_count = 1;
216 ++vmstats.v_wire_count;
217
218 /* not usually mapped */
219 vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY);
220 ptdpg->valid = VM_PAGE_BITS_ALL;
221
222 pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
223 pmap->pm_pdirpte = KernelPTA[(vm_offset_t)pmap->pm_pdir >> PAGE_SHIFT];
224 if ((ptdpg->flags & PG_ZERO) == 0)
225 bzero(pmap->pm_pdir, PAGE_SIZE);
226
227 pmap->pm_count = 1;
228 pmap->pm_active = 0;
229 pmap->pm_ptphint = NULL;
230 pmap->pm_cpucachemask = 0;
231 TAILQ_INIT(&pmap->pm_pvlist);
232 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
233 pmap->pm_stats.resident_count = 1;
234}
235
236/*
237 * Clean up a pmap structure so it can be physically freed
238 */
239void
240pmap_puninit(pmap_t pmap)
241{
242 if (pmap->pm_pdir) {
243 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pdir, PAGE_SIZE);
244 pmap->pm_pdir = NULL;
245 }
246 if (pmap->pm_pteobj) {
247 vm_object_deallocate(pmap->pm_pteobj);
248 pmap->pm_pteobj = NULL;
249 }
250}
251
252
253/*
254 * Wire in kernel global address entries. To avoid a race condition
255 * between pmap initialization and pmap_growkernel, this procedure
256 * adds the pmap to the master list (which growkernel scans to update),
257 * then copies the template.
258 *
259 * In a virtual kernel there are no kernel global address entries.
260 */
261void
262pmap_pinit2(struct pmap *pmap)
263{
264 crit_enter();
265 TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode);
266 crit_exit();
267}
268
269/*
270 * Release all resources held by the given physical map.
271 *
272 * Should only be called if the map contains no valid mappings.
273 */
274static int pmap_release_callback(struct vm_page *p, void *data);
275
276void
277pmap_release(struct pmap *pmap)
278{
279 struct mdglobaldata *gd = mdcpu;
280 vm_object_t object = pmap->pm_pteobj;
281 struct rb_vm_page_scan_info info;
282
283 KKASSERT(pmap != &kernel_pmap);
284
285#if defined(DIAGNOSTIC)
286 if (object->ref_count != 1)
287 panic("pmap_release: pteobj reference count != 1");
288#endif
289 /*
290 * Once we destroy the page table, the mapping becomes invalid.
291 * Don't waste time doing a madvise to invalidate the mapping, just
292 * set cpucachemask to 0.
293 */
294 if (pmap->pm_pdir == gd->gd_PT1pdir) {
295 gd->gd_PT1pdir = NULL;
296 *gd->gd_PT1pde = 0;
297 /* madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL); */
298 }
299 if (pmap->pm_pdir == gd->gd_PT2pdir) {
300 gd->gd_PT2pdir = NULL;
301 *gd->gd_PT2pde = 0;
302 /* madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL); */
303 }
304 if (pmap->pm_pdir == gd->gd_PT3pdir) {
305 gd->gd_PT3pdir = NULL;
306 *gd->gd_PT3pde = 0;
307 /* madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL); */
308 }
309
310 info.pmap = pmap;
311 info.object = object;
312 crit_enter();
313 TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode);
314 crit_exit();
315
316 do {
317 crit_enter();
318 info.error = 0;
319 info.mpte = NULL;
320 info.limit = object->generation;
321
322 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
323 pmap_release_callback, &info);
324 if (info.error == 0 && info.mpte) {
325 if (!pmap_release_free_page(pmap, info.mpte))
326 info.error = 1;
327 }
328 crit_exit();
329 } while (info.error);
330
331 /*
332 * Leave the KVA reservation for pm_pdir cached for later reuse.
333 */
334 pmap->pm_pdirpte = 0;
335 pmap->pm_cpucachemask = 0;
336}
337
338/*
339 * Callback to release a page table page backing a directory
340 * entry.
341 */
342static int
343pmap_release_callback(struct vm_page *p, void *data)
344{
345 struct rb_vm_page_scan_info *info = data;
346
347 if (p->pindex == info->pmap->pm_pdindex) {
348 info->mpte = p;
349 return(0);
350 }
351 if (!pmap_release_free_page(info->pmap, p)) {
352 info->error = 1;
353 return(-1);
354 }
355 if (info->object->generation != info->limit) {
356 info->error = 1;
357 return(-1);
358 }
359 return(0);
360}
361
362/*
363 * Retire the given physical map from service. Should only be called if
364 * the map contains no valid mappings.
365 */
366void
367pmap_destroy(pmap_t pmap)
368{
369 int count;
370
371 if (pmap == NULL)
372 return;
373
374 count = --pmap->pm_count;
375 if (count == 0) {
376 pmap_release(pmap);
377 panic("destroying a pmap is not yet implemented");
378 }
379}
380
381/*
382 * Add a reference to the specified pmap.
383 */
384void
385pmap_reference(pmap_t pmap)
386{
387 if (pmap != NULL) {
388 pmap->pm_count++;
389 }
390}
391
392/************************************************************************
393 * VMSPACE MANAGEMENT *
394 ************************************************************************
395 *
396 * The VMSPACE management we do in our virtual kernel must be reflected
397 * in the real kernel. This is accomplished by making vmspace system
398 * calls to the real kernel.
399 */
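/*
 * Summary of the call sequence below: the vkernel's memory image
 * (MemImageFd) is mmap()'d into the new real-kernel vmspace as three
 * MAP_VPAGETABLE extents (at 0x00000000, 0x40000000 and 0x80000000 up
 * to LAST_EXTENT), each marked MADV_NOSYNC, and MADV_SETMAP then points
 * every extent at this pmap's page directory (pm_pdirpte).
 */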
400void
401cpu_vmspace_alloc(struct vmspace *vm)
402{
403 int r;
404 void *rp;
405
406#define LAST_EXTENT (VM_MAX_USER_ADDRESS - 0x80000000)
407
408 if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0)
409 panic("vmspace_create() failed");
410
411 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
412 PROT_READ|PROT_WRITE,
413 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
414 MemImageFd, 0);
415 if (rp == MAP_FAILED)
416 panic("vmspace_mmap: failed1");
417 vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
418 MADV_NOSYNC, 0);
419 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
420 PROT_READ|PROT_WRITE,
421 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
422 MemImageFd, 0x40000000);
423 if (rp == MAP_FAILED)
424 panic("vmspace_mmap: failed2");
425 vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
426 MADV_NOSYNC, 0);
427 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
428 PROT_READ|PROT_WRITE,
429 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
430 MemImageFd, 0x80000000);
431 vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
432 MADV_NOSYNC, 0);
433 if (rp == MAP_FAILED)
434 panic("vmspace_mmap: failed3");
435
436 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
437 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
438 if (r < 0)
439 panic("vmspace_mcontrol: failed1");
440 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
441 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
442 if (r < 0)
443 panic("vmspace_mcontrol: failed2");
444 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
445 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
446 if (r < 0)
447 panic("vmspace_mcontrol: failed3");
448}
449
450void
451cpu_vmspace_free(struct vmspace *vm)
452{
453 if (vmspace_destroy(&vm->vm_pmap) < 0)
454 panic("vmspace_destroy() failed");
455}
456
457/************************************************************************
458 * Procedures which operate directly on the kernel PMAP *
459 ************************************************************************/
460
461/*
462 * This maps the requested page table and gives us access to it.
463 *
464 * This routine can be called from a potentially preempting interrupt
465 * thread or from a normal thread.
466 */
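/*
 * Summary of the logic below: each cpu caches two user page table
 * mappings (PT1/PT2) and flips between them on a miss; a third slot
 * (PT3) is reserved for calls made from a preempting interrupt thread,
 * where reloading PT1/PT2 would not be safe.
 */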
467static vpte_t *
468get_ptbase(struct pmap *pmap, vm_offset_t va)
469{
470 struct mdglobaldata *gd = mdcpu;
471
472 if (pmap == &kernel_pmap) {
473 KKASSERT(va >= KvaStart && va < KvaEnd);
474 return(KernelPTA + (va >> PAGE_SHIFT));
475 } else if (pmap->pm_pdir == gd->gd_PT1pdir) {
476 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
477 *gd->gd_PT1pde = pmap->pm_pdirpte;
478 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
479 atomic_set_int(&pmap->pm_cpucachemask, gd->mi.gd_cpumask);
480 }
481 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
482 } else if (pmap->pm_pdir == gd->gd_PT2pdir) {
483 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
484 *gd->gd_PT2pde = pmap->pm_pdirpte;
485 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
486 atomic_set_int(&pmap->pm_cpucachemask, gd->mi.gd_cpumask);
487 }
488 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
489 }
490
491 /*
492 * If we aren't running from a potentially preempting interrupt,
493 * load a new page table directory into the page table cache
494 */
495 if (gd->mi.gd_intr_nesting_level == 0 &&
496 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0) {
497 /*
498 * Choose one or the other and map the page table
499 * in the KVA space reserved for it.
500 */
501 if ((gd->gd_PTflip = 1 - gd->gd_PTflip) == 0) {
502 gd->gd_PT1pdir = pmap->pm_pdir;
503 *gd->gd_PT1pde = pmap->pm_pdirpte;
504 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
505 atomic_set_int(&pmap->pm_cpucachemask,
506 gd->mi.gd_cpumask);
507 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
508 } else {
509 gd->gd_PT2pdir = pmap->pm_pdir;
510 *gd->gd_PT2pde = pmap->pm_pdirpte;
511 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
512 atomic_set_int(&pmap->pm_cpucachemask,
513 gd->mi.gd_cpumask);
514 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
515 }
516 }
517
518 /*
519 * If we are running from a preempting interrupt use a private
520 * map. The caller must be in a critical section.
521 */
522 KKASSERT(IN_CRITICAL_SECT(curthread));
523 if (pmap->pm_pdir == gd->gd_PT3pdir) {
524 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
525 *gd->gd_PT3pde = pmap->pm_pdirpte;
526 madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL);
527 atomic_set_int(&pmap->pm_cpucachemask,
528 gd->mi.gd_cpumask);
529 }
530 } else {
531 gd->gd_PT3pdir = pmap->pm_pdir;
532 *gd->gd_PT3pde = pmap->pm_pdirpte;
533 madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL);
534 atomic_set_int(&pmap->pm_cpucachemask,
535 gd->mi.gd_cpumask);
536 }
537 return(gd->gd_PT3map + (va >> PAGE_SHIFT));
538}
539
540static vpte_t *
541get_ptbase1(struct pmap *pmap, vm_offset_t va)
542{
543 struct mdglobaldata *gd = mdcpu;
544
545 if (pmap == &kernel_pmap) {
546 KKASSERT(va >= KvaStart && va < KvaEnd);
547 return(KernelPTA + (va >> PAGE_SHIFT));
548 } else if (pmap->pm_pdir == gd->gd_PT1pdir) {
549 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
550 *gd->gd_PT1pde = pmap->pm_pdirpte;
551 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
552 atomic_set_int(&pmap->pm_cpucachemask, gd->mi.gd_cpumask);
553 }
554 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
555 }
556 KKASSERT(gd->mi.gd_intr_nesting_level == 0 &&
557 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0);
558 gd->gd_PT1pdir = pmap->pm_pdir;
559 *gd->gd_PT1pde = pmap->pm_pdirpte;
560 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
561 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
562}
563
564static vpte_t *
565get_ptbase2(struct pmap *pmap, vm_offset_t va)
566{
567 struct mdglobaldata *gd = mdcpu;
568
569 if (pmap == &kernel_pmap) {
570 KKASSERT(va >= KvaStart && va < KvaEnd);
571 return(KernelPTA + (va >> PAGE_SHIFT));
572 } else if (pmap->pm_pdir == gd->gd_PT2pdir) {
573 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
574 *gd->gd_PT2pde = pmap->pm_pdirpte;
575 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
576 atomic_set_int(&pmap->pm_cpucachemask, gd->mi.gd_cpumask);
577 }
578 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
579 }
580 KKASSERT(gd->mi.gd_intr_nesting_level == 0 &&
581 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0);
582 gd->gd_PT2pdir = pmap->pm_pdir;
583 *gd->gd_PT2pde = pmap->pm_pdirpte;
584 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
585 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
586}
587
588/*
589 * Return a pointer to the page table entry for the specified va in the
590 * specified pmap. NULL is returned if there is no valid page table page
591 * for the VA.
592 */
593static __inline vpte_t *
594pmap_pte(struct pmap *pmap, vm_offset_t va)
595{
596 vpte_t *ptep;
597
598 ptep = &pmap->pm_pdir[va >> SEG_SHIFT];
599 if (*ptep & VPTE_PS)
600 return(ptep);
601 if (*ptep)
602 return (get_ptbase(pmap, va));
603 return(NULL);
604}
605
606
607/*
608 * Enter a mapping into kernel_pmap. Mappings created in this fashion
609 * are not managed. Mappings must be immediately accessible on all cpus.
610 *
611 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the
612 * real pmap and handle related races before storing the new vpte.
613 */
614void
615pmap_kenter(vm_offset_t va, vm_paddr_t pa)
616{
617 vpte_t *ptep;
618 vpte_t npte;
619
620 KKASSERT(va >= KvaStart && va < KvaEnd);
621 npte = (vpte_t)pa | VPTE_R | VPTE_W | VPTE_V;
622 ptep = KernelPTA + (va >> PAGE_SHIFT);
623 if (*ptep & VPTE_V)
624 pmap_inval_pte(ptep, &kernel_pmap, va);
625 *ptep = npte;
626}
627
628/*
629 * Synchronize a kvm mapping originally made for the private use on
630 * some other cpu so it can be used on all cpus.
631 *
632 * XXX add MADV_RESYNC to improve performance.
633 */
634void
635pmap_kenter_sync(vm_offset_t va)
636{
637 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
638}
639
640/*
641 * Synchronize a kvm mapping originally made for the private use on
642 * some other cpu so it can be used on our cpu. Turns out to be the
643 * same madvise() call, because we have to sync the real pmaps anyway.
644 *
645 * XXX add MADV_RESYNC to improve performance.
646 */
647void
648pmap_kenter_sync_quick(vm_offset_t va)
649{
650 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
651}
652
653#if 0
654/*
655 * Make a previously read-only kernel mapping R+W (not implemented by
656 * virtual kernels).
657 */
658void
659pmap_kmodify_rw(vm_offset_t va)
660{
661 *pmap_kpte(va) |= VPTE_R | VPTE_W;
662 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
663}
664
665/*
666 * Make a kernel mapping non-cacheable (not applicable to virtual kernels)
667 */
668void
669pmap_kmodify_nc(vm_offset_t va)
670{
671 *pmap_kpte(va) |= VPTE_N;
672 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
673}
674
675#endif
676
677/*
678 * Map a contiguous range of physical memory to a KVM
679 */
680vm_offset_t
681pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
682{
683 while (start < end) {
684 pmap_kenter(virt, start);
685 virt += PAGE_SIZE;
686 start += PAGE_SIZE;
687 }
688 return (virt);
689}
690
691vpte_t *
692pmap_kpte(vm_offset_t va)
693{
694 vpte_t *ptep;
695
696 KKASSERT(va >= KvaStart && va < KvaEnd);
697 ptep = KernelPTA + (va >> PAGE_SHIFT);
698 return(ptep);
699}
700
701/*
702 * Enter an unmanaged KVA mapping for the private use of the current
703 * cpu only. pmap_kenter_sync() may be called to make the mapping usable
704 * by other cpus.
705 *
706 * It is illegal for the mapping to be accessed by other cpus unless
707 * pmap_kenter_sync*() is called.
708 */
709void
710pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
711{
712 vpte_t *ptep;
713 vpte_t npte;
714
715 KKASSERT(va >= KvaStart && va < KvaEnd);
716
717 npte = (vpte_t)pa | VPTE_R | VPTE_W | VPTE_V;
718 ptep = KernelPTA + (va >> PAGE_SHIFT);
719 if (*ptep & VPTE_V)
720 pmap_inval_pte_quick(ptep, &kernel_pmap, va);
721 *ptep = npte;
722}
723
724/*
725 * Make a temporary mapping for a physical address. This is only intended
726 * to be used for panic dumps.
727 */
728void *
729pmap_kenter_temporary(vm_paddr_t pa, int i)
730{
731 pmap_kenter(crashdumpmap + (i * PAGE_SIZE), pa);
732 return ((void *)crashdumpmap);
733}
734
735/*
736 * Remove an unmanaged mapping created with pmap_kenter*().
737 */
738void
739pmap_kremove(vm_offset_t va)
740{
741 vpte_t *ptep;
742
743 KKASSERT(va >= KvaStart && va < KvaEnd);
744
745 ptep = KernelPTA + (va >> PAGE_SHIFT);
746 if (*ptep & VPTE_V)
747 pmap_inval_pte(ptep, &kernel_pmap, va);
748 *ptep = 0;
749}
750
751/*
752 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize
753 * only with this cpu.
754 *
755 * Unfortunately because we optimize new entries by testing VPTE_V later
756 * on, we actually still have to synchronize with all the cpus. XXX maybe
757 * store a junk value and test against 0 in the other places instead?
758 */
759void
760pmap_kremove_quick(vm_offset_t va)
761{
762 vpte_t *ptep;
763
764 KKASSERT(va >= KvaStart && va < KvaEnd);
765
766 ptep = KernelPTA + (va >> PAGE_SHIFT);
767 if (*ptep & VPTE_V)
768 pmap_inval_pte(ptep, &kernel_pmap, va); /* NOT _quick */
769 *ptep = 0;
770}
771
772/*
773 * Extract the physical address from the kernel_pmap that is associated
774 * with the specified virtual address.
775 */
776vm_paddr_t
777pmap_kextract(vm_offset_t va)
778{
779 vpte_t *ptep;
780 vm_paddr_t pa;
781
782 KKASSERT(va >= KvaStart && va < KvaEnd);
783
784 ptep = KernelPTA + (va >> PAGE_SHIFT);
785 pa = (vm_paddr_t)(*ptep & VPTE_FRAME) | (va & PAGE_MASK);
786 return(pa);
787}
788
789/*
790 * Map a set of unmanaged VM pages into KVM.
791 */
792void
793pmap_qenter(vm_offset_t va, struct vm_page **m, int count)
794{
795 KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd);
796 while (count) {
797 vpte_t *ptep;
798
799 ptep = KernelPTA + (va >> PAGE_SHIFT);
800 if (*ptep & VPTE_V)
801 pmap_inval_pte(ptep, &kernel_pmap, va);
802 *ptep = (vpte_t)(*m)->phys_addr | VPTE_R | VPTE_W | VPTE_V;
803 --count;
804 ++m;
805 va += PAGE_SIZE;
806 }
807}
808
809/*
810 * Map a set of VM pages to kernel virtual memory. If a mapping changes
811 * clear the supplied mask. The caller handles any SMP interactions.
812 * The mask is used to provide the caller with hints on what SMP interactions
813 * might be needed.
814 */
815void
816pmap_qenter2(vm_offset_t va, struct vm_page **m, int count, cpumask_t *mask)
817{
818 cpumask_t cmask = mycpu->gd_cpumask;
819
820 KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd);
821 while (count) {
822 vpte_t *ptep;
823 vpte_t npte;
824
825 ptep = KernelPTA + (va >> PAGE_SHIFT);
826 npte = (vpte_t)(*m)->phys_addr | VPTE_R | VPTE_W | VPTE_V;
827 if (*ptep != npte) {
828 *mask = 0;
829 pmap_inval_pte_quick(ptep, &kernel_pmap, va);
830 *ptep = npte;
831 } else if ((*mask & cmask) == 0) {
832 pmap_kenter_sync_quick(va);
833 }
834 --count;
835 ++m;
836 va += PAGE_SIZE;
837 }
838 *mask |= cmask;
839}
840
841/*
842 * Undo the effects of pmap_qenter*().
843 */
844void
845pmap_qremove(vm_offset_t va, int count)
846{
847 KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd);
848 while (count) {
849 vpte_t *ptep;
850
851 ptep = KernelPTA + (va >> PAGE_SHIFT);
852 if (*ptep & VPTE_V)
853 pmap_inval_pte(ptep, &kernel_pmap, va);
854 *ptep = 0;
855 --count;
856 va += PAGE_SIZE;
857 }
858}
859
860/************************************************************************
861 * Misc support glue called by machine independent code *
862 ************************************************************************
863 *
864 * These routines are called by machine independent code to operate on
865 * certain machine-dependent aspects of processes, threads, and pmaps.
866 */
867
868/*
869 * Initialize MD portions of the thread structure.
870 */
871void
872pmap_init_thread(thread_t td)
873{
874 /* enforce pcb placement */
875 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1;
876 td->td_savefpu = &td->td_pcb->pcb_save;
877 td->td_sp = (char *)td->td_pcb - 16;
878}
879
880/*
881 * This routine directly affects the fork perf for a process.
882 */
883void
884pmap_init_proc(struct proc *p)
885{
886}
887
888/*
889 * Destroy the UPAGES for a process that has exited and disassociate
890 * the process from its thread.
891 */
892void
893pmap_dispose_proc(struct proc *p)
894{
895 KASSERT(p->p_lock == 0, ("attempt to dispose referenced proc! %p", p));
896}
897
898/*
899 * We pre-allocate all page table pages for kernel virtual memory so
900 * this routine will only be called if KVM has been exhausted.
901 */
902void
903pmap_growkernel(vm_offset_t addr)
904{
905 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
906
907 if (addr > virtual_end - SEG_SIZE)
908 panic("KVM exhausted");
909 kernel_vm_end = addr;
910}
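/*
 * Example (illustrative arithmetic): the rounding above advances addr
 * to the next 4MB segment boundary, e.g. addr = 0x00401000 becomes
 * (0x00401000 + 0x00400000) & ~0x003fffff = 0x00800000 before the
 * KVM-exhaustion check.
 */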
911
912/*
913 * The modification bit is not tracked for any pages in this range. XXX
914 * such pages in this maps should always use pmap_k*() functions and not
915 * be managed anyhow.
916 *
917 * XXX User and kernel address spaces are independent for virtual kernels,
918 * this function only applies to the kernel pmap.
919 */
920static int
921pmap_track_modified(pmap_t pmap, vm_offset_t va)
922{
923 if (pmap != &kernel_pmap)
924 return 1;
925 if ((va < clean_sva) || (va >= clean_eva))
926 return 1;
927 else
928 return 0;
929}
930
931/************************************************************************
932 * Procedures supporting managed page table pages *
933 ************************************************************************
934 *
935 * These procedures are used to track managed page table pages. These pages
936 * use the page table page's vm_page_t to track PTEs in the page. The
937 * page table pages themselves are arranged in a VM object, pmap->pm_pteobj.
938 *
939 * This allows the system to throw away page table pages for user processes
940 * at will and reinstantiate them on demand.
941 */
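/*
 * Data structure sketch: each managed vm_page_t carries md.pv_list, a
 * list of pv_entry structures. A pv_entry records the (pmap, va) of one
 * mapping plus pv_ptem, the page table page backing it, and is also
 * linked into the owning pmap's pm_pvlist.
 */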
942
943/*
944 * This routine works like vm_page_lookup() but also blocks as long as the
945 * page is busy. This routine does not busy the page it returns.
946 *
947 * Unless the caller is managing objects whose pages are in a known state,
948 * the call should be made with a critical section held so the page's object
949 * association remains valid on return.
950 */
951static vm_page_t
952pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
953{
954 vm_page_t m;
955
956retry:
957 m = vm_page_lookup(object, pindex);
958 if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
959 goto retry;
960 return(m);
961}
962
963/*
964 * This routine unholds page table pages, and if the hold count
965 * drops to zero, then it decrements the wire count.
966 *
967 * We must recheck that this is the last hold reference after busy-sleeping
968 * on the page.
969 */
970static int
971_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
972{
973 while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
974 ;
975 KASSERT(m->queue == PQ_NONE,
976 ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m));
977
978 if (m->hold_count == 1) {
979 /*
980 * Unmap the page table page.
981 */
982 vm_page_busy(m);
983 KKASSERT(pmap->pm_pdir[m->pindex] != 0);
984 pmap_inval_pde(&pmap->pm_pdir[m->pindex], pmap,
985 (vm_offset_t)m->pindex << SEG_SHIFT);
986 KKASSERT(pmap->pm_stats.resident_count > 0);
987 --pmap->pm_stats.resident_count;
988
989 if (pmap->pm_ptphint == m)
990 pmap->pm_ptphint = NULL;
991
992 /*
993 * This was our last hold, the page had better be unwired
994 * after we decrement wire_count.
995 *
996 * FUTURE NOTE: shared page directory page could result in
997 * multiple wire counts.
998 */
999 vm_page_unhold(m);
1000 --m->wire_count;
1001 KKASSERT(m->wire_count == 0);
1002 --vmstats.v_wire_count;
1003 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1004 vm_page_flash(m);
1005 vm_page_free_zero(m);
1006 return 1;
1007 }
1008 KKASSERT(m->hold_count > 1);
1009 vm_page_unhold(m);
1010 return 0;
1011}
1012
1013static __inline int
1014pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
1015{
1016 KKASSERT(m->hold_count > 0);
1017 if (m->hold_count > 1) {
1018 vm_page_unhold(m);
1019 return 0;
1020 } else {
1021 return _pmap_unwire_pte_hold(pmap, m);
1022 }
1023}
1024
1025/*
1026 * After removing a page table entry, this routine is used to
1027 * conditionally free the page, and manage the hold/wire counts.
1028 */
1029static int
1030pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
1031{
1032 unsigned ptepindex;
1033
1034 if (mpte == NULL) {
1035 /*
1036 * page table pages in the kernel_pmap are not managed.
1037 */
1038 if (pmap == &kernel_pmap)
1039 return(0);
1040 ptepindex = (va >> PDRSHIFT);
1041 if (pmap->pm_ptphint &&
1042 (pmap->pm_ptphint->pindex == ptepindex)) {
1043 mpte = pmap->pm_ptphint;
1044 } else {
1045 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1046 pmap->pm_ptphint = mpte;
1047 }
1048 }
1049 return pmap_unwire_pte_hold(pmap, mpte);
1050}
1051
1052/*
1053 * Attempt to release and free the vm_page backing a page directory page
1054 * in a pmap. Returns 1 on success, 0 on failure (if the procedure had
1055 * to sleep).
1056 */
1057static int
1058pmap_release_free_page(struct pmap *pmap, vm_page_t p)
1059{
1060 vpte_t *pde = pmap->pm_pdir;
1061
1062 /*
1063 * This code optimizes the case of freeing non-busy
1064 * page-table pages. Those pages are zero now, and
1065 * might as well be placed directly into the zero queue.
1066 */
1067 if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
1068 return 0;
1069
1070 vm_page_busy(p);
1071 KKASSERT(pmap->pm_stats.resident_count > 0);
1072 --pmap->pm_stats.resident_count;
1073
1074 if (p->hold_count) {
1075 panic("pmap_release: freeing held page table page");
1076 }
1077 /*
1078 * Page directory pages need to have the kernel stuff cleared, so
1079 * they can go into the zero queue also.
1080 *
1081 * In virtual kernels there is no 'kernel stuff'. For the moment
1082 * I just make sure the whole thing has been zero'd even though
1083 * it should already be completely zero'd.
1084 *
1085 * pmaps for vkernels do not self-map because they do not share
1086 * their address space with the vkernel. Clearing of pde[] thus
1087 * only applies to page table pages and not to the page directory
1088 * page.
1089 */
1090 if (p->pindex == pmap->pm_pdindex) {
1091 bzero(pde, VPTE_PAGETABLE_SIZE);
1092 pmap_kremove((vm_offset_t)pmap->pm_pdir);
1093 } else {
1094 KKASSERT(pde[p->pindex] != 0);
1095 pmap_inval_pde(&pde[p->pindex], pmap,
1096 (vm_offset_t)p->pindex << SEG_SHIFT);
1097 }
1098
1099 /*
1100 * Clear the matching hint
1101 */
1102 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1103 pmap->pm_ptphint = NULL;
1104
1105 /*
1106 * And throw the page away. The page is completely zero'd out so
1107 * optimize the free call.
1108 */
1109 p->wire_count--;
1110 vmstats.v_wire_count--;
1111 vm_page_free_zero(p);
1112 return 1;
1113}
1114
1115/*
1116 * This routine is called if the page table page is not mapped in the page
1117 * table directory.
1118 *
1119 * The routine is broken up into two parts for readability.
1120 *
1121 * It must return a held mpte and map the page directory page as required.
1122 * Because vm_page_grab() can block, we must re-check pm_pdir[ptepindex]
1123 */
1124static vm_page_t
1125_pmap_allocpte(pmap_t pmap, unsigned ptepindex)
1126{
1127 vm_paddr_t ptepa;
1128 vm_page_t m;
1129
1130 /*
1131 * Find or fabricate a new pagetable page. A busied page will be
1132 * returned. This call may block.
1133 */
1134 m = vm_page_grab(pmap->pm_pteobj, ptepindex,
1135 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
1136
1137 KASSERT(m->queue == PQ_NONE,
1138 ("_pmap_allocpte: %p->queue != PQ_NONE", m));
1139
1140 /*
1141 * Increment the hold count for the page we will be returning to
1142 * the caller.
1143 */
1144 m->hold_count++;
1145
1146 /*
1147 * It is possible that someone else got in and mapped by the page
1148 * directory page while we were blocked, if so just unbusy and
1149 * return the held page.
1150 */
1151 if ((ptepa = pmap->pm_pdir[ptepindex]) != 0) {
1152 Debugger("PTEPA RACE");
1153 KKASSERT((ptepa & VPTE_FRAME) == VM_PAGE_TO_PHYS(m));
1154 vm_page_wakeup(m);
1155 return(m);
1156 }
1157
1158 if (m->wire_count == 0)
1159 vmstats.v_wire_count++;
1160 m->wire_count++;
1161
1162 /*
1163 * Map the pagetable page into the process address space, if
1164 * it isn't already there.
1165 */
1166 ++pmap->pm_stats.resident_count;
1167
1168 ptepa = VM_PAGE_TO_PHYS(m);
1169 pmap->pm_pdir[ptepindex] = (vpte_t)ptepa | VPTE_R | VPTE_W | VPTE_V |
1170 VPTE_A | VPTE_M;
1171
1172 /*
1173 * We are likely about to access this page table page, so set the
1174 * page table hint to reduce overhead.
1175 */
1176 pmap->pm_ptphint = m;
1177
1178 /*
1179 * Try to use the new mapping, but if we cannot, then
1180 * do it with the routine that maps the page explicitly.
1181 */
1182 if ((m->flags & PG_ZERO) == 0)
1183 pmap_zero_page(ptepa);
1184
1185 m->valid = VM_PAGE_BITS_ALL;
1186 vm_page_flag_clear(m, PG_ZERO);
1187 vm_page_flag_set(m, PG_MAPPED);
1188 vm_page_wakeup(m);
1189
1190 return (m);
1191}
1192
1193/*
1194 * Determine the page table page required to access the VA in the pmap
1195 * and allocate it if necessary. Return a held vm_page_t for the page.
1196 *
1197 * Only used with user pmaps.
1198 */
1199static vm_page_t
1200pmap_allocpte(pmap_t pmap, vm_offset_t va)
1201{
1202 unsigned ptepindex;
1203 vm_offset_t ptepa;
1204 vm_page_t m;
1205
1206 /*
1207 * Calculate pagetable page index
1208 */
1209 ptepindex = va >> PDRSHIFT;
1210
1211 /*
1212 * Get the page directory entry
1213 */
1214 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1215
1216 /*
1217 * This supports switching from a 4MB page to a
1218 * normal 4K page.
1219 */
1220 if (ptepa & VPTE_PS) {
1221 KKASSERT(pmap->pm_pdir[ptepindex] != 0);
1222 pmap_inval_pde(&pmap->pm_pdir[ptepindex], pmap,
1223 (vm_offset_t)ptepindex << SEG_SHIFT);
1224 ptepa = 0;
1225 }
1226
1227 /*
1228 * If the page table page is mapped, we just increment the
1229 * hold count, and activate it.
1230 */
1231 if (ptepa) {
1232 /*
1233 * In order to get the page table page, try the
1234 * hint first.
1235 */
1236 if (pmap->pm_ptphint &&
1237 (pmap->pm_ptphint->pindex == ptepindex)) {
1238 m = pmap->pm_ptphint;
1239 } else {
1240 m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1241 pmap->pm_ptphint = m;
1242 }
1243 m->hold_count++;
1244 return m;
1245 }
1246 /*
1247 * Here if the pte page isn't mapped, or if it has been deallocated.
1248 */
1249 return _pmap_allocpte(pmap, ptepindex);
1250}
1251
1252/************************************************************************
1253 * Managed pages in pmaps *
1254 ************************************************************************
1255 *
1256 * All pages entered into user pmaps and some pages entered into the kernel
1257 * pmap are managed, meaning that pmap_protect() and other related management
1258 * functions work on these pages.
1259 */
1260
1261/*
1262 * free the pv_entry back to the free list. This function may be
1263 * called from an interrupt.
1264 */
1265static __inline void
1266free_pv_entry(pv_entry_t pv)
1267{
1268 pv_entry_count--;
1269 zfree(&pvzone, pv);
1270}
1271
1272/*
1273 * get a new pv_entry, allocating a block from the system
1274 * when needed. This function may be called from an interrupt.
1275 */
1276static pv_entry_t
1277get_pv_entry(void)
1278{
1279 pv_entry_count++;
1280 if (pv_entry_high_water &&
1281 (pv_entry_count > pv_entry_high_water) &&
1282 (pmap_pagedaemon_waken == 0)) {
1283 pmap_pagedaemon_waken = 1;
1284 wakeup (&vm_pages_needed);
1285 }
1286 return zalloc(&pvzone);
1287}
1288
1289/*
1290 * This routine is very drastic, but can save the system
1291 * in a pinch.
1292 */
1293void
1294pmap_collect(void)
1295{
1296 int i;
1297 vm_page_t m;
1298 static int warningdone=0;
1299
1300 if (pmap_pagedaemon_waken == 0)
1301 return;
1302 pmap_pagedaemon_waken = 0;
1303
1304 if (warningdone < 5) {
1305 kprintf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1306 warningdone++;
1307 }
1308
1309 for(i = 0; i < vm_page_array_size; i++) {
1310 m = &vm_page_array[i];
1311 if (m->wire_count || m->hold_count || m->busy ||
1312 (m->flags & PG_BUSY))
1313 continue;
1314 pmap_remove_all(m);
1315 }
1316}
1317
1318/*
1319 * If it is the first entry on the list, it is actually
1320 * in the header and we must copy the following entry up
1321 * to the header. Otherwise we must search the list for
1322 * the entry. In either case we free the now unused entry.
1323 */
1324static int
1325pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
1326{
1327 pv_entry_t pv;
1328 int rtval;
1329
1330 crit_enter();
1331 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1332 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1333 if (pmap == pv->pv_pmap && va == pv->pv_va)
1334 break;
1335 }
1336 } else {
1337 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1338 if (va == pv->pv_va)
1339 break;
1340 }
1341 }
1342
1343 /*
1344 * Note that pv_ptem is NULL if the page table page itself is not
1345 * managed, even if the page being removed IS managed.
1346 */
1347 rtval = 0;
1348 if (pv) {
1349 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1350 m->md.pv_list_count--;
1351 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1352 if (TAILQ_EMPTY(&m->md.pv_list))
1353 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1354 ++pmap->pm_generation;
1355 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1356 free_pv_entry(pv);
1357 }
1358 crit_exit();
1359 return rtval;
1360}
1361
1362/*
1363 * Create a pv entry for page at pa for (pmap, va). If the page table page
1364 * holding the VA is managed, mpte will be non-NULL.
1365 */
1366static void
1367pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
1368{
1369 pv_entry_t pv;
1370
1371 crit_enter();
1372 pv = get_pv_entry();
1373 pv->pv_va = va;
1374 pv->pv_pmap = pmap;
1375 pv->pv_ptem = mpte;
1376
1377 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1378 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1379 m->md.pv_list_count++;
1380
1381 crit_exit();
1382}
1383
1384/*
1385 * pmap_remove_pte: do the things to unmap a page in a process
1386 */
1387static int
1388pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va)
1389{
1390 vpte_t oldpte;
1391 vm_page_t m;
1392
1393 oldpte = pmap_inval_loadandclear(ptq, pmap, va);
1394 if (oldpte & VPTE_WIRED)
1395 --pmap->pm_stats.wired_count;
1396 KKASSERT(pmap->pm_stats.wired_count >= 0);
1397
1398#if 0
1399 /*
1400 * Machines that don't support invlpg, also don't support
1401 * VPTE_G. XXX VPTE_G is disabled for SMP so don't worry about
1402 * the SMP case.
1403 */
1404 if (oldpte & VPTE_G)
1405 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
1406#endif
1407 KKASSERT(pmap->pm_stats.resident_count > 0);
1408 --pmap->pm_stats.resident_count;
1409 if (oldpte & VPTE_MANAGED) {
1410 m = PHYS_TO_VM_PAGE(oldpte);
1411 if (oldpte & VPTE_M) {
1412#if defined(PMAP_DIAGNOSTIC)
1413 if (pmap_nw_modified((pt_entry_t) oldpte)) {
1414 kprintf(
1415 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1416 va, oldpte);
1417 }
1418#endif
1419 if (pmap_track_modified(pmap, va))
1420 vm_page_dirty(m);
1421 }
1422 if (oldpte & VPTE_A)
1423 vm_page_flag_set(m, PG_REFERENCED);
1424 return pmap_remove_entry(pmap, m, va);
1425 } else {
1426 return pmap_unuse_pt(pmap, va, NULL);
1427 }
1428
1429 return 0;
1430}
1431
1432/*
1433 * pmap_remove_page:
1434 *
1435 * Remove a single page from a process address space.
1436 *
1437 * This function may not be called from an interrupt if the pmap is
1438 * not kernel_pmap.
1439 */
1440static void
1441pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1442{
1443 vpte_t *ptq;
1444
1445 /*
1446 * if there is no pte for this address, just skip it!!! Otherwise
1447 * get a local va for mappings for this pmap and remove the entry.
1448 */
1449 if (*pmap_pde(pmap, va) != 0) {
1450 ptq = get_ptbase(pmap, va);
1451 if (*ptq) {
1452 pmap_remove_pte(pmap, ptq, va);
1453 }
1454 }
1455}
1456
1457/*
1458 * pmap_remove:
1459 *
1460 * Remove the given range of addresses from the specified map.
1461 *
1462 * It is assumed that the start and end are properly
1463 * rounded to the page size.
1464 *
1465 * This function may not be called from an interrupt if the pmap is
1466 * not kernel_pmap.
1467 */
1468void
1469pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
1470{
1471 vpte_t *ptbase;
1472 vm_offset_t pdnxt;
1473 vm_offset_t ptpaddr;
1474 vm_pindex_t sindex, eindex;
1475
1476 if (pmap == NULL)
1477 return;
1478
1479 KKASSERT(pmap->pm_stats.resident_count >= 0);
1480 if (pmap->pm_stats.resident_count == 0)
1481 return;
1482
1483 /*
1484 * special handling of removing one page. a very
1485 * common operation and easy to short circuit some
1486 * code.
1487 */
1488 if (((sva + PAGE_SIZE) == eva) &&
1489 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & VPTE_PS) == 0)) {
1490 pmap_remove_page(pmap, sva);
1491 return;
1492 }
1493
1494 /*
1495 * Get a local virtual address for the mappings that are being
1496 * worked with.
1497 *
1498 * XXX this is really messy because the kernel pmap is not relative
1499 * to address 0
1500 */
1501 sindex = (sva >> PAGE_SHIFT);
1502 eindex = (eva >> PAGE_SHIFT);
1503
1504 for (; sindex < eindex; sindex = pdnxt) {
1505 vpte_t pdirindex;
1506
1507 /*
1508 * Calculate index for next page table.
1509 */
1510 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1511 if (pmap->pm_stats.resident_count == 0)
1512 break;
1513
1514 pdirindex = sindex / NPDEPG;
1515 if (((ptpaddr = pmap->pm_pdir[pdirindex]) & VPTE_PS) != 0) {
1516 KKASSERT(pmap->pm_pdir[pdirindex] != 0);
1517 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1518 pmap_inval_pde(&pmap->pm_pdir[pdirindex], pmap,
1519 (vm_offset_t)pdirindex << SEG_SHIFT);
1520 continue;
1521 }
1522
1523 /*
1524 * Weed out invalid mappings. Note: we assume that the page
1525 * directory table is always allocated, and in kernel virtual.
1526 */
1527 if (ptpaddr == 0)
1528 continue;
1529
1530 /*
1531 * Limit our scan to either the end of the va represented
1532 * by the current page table page, or to the end of the
1533 * range being removed.
1534 */
1535 if (pdnxt > eindex)
1536 pdnxt = eindex;
1537
1538 /*
1539 * NOTE: pmap_remove_pte() can block.
1540 */
1541 for (; sindex != pdnxt; sindex++) {
1542 vm_offset_t va;
1543
1544 ptbase = get_ptbase(pmap, sindex << PAGE_SHIFT);
1545 if (*ptbase == 0)
1546 continue;
1547 va = i386_ptob(sindex);
1548 if (pmap_remove_pte(pmap, ptbase, va))
1549 break;
1550 }
1551 }
1552}
1553
1554/*
1555 * pmap_remove_all:
1556 *
1557 * Removes this physical page from all physical maps in which it resides.
1558 * Reflects back modify bits to the pager.
1559 *
1560 * This routine may not be called from an interrupt.
1561 */
1562static void
1563pmap_remove_all(vm_page_t m)
1564{
1565 vpte_t *pte, tpte;
1566 pv_entry_t pv;
1567
1568#if defined(PMAP_DIAGNOSTIC)
1569 /*
1570 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1571 * pages!
1572 */
1573 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1574 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m));
1575 }
1576#endif
1577
1578 crit_enter();
1579 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1580 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0);
1581 --pv->pv_pmap->pm_stats.resident_count;
1582
1583 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
1584 KKASSERT(pte != NULL);
1585
1586 tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va);
1587 if (tpte & VPTE_WIRED)
1588 --pv->pv_pmap->pm_stats.wired_count;
1589 KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0);
1590
1591 if (tpte & VPTE_A)
1592 vm_page_flag_set(m, PG_REFERENCED);
1593
1594 /*
1595 * Update the vm_page_t clean and reference bits.
1596 */
1597 if (tpte & VPTE_M) {
1598#if defined(PMAP_DIAGNOSTIC)
1599 if (pmap_nw_modified((pt_entry_t) tpte)) {
1600 kprintf(
1601 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1602 pv->pv_va, tpte);
1603 }
1604#endif
1605 if (pmap_track_modified(pv->pv_pmap, pv->pv_va))
1606 vm_page_dirty(m);
1607 }
1608 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1609 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1610 ++pv->pv_pmap->pm_generation;
1611 m->md.pv_list_count--;
1612 if (TAILQ_EMPTY(&m->md.pv_list))
1613 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1614 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1615 free_pv_entry(pv);
1616 }
1617 KKASSERT((m->flags & (PG_MAPPED | PG_WRITEABLE)) == 0);
1618 crit_exit();
1619}
1620
1621/*
1622 * pmap_protect:
1623 *
1624 * Set the physical protection on the specified range of this map
1625 * as requested.
1626 *
1627 * This function may not be called from an interrupt if the map is
1628 * not the kernel_pmap.
1629 */
1630void
1631pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1632{
1633 vpte_t *ptbase;
1634 vpte_t *ptep;
1635 vm_offset_t pdnxt, ptpaddr;
1636 vm_pindex_t sindex, eindex;
1637 vm_pindex_t sbase;
1638
1639 if (pmap == NULL)
1640 return;
1641
1642 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1643 pmap_remove(pmap, sva, eva);
1644 return;
1645 }
1646
1647 if (prot & VM_PROT_WRITE)
1648 return;
1649
1650 ptbase = get_ptbase(pmap, sva);
1651
1652 sindex = (sva >> PAGE_SHIFT);
1653 eindex = (eva >> PAGE_SHIFT);
1654 sbase = sindex;
1655
1656 for (; sindex < eindex; sindex = pdnxt) {
1657
1658 unsigned pdirindex;
1659
1660 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1661
1662 pdirindex = sindex / NPDEPG;
1663
1664 /*
1665 * Clear the modified and writable bits for a 4m page.
1666 * Throw away the modified bit (?)
1667 */
1668 if (((ptpaddr = pmap->pm_pdir[pdirindex]) & VPTE_PS) != 0) {
1669 pmap_clean_pde(&pmap->pm_pdir[pdirindex], pmap,
1670 (vm_offset_t)pdirindex << SEG_SHIFT);
1671 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1672 continue;
1673 }
1674
1675 /*
1676 * Weed out invalid mappings. Note: we assume that the page
1677 * directory table is always allocated, and in kernel virtual.
1678 */
1679 if (ptpaddr == 0)
1680 continue;
1681
1682 if (pdnxt > eindex) {
1683 pdnxt = eindex;
1684 }
1685
1686 for (; sindex != pdnxt; sindex++) {
1687 vpte_t pbits;
1688 vm_page_t m;
1689
1690 /*
1691 * Clean managed pages and also check the accessed
1692 * bit. Just remove write perms for unmanaged
1693 * pages. Be careful of races, turning off write
1694 * access will force a fault rather than setting
1695 * the modified bit at an unexpected time.
1696 */
1697 ptep = &ptbase[sindex - sbase];
1698 if (*ptep & VPTE_MANAGED) {
1699 pbits = pmap_clean_pte(ptep, pmap,
1700 i386_ptob(sindex));
1701 m = NULL;
1702 if (pbits & VPTE_A) {
1703 m = PHYS_TO_VM_PAGE(pbits);
1704 vm_page_flag_set(m, PG_REFERENCED);
1705 atomic_clear_int(ptep, VPTE_A);
1706 }
1707 if (pbits & VPTE_M) {
1708 if (pmap_track_modified(pmap, i386_ptob(sindex))) {
1709 if (m == NULL)
1710 m = PHYS_TO_VM_PAGE(pbits);
1711 vm_page_dirty(m);
e4a473f1
MD
1712 }
1713 }
1714 } else {
1715 pbits = pmap_setro_pte(ptep, pmap,
1716 i386_ptob(sindex));
1717 }
1718 }
1719 }
1720}
1721
1722/*
1723 * Enter a managed page into a pmap. If the page is not wired related pmap
1724 * data can be destroyed at any time for later demand-operation.
1725 *
1726 * Insert the vm_page (m) at virtual address (v) in (pmap), with the
1727 * specified protection, and wire the mapping if requested.
1728 *
1729 * NOTE: This routine may not lazy-evaluate or lose information. The
1730 * page must actually be inserted into the given map NOW.
1731 *
1732 * NOTE: When entering a page at a KVA address, the pmap must be the
1733 * kernel_pmap.
1734 */
1735void
1736pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1737 boolean_t wired)
1738{
1739 vm_paddr_t pa;
1740 vpte_t *pte;
1741 vm_paddr_t opa;
1742 vm_offset_t origpte, newpte;
1743 vm_page_t mpte;
1744
1745 if (pmap == NULL)
1746 return;
1747
1748 va &= VPTE_FRAME;
1749
1750 /*
1751 * Get the page table page. The kernel_pmap's page table pages
1752 * are preallocated and have no associated vm_page_t.
1753 */
1754 if (pmap == &kernel_pmap)
1755 mpte = NULL;
1756 else
1757 mpte = pmap_allocpte(pmap, va);
1758
1759 pte = pmap_pte(pmap, va);
1760
1761 /*
1762 * Page Directory table entry not valid, we need a new PT page
1763 * and pmap_allocpte() didn't give us one. Oops!
1764 */
1765 if (pte == NULL) {
1766 panic("pmap_enter: invalid page directory pmap=%p, va=0x%p\n",
1767 pmap, (void *)va);
1768 }
1769
1770 /*
1771 * Deal with races on the original mapping (though don't worry
1772 * about VPTE_A races) by cleaning it. This will force a fault
1773 * if an attempt is made to write to the page.
1774 */
1775 pa = VM_PAGE_TO_PHYS(m) & VPTE_FRAME;
1776 origpte = pmap_clean_pte(pte, pmap, va);
1777 opa = origpte & VPTE_FRAME;
1778
1779 if (origpte & VPTE_PS)
1780 panic("pmap_enter: attempted pmap_enter on 4MB page");
1781
1782 /*
1783 * Mapping has not changed, must be protection or wiring change.
1784 */
1785 if (origpte && (opa == pa)) {
1786 /*
1787 * Wiring change, just update stats. We don't worry about
1788 * wiring PT pages as they remain resident as long as there
1789 * are valid mappings in them. Hence, if a user page is wired,
1790 * the PT page will be also.
1791 */
1792 if (wired && ((origpte & VPTE_WIRED) == 0))
1793 ++pmap->pm_stats.wired_count;
1794 else if (!wired && (origpte & VPTE_WIRED))
1795 --pmap->pm_stats.wired_count;
1796 KKASSERT(pmap->pm_stats.wired_count >= 0);
1797
1798 /*
1799 * Remove the extra pte reference. Note that we cannot
1800 * optimize the RO->RW case because we have adjusted the
1801 * wiring count above and may need to adjust the wiring
1802 * bits below.
1803 */
1804 if (mpte)
1805 mpte->hold_count--;
1806
1807 /*
1808 * We might be turning off write access to the page,
1809 * so we go ahead and sense modify status.
1810 */
1811 if (origpte & VPTE_MANAGED) {
1812 if ((origpte & VPTE_M) &&
1813 pmap_track_modified(pmap, va)) {
1814 vm_page_t om;
1815 om = PHYS_TO_VM_PAGE(opa);
1816 vm_page_dirty(om);
1817 }
1818 pa |= VPTE_MANAGED;
1819 KKASSERT(m->flags & PG_MAPPED);
1820 }
1821 goto validate;
1822 }
1823 /*
1824 * Mapping has changed, invalidate old range and fall through to
1825 * handle validating new mapping.
1826 */
1827 if (opa) {
1828 int err;
1829 err = pmap_remove_pte(pmap, pte, va);
1830 if (err)
1831 panic("pmap_enter: pte vanished, va: 0x%x", va);
1832 }
1833
1834 /*
1835 * Enter on the PV list if part of our managed memory. Note that we
1836 * raise IPL while manipulating pv_table since pmap_enter can be
1837 * called at interrupt time.
1838 */
1839 if (pmap_initialized &&
1840 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
1841 pmap_insert_entry(pmap, va, mpte, m);
1842 pa |= VPTE_MANAGED;
1843 vm_page_flag_set(m, PG_MAPPED);
1844 }
1845
1846 /*
1847 * Increment counters
1848 */
eec2b734 1849 ++pmap->pm_stats.resident_count;
e4a473f1
MD
1850 if (wired)
1851 pmap->pm_stats.wired_count++;
1852
1853validate:
1854 /*
1855 * Now validate mapping with desired protection/wiring.
1856 */
1857 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | VPTE_V);
1858
1859 if (wired)
e7f2d7de 1860 newpte |= VPTE_WIRED;
17cde63e
MD
1861 if (pmap != &kernel_pmap)
1862 newpte |= VPTE_U;
e4a473f1
MD
1863
1864 /*
d5b116a0
MD
1865 * If the mapping or permission bits are different from the
1866 * (now cleaned) original pte, an update is needed. We've
1867 * already downgraded or invalidated the page so all we have
1868 * to do now is update the bits.
1869 *
1870 * XXX should we synchronize RO->RW changes to avoid another
1871 * fault?
e4a473f1 1872 */
d5b116a0 1873 if ((origpte & ~(VPTE_W|VPTE_M|VPTE_A)) != newpte) {
e4a473f1 1874 *pte = newpte | VPTE_A;
17cde63e
MD
1875 if (newpte & VPTE_W)
1876 vm_page_flag_set(m, PG_WRITEABLE);
e4a473f1 1877 }
17cde63e 1878 KKASSERT((newpte & VPTE_MANAGED) == 0 || m->flags & PG_MAPPED);
e4a473f1
MD
1879}
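/*
 * Illustrative sketch (not compiled, hypothetical values): how the masked
 * comparison at the end of pmap_enter() decides whether the pte must be
 * rewritten.  Because origpte was cleaned, any mapping that wants VPTE_W,
 * or whose frame or other protection bits changed, triggers the rewrite.
 */
#if 0
static void
pmap_enter_update_example(void)
{
	/* cleaned original pte: read-only, user, valid, accessed */
	vpte_t origpte = 0x00400000 | VPTE_V | VPTE_U | VPTE_R | VPTE_A;
	/* desired pte: read-write */
	vpte_t newpte  = 0x00400000 | VPTE_V | VPTE_U | VPTE_R | VPTE_W;

	if ((origpte & ~(VPTE_W|VPTE_M|VPTE_A)) != newpte)
		kprintf("pte must be rewritten (RO->RW upgrade)\n");
}
#endif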
1880
1881/*
17cde63e 1882 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired.
e4a473f1 1883 *
17cde63e 1884 * Currently this routine may only be used on user pmaps, not kernel_pmap.
e4a473f1 1885 */
17cde63e
MD
1886static void
1887pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
e4a473f1
MD
1888{
1889 vpte_t *pte;
1890 vm_paddr_t pa;
17cde63e 1891 vm_page_t mpte;
135d7199
MD
1892 unsigned ptepindex;
1893 vm_offset_t ptepa;
e4a473f1
MD
1894
1895 KKASSERT(pmap != &kernel_pmap);
e4a473f1
MD
1896
1897 KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS);
1898
1899 /*
17cde63e
MD
1900 * Calculate the page table page (mpte), allocating it if necessary.
1901 *
1902 * A held page table page (mpte) is passed on to the code
1903 * following.
e4a473f1
MD
1904 */
1905 ptepindex = va >> PDRSHIFT;
17cde63e
MD
1906
1907 do {
e4a473f1
MD
1908 /*
1909 * Get the page directory entry
1910 */
1911 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1912
1913 /*
1914 * If the page table page is mapped, we just increment
1915 * the hold count, and activate it.
1916 */
1917 if (ptepa) {
1918 if (ptepa & VPTE_PS)
1919 panic("pmap_enter_quick: unexpected mapping into 4MB page");
1920 if (pmap->pm_ptphint &&
1921 (pmap->pm_ptphint->pindex == ptepindex)) {
1922 mpte = pmap->pm_ptphint;
1923 } else {
1924 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1925 pmap->pm_ptphint = mpte;
1926 }
17cde63e
MD
1927 if (mpte)
1928 mpte->hold_count++;
e4a473f1
MD
1929 } else {
1930 mpte = _pmap_allocpte(pmap, ptepindex);
1931 }
17cde63e 1932 } while (mpte == NULL);
e4a473f1
MD
1933
1934 /*
1935 * Ok, now that the page table page has been validated, get the pte.
1936 * If the pte is already mapped undo mpte's hold_count and
1937 * just return.
1938 */
1939 pte = pmap_pte(pmap, va);
1940 if (*pte) {
17cde63e
MD
1941 pmap_unwire_pte_hold(pmap, mpte);
1942 return;
e4a473f1
MD
1943 }
1944
1945 /*
1946 * Enter on the PV list if part of our managed memory. Note that we
1947 * raise IPL while manipulating pv_table since pmap_enter can be
1948 * called at interrupt time.
1949 */
17cde63e 1950 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
e4a473f1 1951 pmap_insert_entry(pmap, va, mpte, m);
17cde63e
MD
1952 vm_page_flag_set(m, PG_MAPPED);
1953 }
e4a473f1
MD
1954
1955 /*
1956 * Increment counters
1957 */
eec2b734 1958 ++pmap->pm_stats.resident_count;
e4a473f1
MD
1959
1960 pa = VM_PAGE_TO_PHYS(m);
1961
1962 /*
1963 * Now validate mapping with RO protection
1964 */
1965 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
d5b116a0 1966 *pte = (vpte_t)pa | VPTE_V | VPTE_U;
e4a473f1 1967 else
d5b116a0 1968 *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED;
17cde63e
MD
1969 /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */
1970 /*pmap_inval_flush(&info); don't need for vkernel */
e4a473f1
MD
1971}
1972
e7f2d7de
MD
1973/*
1974 * Extract the physical address for the translation at the specified
1975 * virtual address in the pmap.
1976 */
6f7b98e0
MD
1977vm_paddr_t
1978pmap_extract(pmap_t pmap, vm_offset_t va)
1979{
1980 vm_paddr_t rtval;
1981 vpte_t pte;
1982
1983 if (pmap && (pte = pmap->pm_pdir[va >> SEG_SHIFT]) != 0) {
1984 if (pte & VPTE_PS) {
1985 rtval = pte & ~((vpte_t)(1 << SEG_SHIFT) - 1);
1986 rtval |= va & SEG_MASK;
1987 } else {
71152ac6 1988 pte = *get_ptbase(pmap, va);
6f7b98e0
MD
1989 rtval = (pte & VPTE_FRAME) | (va & PAGE_MASK);
1990 }
1991 return(rtval);
1992 }
1993 return(0);
1994}
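/*
 * Worked example for pmap_extract() (hypothetical addresses):
 *
 *	4MB mapping:  va = 0x00403123, pde frame = 0x00800000 (VPTE_PS set)
 *		rtval = 0x00800000 | (va & SEG_MASK)  = 0x00803123
 *
 *	4KB mapping:  va = 0x00403123, pte frame = 0x12345000
 *		rtval = 0x12345000 | (va & PAGE_MASK) = 0x12345123
 */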
1995
e4a473f1
MD
1996#define MAX_INIT_PT (96)
1997
1998/*
1999 * This routine preloads the ptes for a given object into the specified pmap.
2000 * This eliminates the blast of soft faults on process startup and
2001 * immediately after an mmap.
2002 */
2003static int pmap_object_init_pt_callback(vm_page_t p, void *data);
2004
2005void
2006pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
2007 vm_object_t object, vm_pindex_t pindex,
2008 vm_size_t size, int limit)
2009{
2010 struct rb_vm_page_scan_info info;
287ebb09 2011 struct lwp *lp;
e4a473f1
MD
2012 int psize;
2013
2014 /*
2015 * We can't preinit if read access isn't set or there is no pmap
2016 * or object.
2017 */
2018 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL)
2019 return;
2020
2021 /*
2022 * We can't preinit if the pmap is not the current pmap
2023 */
287ebb09
MD
2024 lp = curthread->td_lwp;
2025 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace))
e4a473f1
MD
2026 return;
2027
2028 psize = size >> PAGE_SHIFT;
2029
2030 if ((object->type != OBJT_VNODE) ||
2031 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
2032 (object->resident_page_count > MAX_INIT_PT))) {
2033 return;
2034 }
2035
2036 if (psize + pindex > object->size) {
2037 if (object->size < pindex)
2038 return;
2039 psize = object->size - pindex;
2040 }
2041
2042 if (psize == 0)
2043 return;
2044
2045 /*
2046 * Use a red-black scan to traverse the requested range and load
2047 * any valid pages found into the pmap.
2048 *
2049 * We cannot safely scan the object's memq unless we are in a
2050 * critical section since interrupts can remove pages from objects.
2051 */
2052 info.start_pindex = pindex;
2053 info.end_pindex = pindex + psize - 1;
2054 info.limit = limit;
2055 info.mpte = NULL;
2056 info.addr = addr;
2057 info.pmap = pmap;
2058
2059 crit_enter();
2060 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
2061 pmap_object_init_pt_callback, &info);
2062 crit_exit();
2063}
2064
2065static
2066int
2067pmap_object_init_pt_callback(vm_page_t p, void *data)
2068{
2069 struct rb_vm_page_scan_info *info = data;
2070 vm_pindex_t rel_index;
2071 /*
2072 * don't allow an madvise to eat into the free page
2073 * reserve by allocating pv entries.
2074 */
2075 if ((info->limit & MAP_PREFAULT_MADVISE) &&
2076 vmstats.v_free_count < vmstats.v_free_reserved) {
2077 return(-1);
2078 }
2079 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2080 (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2081 if ((p->queue - p->pc) == PQ_CACHE)
2082 vm_page_deactivate(p);
2083 vm_page_busy(p);
2084 rel_index = p->pindex - info->start_pindex;
17cde63e
MD
2085 pmap_enter_quick(info->pmap,
2086 info->addr + i386_ptob(rel_index), p);
e4a473f1
MD
2087 vm_page_wakeup(p);
2088 }
2089 return(0);
2090}
2091
2092/*
2093 * pmap_prefault provides a quick way of clustering pagefaults into a
2094 * process's address space. It is a "cousin" of pmap_object_init_pt,
2095 * except it runs at page fault time instead of mmap time.
2096 */
2097#define PFBAK 4
2098#define PFFOR 4
2099#define PAGEORDER_SIZE (PFBAK+PFFOR)
2100
2101static int pmap_prefault_pageorder[] = {
2102 -PAGE_SIZE, PAGE_SIZE,
2103 -2 * PAGE_SIZE, 2 * PAGE_SIZE,
2104 -3 * PAGE_SIZE, 3 * PAGE_SIZE,
2105 -4 * PAGE_SIZE, 4 * PAGE_SIZE
2106};
2107
2108void
2109pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
2110{
e4a473f1
MD
2111 vm_offset_t starta;
2112 vm_offset_t addr;
2113 vm_pindex_t pindex;
17cde63e 2114 vm_page_t m;
e4a473f1 2115 vm_object_t object;
287ebb09
MD
2116 struct lwp *lp;
2117 int i;
e4a473f1
MD
2118
2119 /*
2120 * We do not currently prefault mappings that use virtual page
2121 * tables. We do not prefault foreign pmaps.
2122 */
2123 if (entry->maptype == VM_MAPTYPE_VPAGETABLE)
2124 return;
287ebb09
MD
2125 lp = curthread->td_lwp;
2126 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace))
e4a473f1
MD
2127 return;
2128
2129 object = entry->object.vm_object;
2130
2131 starta = addra - PFBAK * PAGE_SIZE;
2132 if (starta < entry->start)
2133 starta = entry->start;
2134 else if (starta > addra)
2135 starta = 0;
2136
2137 /*
2138 * critical section protection is required to maintain the
2139 * page/object association, interrupts can free pages and remove
2140 * them from their objects.
2141 */
e4a473f1
MD
2142 crit_enter();
2143 for (i = 0; i < PAGEORDER_SIZE; i++) {
2144 vm_object_t lobject;
2145 vpte_t *pte;
2146
2147 addr = addra + pmap_prefault_pageorder[i];
2148 if (addr > addra + (PFFOR * PAGE_SIZE))
2149 addr = 0;
2150
2151 if (addr < starta || addr >= entry->end)
2152 continue;
2153
2154 /*
2155 * Make sure the page table page already exists
2156 */
201b3f37 2157 if ((*pmap_pde(pmap, addr)) == 0)
e4a473f1
MD
2158 continue;
2159
2160 /*
2161 * Get a pointer to the pte and make sure that no valid page
2162 * has been mapped.
2163 */
71152ac6 2164 pte = get_ptbase(pmap, addr);
e4a473f1
MD
2165 if (*pte)
2166 continue;
2167
2168 /*
2169 * Get the page to be mapped
2170 */
2171 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2172 lobject = object;
2173
2174 for (m = vm_page_lookup(lobject, pindex);
2175 (!m && (lobject->type == OBJT_DEFAULT) &&
2176 (lobject->backing_object));
2177 lobject = lobject->backing_object
2178 ) {
2179 if (lobject->backing_object_offset & PAGE_MASK)
2180 break;
2181 pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2182 m = vm_page_lookup(lobject->backing_object, pindex);
2183 }
2184
2185 /*
2186 * give up when a page is not in memory
2187 */
2188 if (m == NULL)
2189 break;
2190
2191 /*
2192 * If everything meets the requirements for pmap_enter_quick(),
2193 * then enter the page.
2194 */
2195
2196 if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2197 (m->busy == 0) &&
2198 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2199
2200 if ((m->queue - m->pc) == PQ_CACHE) {
2201 vm_page_deactivate(m);
2202 }
2203 vm_page_busy(m);
17cde63e 2204 pmap_enter_quick(pmap, addr, m);
e4a473f1
MD
2205 vm_page_wakeup(m);
2206 }
2207 }
2208 crit_exit();
2209}
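/*
 * Example of the probe order used above: for a fault at 'addra' the loop
 * visits addra-4K, addra+4K, addra-8K, addra+8K, ... addra-16K, addra+16K
 * (per pmap_prefault_pageorder), skips addresses outside [starta,
 * entry->end) or already mapped, and enters any fully valid, idle page
 * found resident in the object chain.
 */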
2210
2211/*
2212 * Routine: pmap_change_wiring
2213 * Function: Change the wiring attribute for a map/virtual-address
2214 * pair.
2215 * In/out conditions:
2216 * The mapping must already exist in the pmap.
2217 */
2218void
2219pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2220{
2221 vpte_t *pte;
2222
2223 if (pmap == NULL)
2224 return;
2225
71152ac6 2226 pte = get_ptbase(pmap, va);
e4a473f1 2227
e7f2d7de
MD
2228 if (wired && (*pte & VPTE_WIRED) == 0)
2229 ++pmap->pm_stats.wired_count;
2230 else if (!wired && (*pte & VPTE_WIRED))
2231 --pmap->pm_stats.wired_count;
2232 KKASSERT(pmap->pm_stats.wired_count >= 0);
e4a473f1
MD
2233
2234 /*
2235 * Wiring is not a hardware characteristic so there is no need to
2236 * invalidate TLB. However, in an SMP environment we must use
2237 * a locked bus cycle to update the pte (if we are not using
2238 * the pmap_inval_*() API that is)... it's ok to do this for simple
2239 * wiring changes.
2240 */
e4a473f1 2241 if (wired)
e7f2d7de 2242 atomic_set_int(pte, VPTE_WIRED);
e4a473f1 2243 else
e7f2d7de 2244 atomic_clear_int(pte, VPTE_WIRED);
e4a473f1
MD
2245}
2246
2247/*
2248 * Copy the range specified by src_addr/len
2249 * from the source map to the range dst_addr/len
2250 * in the destination map.
2251 *
2252 * This routine is only advisory and need not do anything.
2253 */
2254void
2255pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2256 vm_size_t len, vm_offset_t src_addr)
2257{
e4a473f1
MD
2258 vm_offset_t addr;
2259 vm_offset_t end_addr = src_addr + len;
2260 vm_offset_t pdnxt;
2261 vpte_t *src_frame;
2262 vpte_t *dst_frame;
2263 vm_page_t m;
2264
17cde63e
MD
2265 /*
2266 * XXX BUGGY. Among other things srcmpte is assumed to remain
2267 * valid through blocking calls, and that's just not going to
2268 * be the case.
2269 *
2270 * FIXME!
2271 */
2272 return;
2273
e4a473f1
MD
2274 if (dst_addr != src_addr)
2275 return;
2276 if (dst_pmap->pm_pdir == NULL)
2277 return;
2278 if (src_pmap->pm_pdir == NULL)
2279 return;
2280
eec2b734
MD
2281 crit_enter();
2282
71152ac6
MD
2283 src_frame = get_ptbase1(src_pmap, src_addr);
2284 dst_frame = get_ptbase2(dst_pmap, src_addr);
e4a473f1 2285
e4a473f1
MD
2286 /*
2287 * critical section protection is required to maintain the page/object
2288 * association, interrupts can free pages and remove them from
2289 * their objects.
2290 */
e4a473f1
MD
2291 for (addr = src_addr; addr < end_addr; addr = pdnxt) {
2292 vpte_t *src_pte, *dst_pte;
2293 vm_page_t dstmpte, srcmpte;
2294 vm_offset_t srcptepaddr;
2295 unsigned ptepindex;
2296
2297 if (addr >= VM_MAX_USER_ADDRESS)
2298 panic("pmap_copy: invalid to pmap_copy page tables\n");
2299
2300 /*
2301 * Don't let optional prefaulting of pages make us go
2302 * way below the low water mark of free pages or way
2303 * above high water mark of used pv entries.
2304 */
2305 if (vmstats.v_free_count < vmstats.v_free_reserved ||
2306 pv_entry_count > pv_entry_high_water)
2307 break;
2308
2309 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2310 ptepindex = addr >> PDRSHIFT;
2311
2312 srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2313 if (srcptepaddr == 0)
2314 continue;
2315
2316 if (srcptepaddr & VPTE_PS) {
2317 if (dst_pmap->pm_pdir[ptepindex] == 0) {
2318 dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2319 dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2320 }
2321 continue;
2322 }
2323
2324 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
17cde63e
MD
2325 if ((srcmpte == NULL) || (srcmpte->hold_count == 0) ||
2326 (srcmpte->flags & PG_BUSY)) {
e4a473f1 2327 continue;
17cde63e 2328 }
e4a473f1
MD
2329
2330 if (pdnxt > end_addr)
2331 pdnxt = end_addr;
2332
71152ac6
MD
2333 src_pte = src_frame + ((addr - src_addr) >> PAGE_SHIFT);
2334 dst_pte = dst_frame + ((addr - src_addr) >> PAGE_SHIFT);
e4a473f1
MD
2335 while (addr < pdnxt) {
2336 vpte_t ptetemp;
17cde63e 2337
e4a473f1
MD
2338 ptetemp = *src_pte;
2339 /*
2340 * we only virtual copy managed pages
2341 */
e7f2d7de 2342 if ((ptetemp & VPTE_MANAGED) != 0) {
e4a473f1
MD
2343 /*
2344 * We have to check after allocpte for the
2345 * pte still being around... allocpte can
2346 * block.
eec2b734
MD
2347 *
2348 * pmap_allocpte can block, unfortunately
2349 * we have to reload the tables.
e4a473f1
MD
2350 */
2351 dstmpte = pmap_allocpte(dst_pmap, addr);
eec2b734
MD
2352 src_frame = get_ptbase1(src_pmap, src_addr);
2353 dst_frame = get_ptbase2(dst_pmap, src_addr);
2354
17cde63e
MD
2355 if ((*dst_pte == 0) && (ptetemp = *src_pte) &&
2356 (ptetemp & VPTE_MANAGED) != 0) {
e4a473f1 2357 /*
70fc5283
MD
2358 * Clear the modified and accessed
2359 * (referenced) bits during the copy.
d6c96d4d 2360 *
70fc5283
MD
2361 * We do not have to clear the write
2362 * bit to force a fault-on-modify
2363 * because the real kernel's target
2364 * pmap is empty and will fault anyway.
e4a473f1
MD
2365 */
2366 m = PHYS_TO_VM_PAGE(ptetemp);
70fc5283 2367 *dst_pte = ptetemp & ~(VPTE_M | VPTE_A);
eec2b734 2368 ++dst_pmap->pm_stats.resident_count;
e4a473f1
MD
2369 pmap_insert_entry(dst_pmap, addr,
2370 dstmpte, m);
17cde63e 2371 KKASSERT(m->flags & PG_MAPPED);
e4a473f1 2372 } else {
d5b116a0 2373 pmap_unwire_pte_hold(dst_pmap, dstmpte);
e4a473f1
MD
2374 }
2375 if (dstmpte->hold_count >= srcmpte->hold_count)
2376 break;
2377 }
2378 addr += PAGE_SIZE;
2379 src_pte++;
2380 dst_pte++;
2381 }
2382 }
2383 crit_exit();
e4a473f1
MD
2384}
2385
2386/*
2387 * pmap_zero_page:
2388 *
2389 * Zero the specified PA by mapping the page into KVM and clearing its
2390 * contents.
2391 *
2392 * This function may be called from an interrupt and no locking is
2393 * required.
2394 */
2395void
2396pmap_zero_page(vm_paddr_t phys)
2397{
2398 struct mdglobaldata *gd = mdcpu;
2399
2400 crit_enter();
71152ac6 2401 if (*gd->gd_CMAP3)
e4a473f1 2402 panic("pmap_zero_page: CMAP3 busy");
a3c35df6 2403 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2404 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2405
2406 bzero(gd->gd_CADDR3, PAGE_SIZE);
71152ac6 2407 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2408 crit_exit();
2409}
2410
2411/*
2412 * pmap_page_assertzero:
2413 *
2414 * Assert that a page is empty, panic if it isn't.
2415 */
2416void
2417pmap_page_assertzero(vm_paddr_t phys)
2418{
2419 struct mdglobaldata *gd = mdcpu;
2420 int i;
2421
2422 crit_enter();
71152ac6 2423 if (*gd->gd_CMAP3)
e4a473f1 2424 panic("pmap_page_assertzero: CMAP3 busy");
71152ac6
MD
2425 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W |
2426 (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2427 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2428 for (i = 0; i < PAGE_SIZE; i += 4) {
2429 if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) {
2430 panic("pmap_page_assertzero() @ %p not zero!\n",
2431 (void *)gd->gd_CADDR3);
2432 }
2433 }
71152ac6 2434 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2435 crit_exit();
2436}
2437
2438/*
2439 * pmap_zero_page_area:
2440 *
2441 * Zero part of a physical page by mapping it into memory and clearing
2442 * its contents with bzero.
2443 *
2444 * off and size may not cover an area beyond a single hardware page.
2445 */
2446void
2447pmap_zero_page_area(vm_paddr_t phys, int off, int size)
2448{
2449 struct mdglobaldata *gd = mdcpu;
2450
2451 crit_enter();
71152ac6 2452 if (*gd->gd_CMAP3)
e4a473f1 2453 panic("pmap_zero_page_area: CMAP3 busy");
71152ac6
MD
2454 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W |
2455 (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2456 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2457
2458 bzero((char *)gd->gd_CADDR3 + off, size);
71152ac6 2459 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2460 crit_exit();
2461}
2462
2463/*
2464 * pmap_copy_page:
2465 *
2466 * Copy the physical page from the source PA to the target PA.
2467 * This function may be called from an interrupt. No locking
2468 * is required.
2469 */
2470void
2471pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
2472{
2473 struct mdglobaldata *gd = mdcpu;
2474
2475 crit_enter();
2476 if (*(int *) gd->gd_CMAP1)
2477 panic("pmap_copy_page: CMAP1 busy");
2478 if (*(int *) gd->gd_CMAP2)
2479 panic("pmap_copy_page: CMAP2 busy");
2480
4e7c41c5 2481 *(int *) gd->gd_CMAP1 = VPTE_V | VPTE_R | (src & VPTE_FRAME) | VPTE_A;
e4a473f1
MD
2482 *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M;
2483
6f7b98e0
MD
2484 madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL);
2485 madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2486
2487 bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE);
2488
2489 *(int *) gd->gd_CMAP1 = 0;
2490 *(int *) gd->gd_CMAP2 = 0;
2491 crit_exit();
2492}
2493
2494/*
2495 * pmap_copy_page_frag:
2496 *
2497 * Copy a fragment of a physical page from the source PA to the
2498 * target PA (offsets taken from src/dst, length from 'bytes').
2499 * This function may be called from an interrupt. No locking is required.
2500 */
2501void
2502pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
2503{
2504 struct mdglobaldata *gd = mdcpu;
2505
2506 crit_enter();
2507 if (*(int *) gd->gd_CMAP1)
2508 panic("pmap_copy_page: CMAP1 busy");
2509 if (*(int *) gd->gd_CMAP2)
2510 panic("pmap_copy_page: CMAP2 busy");
2511
2512 *(int *) gd->gd_CMAP1 = VPTE_V | (src & VPTE_FRAME) | VPTE_A;
2513 *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M;
2514
6f7b98e0
MD
2515 madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL);
2516 madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2517
2518 bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK),
2519 (char *)gd->gd_CADDR2 + (dst & PAGE_MASK),
2520 bytes);
2521
2522 *(int *) gd->gd_CMAP1 = 0;
2523 *(int *) gd->gd_CMAP2 = 0;
2524 crit_exit();
2525}
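/*
 * Example (hypothetical PAs): pmap_copy_page_frag(0x12345040, 0x23456080, 64)
 * maps frame 0x12345000 at CADDR1 and frame 0x23456000 at CADDR2, then
 * copies 64 bytes from offset 0x040 of the source mapping to offset 0x080
 * of the destination mapping.
 */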
2526
2527/*
2528 * Returns true if the pmap's pv is one of the first
2529 * 16 pvs linked to from this page. This count may
2530 * be changed upwards or downwards in the future; it
2531 * is only necessary that true be returned for a small
2532 * subset of pmaps for proper page aging.
2533 */
2534boolean_t
2535pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2536{
2537 pv_entry_t pv;
2538 int loops = 0;
2539
2540 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2541 return FALSE;
2542
2543 crit_enter();
2544
2545 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2546 if (pv->pv_pmap == pmap) {
2547 crit_exit();
2548 return TRUE;
2549 }
2550 loops++;
2551 if (loops >= 16)
2552 break;
2553 }
2554 crit_exit();
2555 return (FALSE);
2556}
2557
2558/*
2559 * Remove all pages from the specified address space; this aids
2560 * process exit speeds. Also, this code is special-cased for the
2561 * current process only, but can have the more generic (and
2562 * slightly slower) mode enabled. This is much faster than
2563 * pmap_remove in the case of running down an entire address
2564 * space.
2565 */
2566void
2567pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2568{
2569 vpte_t *pte, tpte;
2570 pv_entry_t pv, npv;
2571 vm_page_t m;
e4a473f1 2572 int iscurrentpmap;
8790d7d8 2573 int32_t save_generation;
287ebb09 2574 struct lwp *lp;
e4a473f1 2575
287ebb09
MD
2576 lp = curthread->td_lwp;
2577 if (lp && pmap == vmspace_pmap(lp->lwp_vmspace))
e4a473f1
MD
2578 iscurrentpmap = 1;
2579 else
2580 iscurrentpmap = 0;
2581
e4a473f1
MD
2582 crit_enter();
2583 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2584 if (pv->pv_va >= eva || pv->pv_va < sva) {
2585 npv = TAILQ_NEXT(pv, pv_plist);
2586 continue;
2587 }
2588
8790d7d8
MD
2589 KKASSERT(pmap == pv->pv_pmap);
2590
2591 pte = pmap_pte(pmap, pv->pv_va);
e4a473f1
MD
2592
2593 /*
2594 * We cannot remove wired pages from a process' mapping
2595 * at this time
2596 */
d5b116a0 2597 if (*pte & VPTE_WIRED) {
e4a473f1
MD
2598 npv = TAILQ_NEXT(pv, pv_plist);
2599 continue;
2600 }
d5b116a0 2601 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va);
e4a473f1
MD
2602
2603 m = PHYS_TO_VM_PAGE(tpte);
2604
2605 KASSERT(m < &vm_page_array[vm_page_array_size],
2606 ("pmap_remove_pages: bad tpte %x", tpte));
2607
eec2b734
MD
2608 KKASSERT(pmap->pm_stats.resident_count > 0);
2609 --pmap->pm_stats.resident_count;
e4a473f1
MD
2610
2611 /*
2612 * Update the vm_page_t clean and reference bits.
2613 */
2614 if (tpte & VPTE_M) {
2615 vm_page_dirty(m);
2616 }
2617
e4a473f1 2618 npv = TAILQ_NEXT(pv, pv_plist);
8790d7d8
MD
2619 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
2620 save_generation = ++pmap->pm_generation;
e4a473f1
MD
2621
2622 m->md.pv_list_count--;
2623 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
17cde63e 2624 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
e4a473f1 2625 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
e4a473f1 2626
d5b116a0 2627 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem);
e4a473f1 2628 free_pv_entry(pv);
8790d7d8
MD
2629
2630 /*
2631 * Restart the scan if we blocked during the unuse or free
2632 * calls and other removals were made.
2633 */
2634 if (save_generation != pmap->pm_generation) {
2635 kprintf("Warning: pmap_remove_pages race-A avoided\n");
2636 pv = TAILQ_FIRST(&pmap->pm_pvlist);
2637 }
e4a473f1 2638 }
e4a473f1
MD
2639 crit_exit();
2640}
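/*
 * Worked scenario for the generation check above: if pmap_unuse_pt() or
 * free_pv_entry() blocks and another removal bumps pm_generation in the
 * meantime, the cached npv pointer may be stale, so the scan restarts
 * from the head of pm_pvlist rather than chasing freed memory.
 */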
2641
2642/*
d5b116a0 2643 * pmap_testbit tests bits in active mappings of a VM page.
e4a473f1
MD
2644 */
2645static boolean_t
2646pmap_testbit(vm_page_t m, int bit)
2647{
2648 pv_entry_t pv;
2649 vpte_t *pte;
2650
2651 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2652 return FALSE;
2653
2654 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2655 return FALSE;
2656
2657 crit_enter();
2658
2659 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2660 /*
2661 * If the bit being tested is the accessed or modified bit,
2662 * skip mappings whose modified state is not tracked by
2663 * pmap_track_modified().
2664 */
2665 if (bit & (VPTE_A|VPTE_M)) {
d6c96d4d 2666 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2667 continue;
2668 }
2669
2670#if defined(PMAP_DIAGNOSTIC)
2671 if (!pv->pv_pmap) {
2672 kprintf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2673 continue;
2674 }
2675#endif
2676 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2677 if (*pte & bit) {
2678 crit_exit();
2679 return TRUE;
2680 }
2681 }
2682 crit_exit();
2683 return (FALSE);
2684}
2685
2686/*
70fc5283
MD
2687 * This routine is used to clear bits in ptes. Certain bits require special
2688 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit.
d5b116a0
MD
2689 *
2690 * This routine is only called with certain VPTE_* bit combinations.
e4a473f1
MD
2691 */
2692static __inline void
d6c96d4d 2693pmap_clearbit(vm_page_t m, int bit)
e4a473f1 2694{
e4a473f1
MD
2695 pv_entry_t pv;
2696 vpte_t *pte;
d6c96d4d 2697 vpte_t pbits;
e4a473f1
MD
2698
2699 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2700 return;
2701
e4a473f1
MD
2702 crit_enter();
2703
2704 /*
2705 * Loop over all current mappings, setting/clearing as appropriate.
2706 * If setting RO, do we need to clear the VAC?
2707 */
2708 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2709 /*
2710 * don't write protect pager mappings
2711 */
d6c96d4d
MD
2712 if (bit == VPTE_W) {
2713 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2714 continue;
2715 }
2716
2717#if defined(PMAP_DIAGNOSTIC)
2718 if (!pv->pv_pmap) {
2719 kprintf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2720 continue;
2721 }
2722#endif
2723
2724 /*
2725 * Careful here. We can use a locked bus instruction to
2726 * clear VPTE_A or VPTE_M safely but we need to synchronize
2727 * with the target cpus when we mess with VPTE_W.
d6c96d4d 2728 *
70fc5283
MD
2729 * On virtual kernels we must force a new fault-on-write
2730 * in the real kernel if we clear the Modify bit ourselves,
2731 * otherwise the real kernel will not get a new fault and
2732 * will never set our Modify bit again.
e4a473f1
MD
2733 */
2734 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
d5b116a0 2735 if (*pte & bit) {
d6c96d4d 2736 if (bit == VPTE_W) {
d5b116a0
MD
2737 /*
2738 * We must also clear VPTE_M when clearing
2739 * VPTE_W
2740 */
2741 pbits = pmap_clean_pte(pte, pv->pv_pmap,
2742 pv->pv_va);
2743 if (pbits & VPTE_M)
d6c96d4d 2744 vm_page_dirty(m);
d6c96d4d
MD
2745 } else if (bit == VPTE_M) {
2746 /*
70fc5283
MD
2747 * We do not have to make the page read-only
2748 * when clearing the Modify bit. The real
2749 * kernel will make the real PTE read-only
2750 * or otherwise detect the write and set
2751 * our VPTE_M again simply by us invalidating
2752 * the real kernel VA for the pmap (as we did
2753 * above). This allows the real kernel to
2754 * handle the write fault without forwarding
2755 * the fault to us.
d6c96d4d 2756 */
70fc5283 2757 atomic_clear_int(pte, VPTE_M);
d5b116a0
MD
2758 } else if ((bit & (VPTE_W|VPTE_M)) == (VPTE_W|VPTE_M)) {
2759 /*
2760 * We've been asked to clear W & M, I guess
2761 * the caller doesn't want us to update
2762 * the dirty status of the VM page.
2763 */
2764 pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va);
d6c96d4d 2765 } else {
d5b116a0
MD
2766 /*
2767 * We've been asked to clear bits that do
2768 * not interact with hardware.
2769 */
d6c96d4d 2770 atomic_clear_int(pte, bit);
e4a473f1
MD
2771 }
2772 }
2773 }
e4a473f1
MD
2774 crit_exit();
2775}
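/*
 * For clarity, the cases handled above reduce to:
 *
 *	bit == VPTE_W		clean the pte (clears W and M) and dirty
 *				the vm_page if M was set
 *	bit == VPTE_M		clear M only; the real kernel re-faults
 *				and sets it again on the next write
 *	bit == VPTE_W|VPTE_M	clean the pte without updating the page's
 *				dirty status
 *	other bits		plain atomic_clear_int()
 */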
2776
2777/*
2778 * pmap_page_protect:
2779 *
2780 * Lower the permission for all mappings to a given page.
2781 */
2782void
2783pmap_page_protect(vm_page_t m, vm_prot_t prot)
2784{
2785 if ((prot & VM_PROT_WRITE) == 0) {
2786 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
d6c96d4d 2787 pmap_clearbit(m, VPTE_W);
17cde63e 2788 vm_page_flag_clear(m, PG_WRITEABLE);
e4a473f1
MD
2789 } else {
2790 pmap_remove_all(m);
2791 }
2792 }
2793}
2794
2795vm_paddr_t
cfd17028 2796pmap_phys_address(vm_pindex_t ppn)
e4a473f1
MD
2797{
2798 return (i386_ptob(ppn));
2799}
2800
2801/*
2802 * pmap_ts_referenced:
2803 *
2804 * Return a count of reference bits for a page, clearing those bits.
2805 * It is not necessary for every reference bit to be cleared, but it
2806 * is necessary that 0 only be returned when there are truly no
2807 * reference bits set.
2808 *
2809 * XXX: The exact number of bits to check and clear is a matter that
2810 * should be tested and standardized at some point in the future for
2811 * optimal aging of shared pages.
2812 */
2813int
2814pmap_ts_referenced(vm_page_t m)
2815{
2816 pv_entry_t pv, pvf, pvn;
2817 vpte_t *pte;
2818 int rtval = 0;
2819
2820 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2821 return (rtval);
2822
2823 crit_enter();
2824
2825 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
2826
2827 pvf = pv;
2828
2829 do {
2830 pvn = TAILQ_NEXT(pv, pv_list);
2831
2832 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2833
2834 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2835
d6c96d4d 2836 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2837 continue;
2838
2839 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2840
2841 if (pte && (*pte & VPTE_A)) {
2842#ifdef SMP
2843 atomic_clear_int(pte, VPTE_A);
2844#else
2845 atomic_clear_int_nonlocked(pte, VPTE_A);
2846#endif
2847 rtval++;
2848 if (rtval > 4) {
2849 break;
2850 }
2851 }
2852 } while ((pv = pvn) != NULL && pv != pvf);
2853 }
2854 crit_exit();
2855
2856 return (rtval);
2857}
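/*
 * Example: if a page is mapped by eight pmaps, a single call clears VPTE_A
 * in at most five of them (the loop stops once rtval exceeds 4).  Each
 * visited pv is rotated to the tail of the pv list, so subsequent calls
 * reach the mappings that were skipped this time.
 */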
2858
2859/*
2860 * pmap_is_modified:
2861 *
2862 * Return whether or not the specified physical page was modified
2863 * in any physical maps.
2864 */
2865boolean_t
2866pmap_is_modified(vm_page_t m)
2867{
2868 return pmap_testbit(m, VPTE_M);
2869}
2870
2871/*
2872 * Clear the modify bits on the specified physical page.
2873 */
2874void
2875pmap_clear_modify(vm_page_t m)
2876{
d6c96d4d 2877 pmap_clearbit(m, VPTE_M);
e4a473f1
MD
2878}
2879
2880/*
2881 * pmap_clear_reference:
2882 *
2883 * Clear the reference bit on the specified physical page.
2884 */
2885void
2886pmap_clear_reference(vm_page_t m)
2887{
d6c96d4d 2888 pmap_clearbit(m, VPTE_A);
e4a473f1
MD
2889}
2890
2891/*
2892 * Miscellaneous support routines follow
2893 */
2894
2895static void
2896i386_protection_init(void)
2897{
2898 int *kp, prot;
2899
2900 kp = protection_codes;
2901 for (prot = 0; prot < 8; prot++) {
2902 if (prot & VM_PROT_READ)
2903 *kp |= VPTE_R;
2904 if (prot & VM_PROT_WRITE)
2905 *kp |= VPTE_W;
2906 if (prot & VM_PROT_EXECUTE)
2907 *kp |= VPTE_X;
2908 ++kp;
2909 }
2910}
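/*
 * Sketch of the table built above, indexed by the VM_PROT_* bit mask
 * (values 0-7); pte_prot() as used by pmap_enter() presumably just
 * performs this lookup:
 *
 *	protection_codes[VM_PROT_READ]                 == VPTE_R
 *	protection_codes[VM_PROT_READ|VM_PROT_WRITE]   == VPTE_R|VPTE_W
 *	protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] == VPTE_R|VPTE_X
 */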
2911
d5b116a0
MD
2912#if 0
2913
e4a473f1
MD
2914/*
2915 * Map a set of physical memory pages into the kernel virtual
2916 * address space. Return a pointer to where it is mapped. This
2917 * routine is intended to be used for mapping device memory,
2918 * NOT real memory.
2919 *
2920 * NOTE: we can't use pgeflag unless we invalidate the pages one at
2921 * a time.
2922 */
2923void *
2924pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2925{
2926 vm_offset_t va, tmpva, offset;
2927 vpte_t *pte;
2928
2929 offset = pa & PAGE_MASK;
2930 size = roundup(offset + size, PAGE_SIZE);
2931
2932 va = kmem_alloc_nofault(&kernel_map, size);
2933 if (!va)
2934 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2935
2936 pa = pa & VPTE_FRAME;
2937 for (tmpva = va; size > 0;) {
2938 pte = KernelPTA + (tmpva >> PAGE_SHIFT);
2939 *pte = pa | VPTE_R | VPTE_W | VPTE_V; /* | pgeflag; */
2940 size -= PAGE_SIZE;
2941 tmpva += PAGE_SIZE;
2942 pa += PAGE_SIZE;
2943 }
2944 cpu_invltlb();
2945 smp_invltlb();
2946
2947 return ((void *)(va + offset));
2948}
2949
2950void
2951pmap_unmapdev(vm_offset_t va, vm_size_t size)
2952{
2953 vm_offset_t base, offset;
2954
2955 base = va & VPTE_FRAME;
2956 offset = va & PAGE_MASK;
2957 size = roundup(offset + size, PAGE_SIZE);
2958 pmap_qremove(va, size >> PAGE_SHIFT);
2959 kmem_free(&kernel_map, base, size);
2960}
2961
d5b116a0
MD
2962#endif
2963
e4a473f1
MD
2964/*
2965 * perform the pmap work for mincore
2966 */
2967int
2968pmap_mincore(pmap_t pmap, vm_offset_t addr)
2969{
2970 vpte_t *ptep, pte;
2971 vm_page_t m;
2972 int val = 0;
2973
2974 ptep = pmap_pte(pmap, addr);
2975 if (ptep == 0) {
2976 return 0;
2977 }
2978
2979 if ((pte = *ptep) != 0) {
2980 vm_offset_t pa;
2981
2982 val = MINCORE_INCORE;
2983 if ((pte & VPTE_MANAGED) == 0)
2984 return val;
2985
2986 pa = pte & VPTE_FRAME;
2987
2988 m = PHYS_TO_VM_PAGE(pa);
2989
2990 /*
2991 * Modified by us
2992 */
2993 if (pte & VPTE_M)
2994 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2995 /*
2996 * Modified by someone
2997 */
2998 else if (m->dirty || pmap_is_modified(m))
2999 val |= MINCORE_MODIFIED_OTHER;
3000 /*
3001 * Referenced by us
3002 */
3003 if (pte & VPTE_A)
3004 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3005
3006 /*
3007 * Referenced by someone
3008 */
3009 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
3010 val |= MINCORE_REFERENCED_OTHER;
3011 vm_page_flag_set(m, PG_REFERENCED);
3012 }
3013 }
3014 return val;
3015}
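/*
 * Illustrative sketch (not compiled, hypothetical encoding): how a caller
 * such as the mincore() system call path might fold the value returned by
 * pmap_mincore() into a per-page status byte.
 */
#if 0
static char
pmap_mincore_example(pmap_t pmap, vm_offset_t addr)
{
	int val = pmap_mincore(pmap, addr);
	char status = 0;

	if (val & MINCORE_INCORE)
		status |= 0x01;		/* page is resident */
	if (val & (MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER))
		status |= 0x02;		/* dirty in some mapping */
	if (val & (MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER))
		status |= 0x04;		/* referenced in some mapping */
	return (status);
}
#endif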
3016
3017void
e3161323 3018pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
e4a473f1 3019{
e3161323 3020 struct vmspace *oldvm;
287ebb09 3021 struct lwp *lp;
e4a473f1 3022
e3161323 3023 oldvm = p->p_vmspace;
287ebb09 3024 crit_enter();
e3161323 3025 if (oldvm != newvm) {
e3161323 3026 p->p_vmspace = newvm;
287ebb09 3027 KKASSERT(p->p_nthreads == 1);
3e291793 3028 lp = RB_ROOT(&p->p_lwp_tree);
287ebb09
MD
3029 pmap_setlwpvm(lp, newvm);
3030 if (adjrefs) {
3031 sysref_get(&newvm->vm_sysref);
3032 sysref_put(&oldvm->vm_sysref);
3033 }
3034 }
3035 crit_exit();
3036}
3037
3038void
3039pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
3040{
3041 struct vmspace *oldvm;
3042 struct pmap *pmap;
3043
3044 crit_enter();
3045 oldvm = lp->lwp_vmspace;
3046
3047 if (oldvm != newvm) {
3048 lp->lwp_vmspace = newvm;
3049 if (curthread->td_lwp == lp) {
e3161323 3050 pmap = vmspace_pmap(newvm);
e4a473f1 3051#if defined(SMP)
e3161323 3052 atomic_set_int(&pmap->pm_active, 1 << mycpu->gd_cpuid);
e4a473f1 3053#else
e3161323 3054 pmap->pm_active |= 1;
e4a473f1
MD
3055#endif
3056#if defined(SWTCH_OPTIM_STATS)
e3161323 3057 tlb_flush_count++;
e4a473f1 3058#endif
e3161323 3059 pmap = vmspace_pmap(oldvm);
e4a473f1 3060#if defined(SMP)
e3161323
MD
3061 atomic_clear_int(&pmap->pm_active,
3062 1 << mycpu->gd_cpuid);
e4a473f1 3063#else
e3161323 3064 pmap->pm_active &= ~1;
e4a473f1 3065#endif
e3161323 3066 }
e3161323
MD
3067 }
3068 crit_exit();
e4a473f1
MD
3069}
3070
287ebb09 3071
e4a473f1
MD
3072vm_offset_t
3073pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3074{
3075
3076 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3077 return addr;
3078 }
3079
3080 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3081 return addr;
3082}
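/*
 * Example of the rounding above with NBPDR == 4MB (0x400000):
 *
 *	addr = 0x00401000  ->  (addr + 0x3fffff) & ~0x3fffff = 0x00800000
 *
 * so large OBJT_DEVICE mappings start on a page directory boundary.
 */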
3083
3084
3085#if defined(DEBUG)
3086
3087static void pads (pmap_t pm);
3088void pmap_pvdump (vm_paddr_t pa);
3089
3090/* print address space of pmap*/
3091static void
3092pads(pmap_t pm)
3093{
3094 vm_offset_t va;
3095 int i, j;
3096 vpte_t *ptep;
3097
3098 if (pm == &kernel_pmap)
3099 return;
3100 for (i = 0; i < 1024; i++)
3101 if (pm->pm_pdir[i])
3102 for (j = 0; j < 1024; j++) {
3103 va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3104 if (pm == &kernel_pmap && va < KERNBASE)
3105 continue;
3106 if (pm != &kernel_pmap && va > UPT_MAX_ADDRESS)
3107 continue;
3108 ptep = pmap_pte(pm, va);
3109 if (ptep && (*ptep & VPTE_V)) {
3110 kprintf("%p:%x ",
3111 (void *)va, (unsigned)*ptep);
3112 }
3113 };
3114
3115}
3116
3117void
3118pmap_pvdump(vm_paddr_t pa)
3119{
3120 pv_entry_t pv;
3121 vm_page_t m;
3122
3123 kprintf("pa %08llx", (long long)pa);
3124 m = PHYS_TO_VM_PAGE(pa);
3125 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3126#ifdef used_to_be
3127 kprintf(" -> pmap %p, va %x, flags %x",
3128 (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
3129#endif
3130 kprintf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3131 pads(pv->pv_pmap);
3132 }
3133 kprintf(" ");
3134}
3135#endif
4e7c41c5 3136