kernel - Check PG_MARKER in pmap_object_init_pt_callback()
[dragonfly.git] / sys/platform/vkernel/platform/pmap.c
1/*
2 * (MPSAFE)
3 *
4 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
5 * Copyright (c) 1991 Regents of the University of California.
6 * All rights reserved.
7 * Copyright (c) 1994 John S. Dyson
8 * All rights reserved.
9 * Copyright (c) 1994 David Greenman
10 * All rights reserved.
11 * Copyright (c) 2004-2006 Matthew Dillon
12 * All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 * 3. Neither the name of The DragonFly Project nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific, prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
36 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
37 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
38 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
42 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
43 */
44/*
45 * NOTE: PMAP_INVAL_ADD: In pc32 this function is called prior to adjusting
46 * the PTE in the page table, because a cpu synchronization might be required.
47 * The actual invalidation is delayed until the following call or flush. In
48 * the VKERNEL build this function is called prior to adjusting the PTE and
49 * invalidates the table synchronously (not delayed), and is not SMP safe
50 * as a consequence.
51 */
52
53#include <sys/types.h>
54#include <sys/systm.h>
55#include <sys/kernel.h>
56#include <sys/stat.h>
57#include <sys/mman.h>
58#include <sys/vkernel.h>
59#include <sys/proc.h>
60#include <sys/thread.h>
61#include <sys/user.h>
62#include <sys/vmspace.h>
63
64#include <vm/pmap.h>
65#include <vm/vm_page.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_kern.h>
68#include <vm/vm_object.h>
69#include <vm/vm_zone.h>
70#include <vm/vm_pageout.h>
71
72#include <machine/md_var.h>
73#include <machine/pcb.h>
74#include <machine/pmap_inval.h>
75#include <machine/globaldata.h>
76
77#include <sys/sysref2.h>
78#include <sys/spinlock2.h>
79
80#include <assert.h>
81
82struct pmap kernel_pmap;
83
84static struct vm_zone pvzone;
85static struct vm_object pvzone_obj;
86static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list);
87static int pv_entry_count;
88static int pv_entry_max;
89static int pv_entry_high_water;
90static int pmap_pagedaemon_waken;
91static boolean_t pmap_initialized = FALSE;
92static int protection_codes[8];
93
94static void i386_protection_init(void);
95static void pmap_remove_all(vm_page_t m);
96static int pmap_release_free_page(struct pmap *pmap, vm_page_t p);
97
98#define MINPV 2048
99#ifndef PMAP_SHPGPERPROC
100#define PMAP_SHPGPERPROC 200
101#endif
102
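/*
 * pmap_pde() returns the page directory entry covering a VA; pte_prot()
 * converts a VM protection mask into the matching protection code bits.
 */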
103#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
104
105#define pte_prot(m, p) \
106 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
107
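/*
 * Initialize the pmap module: set up the per-page pv lists and bootstrap
 * the pv_entry zone from a static kernel_map allocation.
 */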
108void
109pmap_init(void)
110{
111 int i;
112 struct pv_entry *pvinit;
113
114 for (i = 0; i < vm_page_array_size; i++) {
115 vm_page_t m;
116
117 m = &vm_page_array[i];
118 TAILQ_INIT(&m->md.pv_list);
119 m->md.pv_list_count = 0;
120 }
121
122 i = vm_page_array_size;
123 if (i < MINPV)
124 i = MINPV;
125 pvinit = (struct pv_entry *)kmem_alloc(&kernel_map, i*sizeof(*pvinit));
126 zbootinit(&pvzone, "PV ENTRY", sizeof(*pvinit), pvinit, i);
127 pmap_initialized = TRUE;
128}
129
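/*
 * Finish sizing the pv_entry zone once maxproc and the related tunables
 * are known.
 */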
130void
131pmap_init2(void)
132{
133 int shpgperproc = PMAP_SHPGPERPROC;
134
135 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
136 pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
137 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
138 pv_entry_high_water = 9 * (pv_entry_max / 10);
139 zinitna(&pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
140}
141
142/*
143 * Bootstrap the kernel_pmap so it can be used with pmap_enter().
144 *
145 * NOTE! pm_pdir for the kernel pmap is offset so VA's translate
146 * directly into PTD indexes (PTA is also offset for the same reason).
147 * This is necessary because, for now, KVA is not mapped at address 0.
148 *
149 * Page table pages are not managed like they are in normal pmaps, so
150 * no pteobj is needed.
151 */
152void
153pmap_bootstrap(void)
154{
155 vm_pindex_t i = (vm_offset_t)KernelPTD >> PAGE_SHIFT;
156
157 /*
158 * The kernel_pmap's pm_pteobj is used only for locking and not
159 * for mmu pages.
160 */
161 kernel_pmap.pm_pdir = KernelPTD - (KvaStart >> SEG_SHIFT);
162 kernel_pmap.pm_pdirpte = KernelPTA[i];
163 kernel_pmap.pm_count = 1;
164 kernel_pmap.pm_active = (cpumask_t)-1 & ~CPUMASK_LOCK;
165 kernel_pmap.pm_pteobj = &kernel_object;
166 TAILQ_INIT(&kernel_pmap.pm_pvlist);
167 TAILQ_INIT(&kernel_pmap.pm_pvlist_free);
168 spin_init(&kernel_pmap.pm_spin);
169 lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok");
170 i386_protection_init();
171}
172
173/*
174 * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we
175 * just dummy it up so it works well enough for fork().
176 *
177 * In DragonFly, process pmaps may only be used to manipulate user address
178 * space, never kernel address space.
179 */
180void
181pmap_pinit0(struct pmap *pmap)
182{
183 pmap_pinit(pmap);
184}
185
186/************************************************************************
187 * Procedures to manage whole physical maps *
188 ************************************************************************
189 *
190 * Initialize a preallocated and zeroed pmap structure,
191 * such as one in a vmspace structure.
192 */
193void
194pmap_pinit(struct pmap *pmap)
195{
196 vm_page_t ptdpg;
197 int npages;
198
199 /*
200 * No need to allocate page table space yet but we do need a valid
201 * page directory table.
202 */
203 if (pmap->pm_pdir == NULL) {
204 pmap->pm_pdir =
205 (vpte_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE);
206 }
207
208 /*
209 * allocate object for the pte array and page directory
210 */
211 npages = VPTE_PAGETABLE_SIZE +
212 (VM_MAX_USER_ADDRESS / PAGE_SIZE) * sizeof(vpte_t);
213 npages = (npages + PAGE_MASK) / PAGE_SIZE;
214
215 if (pmap->pm_pteobj == NULL)
216 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, npages);
217 pmap->pm_pdindex = npages - 1;
218
219 /*
220 * allocate the page directory page
221 */
222 ptdpg = vm_page_grab(pmap->pm_pteobj, pmap->pm_pdindex,
223 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_ZERO);
224 vm_page_wire(ptdpg);
225
226 /* not usually mapped */
227 vm_page_flag_clear(ptdpg, PG_MAPPED);
228 vm_page_wakeup(ptdpg);
229
230 pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
231 pmap->pm_pdirpte = KernelPTA[(vm_offset_t)pmap->pm_pdir >> PAGE_SHIFT];
232
233 pmap->pm_count = 1;
234 pmap->pm_active = 0;
235 pmap->pm_ptphint = NULL;
236 pmap->pm_cpucachemask = 0;
237 TAILQ_INIT(&pmap->pm_pvlist);
238 TAILQ_INIT(&pmap->pm_pvlist_free);
239 spin_init(&pmap->pm_spin);
240 lwkt_token_init(&pmap->pm_token, "pmap_tok");
241 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
242 pmap->pm_stats.resident_count = 1;
243}
244
245/*
246 * Clean up a pmap structure so it can be physically freed
247 *
248 * No requirements.
249 */
250void
251pmap_puninit(pmap_t pmap)
252{
253 if (pmap->pm_pdir) {
254 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pdir, PAGE_SIZE);
255 pmap->pm_pdir = NULL;
256 }
257 if (pmap->pm_pteobj) {
258 vm_object_deallocate(pmap->pm_pteobj);
259 pmap->pm_pteobj = NULL;
260 }
261}
262
263
264/*
265 * Wire in kernel global address entries. To avoid a race condition
266 * between pmap initialization and pmap_growkernel, this procedure
267 * adds the pmap to the master list (which growkernel scans to update),
268 * then copies the template.
269 *
270 * In a virtual kernel there are no kernel global address entries.
271 *
272 * No requirements.
273 */
274void
275pmap_pinit2(struct pmap *pmap)
276{
277 spin_lock(&pmap_spin);
278 TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode);
279 spin_unlock(&pmap_spin);
280}
281
282/*
283 * Release all resources held by the given physical map.
284 *
285 * Should only be called if the map contains no valid mappings.
286 *
287 * Caller must hold pmap->pm_token
288 */
289static int pmap_release_callback(struct vm_page *p, void *data);
290
291void
292pmap_release(struct pmap *pmap)
293{
294 struct mdglobaldata *gd = mdcpu;
295 vm_object_t object = pmap->pm_pteobj;
296 struct rb_vm_page_scan_info info;
297
298 KKASSERT(pmap != &kernel_pmap);
299
300#if defined(DIAGNOSTIC)
301 if (object->ref_count != 1)
302 panic("pmap_release: pteobj reference count != 1");
303#endif
304 /*
305 * Once we destroy the page table, the mapping becomes invalid.
306 * Don't waste time doing a madvise to invalidate the mapping, just
307 * set cpucachemask to 0.
308 */
309 if (pmap->pm_pdir == gd->gd_PT1pdir) {
310 gd->gd_PT1pdir = NULL;
311 *gd->gd_PT1pde = 0;
312 /* madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL); */
313 }
314 if (pmap->pm_pdir == gd->gd_PT2pdir) {
315 gd->gd_PT2pdir = NULL;
316 *gd->gd_PT2pde = 0;
317 /* madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL); */
318 }
319 if (pmap->pm_pdir == gd->gd_PT3pdir) {
320 gd->gd_PT3pdir = NULL;
321 *gd->gd_PT3pde = 0;
322 /* madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL); */
323 }
324
325 info.pmap = pmap;
326 info.object = object;
327
328 spin_lock(&pmap_spin);
329 TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode);
330 spin_unlock(&pmap_spin);
331
332 vm_object_hold(object);
333 do {
334 info.error = 0;
335 info.mpte = NULL;
336 info.limit = object->generation;
337
338 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
339 pmap_release_callback, &info);
340 if (info.error == 0 && info.mpte) {
341 if (!pmap_release_free_page(pmap, info.mpte))
342 info.error = 1;
343 }
344 } while (info.error);
345 vm_object_drop(object);
346
347 /*
348 * Leave the KVA reservation for pm_pdir cached for later reuse.
349 */
350 pmap->pm_pdirpte = 0;
351 pmap->pm_cpucachemask = 0;
352}
353
354/*
355 * Callback to release a page table page backing a directory
356 * entry.
357 */
358static int
359pmap_release_callback(struct vm_page *p, void *data)
360{
361 struct rb_vm_page_scan_info *info = data;
362
363 if (p->pindex == info->pmap->pm_pdindex) {
364 info->mpte = p;
365 return(0);
366 }
367 if (!pmap_release_free_page(info->pmap, p)) {
368 info->error = 1;
369 return(-1);
370 }
371 if (info->object->generation != info->limit) {
372 info->error = 1;
373 return(-1);
374 }
375 return(0);
376}
377
378/*
379 * Retire the given physical map from service. Should only be called if
380 * the map contains no valid mappings.
381 *
382 * No requirements.
383 */
384void
385pmap_destroy(pmap_t pmap)
386{
387 if (pmap == NULL)
388 return;
389
390 lwkt_gettoken(&vm_token);
391 if (--pmap->pm_count == 0) {
392 pmap_release(pmap);
393 panic("destroying a pmap is not yet implemented");
394 }
395 lwkt_reltoken(&vm_token);
396}
397
398/*
399 * Add a reference to the specified pmap.
400 *
401 * No requirements.
402 */
403void
404pmap_reference(pmap_t pmap)
405{
406 if (pmap) {
407 lwkt_gettoken(&vm_token);
408 ++pmap->pm_count;
409 lwkt_reltoken(&vm_token);
410 }
411}
412
413/************************************************************************
414 * VMSPACE MANAGEMENT *
415 ************************************************************************
416 *
417 * The VMSPACE management we do in our virtual kernel must be reflected
418 * in the real kernel. This is accomplished by making vmspace system
419 * calls to the real kernel.
420 */
421void
422cpu_vmspace_alloc(struct vmspace *vm)
423{
424 int r;
425 void *rp;
426
427#define LAST_EXTENT (VM_MAX_USER_ADDRESS - 0x80000000)
428
429 if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0)
430 panic("vmspace_create() failed");
431
432 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
433 PROT_READ|PROT_WRITE,
434 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
435 MemImageFd, 0);
436 if (rp == MAP_FAILED)
437 panic("vmspace_mmap: failed1");
438 vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
439 MADV_NOSYNC, 0);
440 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
441 PROT_READ|PROT_WRITE,
442 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
443 MemImageFd, 0x40000000);
444 if (rp == MAP_FAILED)
445 panic("vmspace_mmap: failed2");
446 vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
447 MADV_NOSYNC, 0);
448 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
449 PROT_READ|PROT_WRITE,
450 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
451 MemImageFd, 0x80000000);
452 vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
453 MADV_NOSYNC, 0);
454 if (rp == MAP_FAILED)
455 panic("vmspace_mmap: failed3");
456
457 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
458 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
459 if (r < 0)
460 panic("vmspace_mcontrol: failed1");
461 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
462 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
463 if (r < 0)
464 panic("vmspace_mcontrol: failed2");
465 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
466 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
467 if (r < 0)
468 panic("vmspace_mcontrol: failed3");
469}
470
471void
472cpu_vmspace_free(struct vmspace *vm)
473{
474 if (vmspace_destroy(&vm->vm_pmap) < 0)
475 panic("vmspace_destroy() failed");
476}
477
478/************************************************************************
479 * Procedures which operate directly on the kernel PMAP *
480 ************************************************************************/
481
482/*
483 * This maps the requested page table and gives us access to it.
484 *
485 * This routine can be called from a potentially preempting interrupt
486 * thread or from a normal thread.
487 */
488static vpte_t *
489 get_ptbase(struct pmap *pmap, vm_offset_t va)
490{
491 struct mdglobaldata *gd = mdcpu;
492
493 if (pmap == &kernel_pmap) {
494 KKASSERT(va >= KvaStart && va < KvaEnd);
495 return(KernelPTA + (va >> PAGE_SHIFT));
496 } else if (pmap->pm_pdir == gd->gd_PT1pdir) {
497 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
498 *gd->gd_PT1pde = pmap->pm_pdirpte;
499 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
500 atomic_set_cpumask(&pmap->pm_cpucachemask,
501 gd->mi.gd_cpumask);
502 }
503 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
504 } else if (pmap->pm_pdir == gd->gd_PT2pdir) {
505 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
506 *gd->gd_PT2pde = pmap->pm_pdirpte;
507 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
508 atomic_set_cpumask(&pmap->pm_cpucachemask,
509 gd->mi.gd_cpumask);
510 }
511 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
512 }
513
514 /*
515 * If we aren't running from a potentially preempting interrupt,
516 * load a new page table directory into the page table cache
e4a473f1 517 */
518 if (gd->mi.gd_intr_nesting_level == 0 &&
519 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0) {
520 /*
521 * Choose one or the other and map the page table
522 * in the KVA space reserved for it.
523 */
524 if ((gd->gd_PTflip = 1 - gd->gd_PTflip) == 0) {
525 gd->gd_PT1pdir = pmap->pm_pdir;
526 *gd->gd_PT1pde = pmap->pm_pdirpte;
527 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
528 atomic_set_cpumask(&pmap->pm_cpucachemask,
529 gd->mi.gd_cpumask);
530 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
531 } else {
532 gd->gd_PT2pdir = pmap->pm_pdir;
533 *gd->gd_PT2pde = pmap->pm_pdirpte;
534 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
535 atomic_set_cpumask(&pmap->pm_cpucachemask,
536 gd->mi.gd_cpumask);
537 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
538 }
539 }
540
541 /*
542 * If we are running from a preempting interrupt use a private
543 * map. The caller must be in a critical section.
544 */
545 KKASSERT(IN_CRITICAL_SECT(curthread));
546 if (pmap->pm_pdir == gd->gd_PT3pdir) {
547 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
548 *gd->gd_PT3pde = pmap->pm_pdirpte;
549 madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL);
550 atomic_set_cpumask(&pmap->pm_cpucachemask,
551 gd->mi.gd_cpumask);
552 }
553 } else {
554 gd->gd_PT3pdir = pmap->pm_pdir;
555 *gd->gd_PT3pde = pmap->pm_pdirpte;
556 madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL);
557 atomic_set_cpumask(&pmap->pm_cpucachemask,
558 gd->mi.gd_cpumask);
559 }
560 return(gd->gd_PT3map + (va >> PAGE_SHIFT));
561}
562
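/*
 * As get_ptbase(), but always installs the page table through the PT1
 * cache slot. Must not be used from a preempting interrupt when a new
 * page directory has to be loaded (asserted below).
 */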
563static vpte_t *
564 get_ptbase1(struct pmap *pmap, vm_offset_t va)
565{
566 struct mdglobaldata *gd = mdcpu;
567
568 if (pmap == &kernel_pmap) {
569 KKASSERT(va >= KvaStart && va < KvaEnd);
570 return(KernelPTA + (va >> PAGE_SHIFT));
571 } else if (pmap->pm_pdir == gd->gd_PT1pdir) {
572 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
573 *gd->gd_PT1pde = pmap->pm_pdirpte;
574 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
575 atomic_set_cpumask(&pmap->pm_cpucachemask,
576 gd->mi.gd_cpumask);
577 }
578 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
579 }
580 KKASSERT(gd->mi.gd_intr_nesting_level == 0 &&
581 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0);
582 gd->gd_PT1pdir = pmap->pm_pdir;
583 *gd->gd_PT1pde = pmap->pm_pdirpte;
584 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
585 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
586}
587
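/*
 * As get_ptbase1(), but uses the PT2 cache slot.
 */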
588static vpte_t *
589 get_ptbase2(struct pmap *pmap, vm_offset_t va)
590{
591 struct mdglobaldata *gd = mdcpu;
592
593 if (pmap == &kernel_pmap) {
594 KKASSERT(va >= KvaStart && va < KvaEnd);
595 return(KernelPTA + (va >> PAGE_SHIFT));
596 } else if (pmap->pm_pdir == gd->gd_PT2pdir) {
597 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
598 *gd->gd_PT2pde = pmap->pm_pdirpte;
599 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
600 atomic_set_cpumask(&pmap->pm_cpucachemask,
601 gd->mi.gd_cpumask);
602 }
603 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
604 }
605 KKASSERT(gd->mi.gd_intr_nesting_level == 0 &&
606 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0);
607 gd->gd_PT2pdir = pmap->pm_pdir;
608 *gd->gd_PT2pde = pmap->pm_pdirpte;
609 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
610 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
611}
612
613/*
614 * Return a pointer to the page table entry for the specified va in the
615 * specified pmap. NULL is returned if there is no valid page table page
616 * for the VA.
617 */
618static __inline vpte_t *
619pmap_pte(struct pmap *pmap, vm_offset_t va)
620{
621 vpte_t *ptep;
622
623 ptep = &pmap->pm_pdir[va >> SEG_SHIFT];
624 if (*ptep & VPTE_PS)
625 return(ptep);
626 if (*ptep)
627 return (get_ptbase(pmap, va));
628 return(NULL);
629}
630
631
632/*
633 * Enter a mapping into kernel_pmap. Mappings created in this fashion
634 * are not managed. Mappings must be immediately accessible on all cpus.
635 *
636 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the
637 * real pmap and handle related races before storing the new vpte.
638 */
639void
640pmap_kenter(vm_offset_t va, vm_paddr_t pa)
641{
642 vpte_t *ptep;
643 vpte_t npte;
644
645 KKASSERT(va >= KvaStart && va < KvaEnd);
646 npte = (vpte_t)pa | VPTE_R | VPTE_W | VPTE_V;
647 ptep = KernelPTA + (va >> PAGE_SHIFT);
648 if (*ptep & VPTE_V)
649 pmap_inval_pte(ptep, &kernel_pmap, va);
650 *ptep = npte;
651}
652
653/*
654 * Synchronize a kvm mapping originally made for the private use on
655 * some other cpu so it can be used on all cpus.
656 *
657 * XXX add MADV_RESYNC to improve performance.
658 */
659void
660pmap_kenter_sync(vm_offset_t va)
661{
662 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
663}
664
665/*
666 * Synchronize a kvm mapping originally made for the private use on
667 * some other cpu so it can be used on our cpu. Turns out to be the
668 * same madvise() call, because we have to sync the real pmaps anyway.
669 *
670 * XXX add MADV_RESYNC to improve performance.
671 */
672void
673pmap_kenter_sync_quick(vm_offset_t va)
674{
675 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
676}
677
678#if 0
679/*
680 * Make a previously read-only kernel mapping R+W (not implemented by
681 * virtual kernels).
682 */
683void
684pmap_kmodify_rw(vm_offset_t va)
685{
686 *pmap_kpte(va) |= VPTE_R | VPTE_W;
687 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
688}
689
690/*
691 * Make a kernel mapping non-cacheable (not applicable to virtual kernels)
692 */
693void
694pmap_kmodify_nc(vm_offset_t va)
695{
696 *pmap_kpte(va) |= VPTE_N;
697 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
698}
699
700#endif
701
702/*
703 * Map a contiguous range of physical memory to a KVM
704 */
705vm_offset_t
706pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot)
707{
708 vm_offset_t sva, virt;
709
710 sva = virt = *virtp;
711 while (start < end) {
712 pmap_kenter(virt, start);
713 virt += PAGE_SIZE;
714 start += PAGE_SIZE;
715 }
716 *virtp = virt;
717 return (sva);
718}
719
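/*
 * Return a pointer to the kernel page table entry for a kernel VA.
 */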
720vpte_t *
721pmap_kpte(vm_offset_t va)
722{
723 vpte_t *ptep;
724
725 KKASSERT(va >= KvaStart && va < KvaEnd);
726 ptep = KernelPTA + (va >> PAGE_SHIFT);
727 return(ptep);
728}
729
730/*
731 * Enter an unmanaged KVA mapping for the private use of the current
732 * cpu only. pmap_kenter_sync() may be called to make the mapping usable
733 * by other cpus.
734 *
735 * It is illegal for the mapping to be accessed by other cpus unless
736 * pmap_kenter_sync*() is called.
737 */
738void
739pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
740{
741 vpte_t *ptep;
742 vpte_t npte;
743
744 KKASSERT(va >= KvaStart && va < KvaEnd);
745
746 npte = (vpte_t)pa | VPTE_R | VPTE_W | VPTE_V;
747 ptep = KernelPTA + (va >> PAGE_SHIFT);
748 if (*ptep & VPTE_V)
749 pmap_inval_pte_quick(ptep, &kernel_pmap, va);
750 *ptep = npte;
751}
752
753/*
754 * Make a temporary mapping for a physical address. This is only intended
755 * to be used for panic dumps.
756 *
757 * The caller is responsible for calling smp_invltlb().
758 */
759void *
760pmap_kenter_temporary(vm_paddr_t pa, long i)
761{
762 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa);
763 return ((void *)crashdumpmap);
764}
765
766/*
767 * Remove an unmanaged mapping created with pmap_kenter*().
768 */
769void
770pmap_kremove(vm_offset_t va)
771{
772 vpte_t *ptep;
773
774 KKASSERT(va >= KvaStart && va < KvaEnd);
775
776 ptep = KernelPTA + (va >> PAGE_SHIFT);
777 if (*ptep & VPTE_V)
778 pmap_inval_pte(ptep, &kernel_pmap, va);
779 *ptep = 0;
780}
781
782/*
783 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize
784 * only with this cpu.
785 *
786 * Unfortunately because we optimize new entries by testing VPTE_V later
787 * on, we actually still have to synchronize with all the cpus. XXX maybe
788 * store a junk value and test against 0 in the other places instead?
789 */
790void
791pmap_kremove_quick(vm_offset_t va)
792{
793 vpte_t *ptep;
794
795 KKASSERT(va >= KvaStart && va < KvaEnd);
796
797 ptep = KernelPTA + (va >> PAGE_SHIFT);
798 if (*ptep & VPTE_V)
799 pmap_inval_pte(ptep, &kernel_pmap, va); /* NOT _quick */
800 *ptep = 0;
801}
802
803/*
804 * Extract the physical address from the kernel_pmap that is associated
805 * with the specified virtual address.
806 */
807vm_paddr_t
808pmap_kextract(vm_offset_t va)
809{
810 vpte_t *ptep;
811 vm_paddr_t pa;
812
813 KKASSERT(va >= KvaStart && va < KvaEnd);
814
815 ptep = KernelPTA + (va >> PAGE_SHIFT);
816 pa = (vm_paddr_t)(*ptep & VPTE_FRAME) | (va & PAGE_MASK);
817 return(pa);
818}
819
820/*
821 * Map a set of unmanaged VM pages into KVM.
822 */
823void
824pmap_qenter(vm_offset_t va, struct vm_page **m, int count)
825{
826 KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd);
827 while (count) {
828 vpte_t *ptep;
829
830 ptep = KernelPTA + (va >> PAGE_SHIFT);
831 if (*ptep & VPTE_V)
832 pmap_inval_pte(ptep, &kernel_pmap, va);
833 *ptep = (vpte_t)(*m)->phys_addr | VPTE_R | VPTE_W | VPTE_V;
834 --count;
835 ++m;
836 va += PAGE_SIZE;
837 }
838}
839
840/*
841 * Undo the effects of pmap_qenter*().
842 */
843void
844pmap_qremove(vm_offset_t va, int count)
845{
846 KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd);
847 while (count) {
848 vpte_t *ptep;
849
850 ptep = KernelPTA + (va >> PAGE_SHIFT);
851 if (*ptep & VPTE_V)
852 pmap_inval_pte(ptep, &kernel_pmap, va);
853 *ptep = 0;
854 --count;
855 va += PAGE_SIZE;
856 }
857}
858
859/************************************************************************
860 * Misc support glue called by machine independent code *
861 ************************************************************************
862 *
863 * These routines are called by machine independent code to operate on
864 * certain machine-dependent aspects of processes, threads, and pmaps.
865 */
866
867/*
868 * Initialize MD portions of the thread structure.
869 */
870void
871pmap_init_thread(thread_t td)
872{
873 /* enforce pcb placement */
874 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1;
875 td->td_savefpu = &td->td_pcb->pcb_save;
876 td->td_sp = (char *)td->td_pcb - 16;
877}
878
879/*
880 * This routine directly affects the fork perf for a process.
881 */
882void
883pmap_init_proc(struct proc *p)
884{
885}
886
887/*
888 * We pre-allocate all page table pages for kernel virtual memory so
889 * this routine will only be called if KVM has been exhausted.
890 *
891 * No requirements.
892 */
893void
894pmap_growkernel(vm_offset_t kstart, vm_offset_t kend)
895{
896 vm_offset_t addr;
897
898 addr = (kend + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
899
900 lwkt_gettoken(&vm_token);
901 if (addr > virtual_end - SEG_SIZE)
902 panic("KVM exhausted");
903 kernel_vm_end = addr;
904 lwkt_reltoken(&vm_token);
905}
906
907/*
908 * The modification bit is not tracked for any pages in this range. XXX
909 * such pages in these maps should always use pmap_k*() functions and not
910 * be managed anyhow.
911 *
912 * XXX User and kernel address spaces are independent for virtual kernels,
913 * this function only applies to the kernel pmap.
914 */
915static int
916pmap_track_modified(pmap_t pmap, vm_offset_t va)
917{
918 if (pmap != &kernel_pmap)
919 return 1;
920 if ((va < clean_sva) || (va >= clean_eva))
921 return 1;
922 else
923 return 0;
924}
925
926/************************************************************************
927 * Procedures supporting managed page table pages *
928 ************************************************************************
929 *
930 * These procedures are used to track managed page table pages. These pages
931 * use the page table page's vm_page_t to track PTEs in the page. The
932 * page table pages themselves are arranged in a VM object, pmap->pm_pteobj.
933 *
934 * This allows the system to throw away page table pages for user processes
935 * at will and reinstantiate them on demand.
936 */
937
938/*
939 * This routine works like vm_page_lookup() but also blocks as long as the
940 * page is busy. This routine does not busy the page it returns.
941 *
942 * Unless the caller is managing objects whose pages are in a known state,
943 * the call should be made with a critical section held so the page's object
944 * association remains valid on return.
945 */
946static vm_page_t
947pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
948{
949 vm_page_t m;
950
951 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
952 m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp");
953
954 return(m);
955}
956
957/*
958 * This routine unholds page table pages, and if the hold count
959 * drops to zero, then it decrements the wire count.
960 *
961 * We must recheck that this is the last hold reference after busy-sleeping
962 * on the page.
e4a473f1
MD
963 */
964static int
d5b116a0 965_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
e4a473f1 966{
b12defdc 967 vm_page_busy_wait(m, FALSE, "pmuwpt");
eec2b734
MD
968 KASSERT(m->queue == PQ_NONE,
969 ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m));
970
971 if (m->hold_count == 1) {
972 /*
973 * Unmap the page table page.
974 */
975 KKASSERT(pmap->pm_pdir[m->pindex] != 0);
976 pmap_inval_pde(&pmap->pm_pdir[m->pindex], pmap,
977 (vm_offset_t)m->pindex << SEG_SHIFT);
978 KKASSERT(pmap->pm_stats.resident_count > 0);
979 --pmap->pm_stats.resident_count;
980
981 if (pmap->pm_ptphint == m)
982 pmap->pm_ptphint = NULL;
983
984 /*
985 * This was our last hold, the page had better be unwired
986 * after we decrement wire_count.
987 *
988 * FUTURE NOTE: shared page directory page could result in
989 * multiple wire counts.
990 */
991 vm_page_unhold(m);
992 --m->wire_count;
993 KKASSERT(m->wire_count == 0);
994 atomic_add_int(&vmstats.v_wire_count, -1);
995 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
996 vm_page_flash(m);
997 vm_page_free_zero(m);
998 return 1;
999 }
1000 KKASSERT(m->hold_count > 1);
1001 vm_page_unhold(m);
1002 vm_page_wakeup(m);
1003
1004 return 0;
1005}
1006
1007static __inline int
1008pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
1009{
1010 KKASSERT(m->hold_count > 0);
1011 if (m->hold_count > 1) {
1012 vm_page_unhold(m);
1013 return 0;
1014 } else {
1015 return _pmap_unwire_pte_hold(pmap, m);
1016 }
1017}
1018
1019/*
1020 * After removing a page table entry, this routine is used to
1021 * conditionally free the page, and manage the hold/wire counts.
1022 */
1023static int
1024pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
1025{
1026 unsigned ptepindex;
1027
1028 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj));
1029
1030 if (mpte == NULL) {
1031 /*
1032 * page table pages in the kernel_pmap are not managed.
1033 */
1034 if (pmap == &kernel_pmap)
1035 return(0);
1036 ptepindex = (va >> PDRSHIFT);
1037 if (pmap->pm_ptphint &&
1038 (pmap->pm_ptphint->pindex == ptepindex)) {
1039 mpte = pmap->pm_ptphint;
1040 } else {
1041 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
1042 pmap->pm_ptphint = mpte;
1043 vm_page_wakeup(mpte);
1044 }
1045 }
1046 return pmap_unwire_pte_hold(pmap, mpte);
1047}
1048
1049/*
1050 * Attempt to release and free the vm_page backing a page directory page
1051 * in a pmap. Returns 1 on success, 0 on failure (if the procedure had
1052 * to sleep).
e4a473f1
MD
1053 */
1054static int
1055pmap_release_free_page(struct pmap *pmap, vm_page_t p)
1056{
1057 vpte_t *pde = pmap->pm_pdir;
1058
1059 /*
1060 * This code optimizes the case of freeing non-busy
1061 * page-table pages. Those pages are zero now, and
1062 * might as well be placed directly into the zero queue.
1063 */
1064 if (vm_page_busy_try(p, FALSE)) {
1065 vm_page_sleep_busy(p, FALSE, "pmaprl");
e4a473f1 1066 return 0;
1067 }
1068 KKASSERT(pmap->pm_stats.resident_count > 0);
1069 --pmap->pm_stats.resident_count;
1070
1071 if (p->hold_count) {
1072 panic("pmap_release: freeing held page table page");
1073 }
1074 /*
1075 * Page directory pages need to have the kernel stuff cleared, so
1076 * they can go into the zero queue also.
1077 *
1078 * In virtual kernels there is no 'kernel stuff'. For the moment
1079 * I just make sure the whole thing has been zero'd even though
1080 * it should already be completely zero'd.
1081 *
1082 * pmaps for vkernels do not self-map because they do not share
1083 * their address space with the vkernel. Clearing of pde[] thus
1084 * only applies to page table pages and not to the page directory
1085 * page.
e4a473f1
MD
1086 */
1087 if (p->pindex == pmap->pm_pdindex) {
1088 bzero(pde, VPTE_PAGETABLE_SIZE);
1089 pmap_kremove((vm_offset_t)pmap->pm_pdir);
1090 } else {
1091 KKASSERT(pde[p->pindex] != 0);
1092 pmap_inval_pde(&pde[p->pindex], pmap,
1093 (vm_offset_t)p->pindex << SEG_SHIFT);
1094 }
1095
1096 /*
1097 * Clear the matching hint
1098 */
1099 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1100 pmap->pm_ptphint = NULL;
1101
1102 /*
1103 * And throw the page away. The page is completely zero'd out so
1104 * optimize the free call.
1105 */
1106 p->wire_count--;
1107 atomic_add_int(&vmstats.v_wire_count, -1);
1108 vm_page_free_zero(p);
1109 return 1;
1110}
1111
1112/*
1113 * This routine is called if the page table page is not mapped in the page
1114 * table directory.
1115 *
1116 * The routine is broken up into two parts for readability.
1117 *
1118 * It must return a held mpte and map the page directory page as required.
1119 * Because vm_page_grab() can block, we must re-check pm_pdir[ptepindex]
1120 */
1121static vm_page_t
1122_pmap_allocpte(pmap_t pmap, unsigned ptepindex)
1123{
1124 vm_paddr_t ptepa;
1125 vm_page_t m;
1126
1127 /*
1128 * Find or fabricate a new pagetable page. A busied page will be
1129 * returned. This call may block.
e4a473f1
MD
1130 */
1131 m = vm_page_grab(pmap->pm_pteobj, ptepindex,
1132 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
1133 vm_page_flag_set(m, PG_MAPPED);
1134
e4a473f1
MD
1135 KASSERT(m->queue == PQ_NONE,
1136 ("_pmap_allocpte: %p->queue != PQ_NONE", m));
1137
e4a473f1 1138 /*
1139 * Increment the hold count for the page we will be returning to
1140 * the caller.
e4a473f1
MD
1141 */
1142 m->hold_count++;
1143
1144 /*
1145 * It is possible that someone else got in and mapped by the page
1146 * directory page while we were blocked, if so just unbusy and
1147 * return the held page.
1148 */
1149 if ((ptepa = pmap->pm_pdir[ptepindex]) != 0) {
1150 KKASSERT((ptepa & VPTE_FRAME) == VM_PAGE_TO_PHYS(m));
1151 vm_page_wakeup(m);
1152 return(m);
1153 }
1154 vm_page_wire(m);
1155
1156 /*
1157 * Map the pagetable page into the process address space, if
1158 * it isn't already there.
1159 */
1160 ++pmap->pm_stats.resident_count;
1161
1162 ptepa = VM_PAGE_TO_PHYS(m);
1163 pmap->pm_pdir[ptepindex] = (vpte_t)ptepa | VPTE_R | VPTE_W | VPTE_V |
1164 VPTE_A | VPTE_M;
1165
1166 /*
1167 * We are likely about to access this page table page, so set the
1168 * page table hint to reduce overhead.
1169 */
1170 pmap->pm_ptphint = m;
1171
1172 vm_page_wakeup(m);
1173
1174 return (m);
1175}
1176
1177/*
1178 * Determine the page table page required to access the VA in the pmap
1179 * and allocate it if necessary. Return a held vm_page_t for the page.
1180 *
1181 * Only used with user pmaps.
1182 */
1183static vm_page_t
1184pmap_allocpte(pmap_t pmap, vm_offset_t va)
1185{
1186 unsigned ptepindex;
1187 vm_offset_t ptepa;
1188 vm_page_t m;
1189
1190 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj));
1191
1192 /*
1193 * Calculate pagetable page index
1194 */
1195 ptepindex = va >> PDRSHIFT;
1196
1197 /*
1198 * Get the page directory entry
1199 */
1200 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1201
1202 /*
1203 * This supports switching from a 4MB page to a
1204 * normal 4K page.
1205 */
1206 if (ptepa & VPTE_PS) {
1207 KKASSERT(pmap->pm_pdir[ptepindex] != 0);
1208 pmap_inval_pde(&pmap->pm_pdir[ptepindex], pmap,
1209 (vm_offset_t)ptepindex << SEG_SHIFT);
1210 ptepa = 0;
1211 }
1212
1213 /*
1214 * If the page table page is mapped, we just increment the
1215 * hold count, and activate it.
1216 */
1217 if (ptepa) {
1218 /*
1219 * In order to get the page table page, try the
1220 * hint first.
1221 */
1222 if (pmap->pm_ptphint &&
1223 (pmap->pm_ptphint->pindex == ptepindex)) {
1224 m = pmap->pm_ptphint;
1225 } else {
1226 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
1227 pmap->pm_ptphint = m;
1228 vm_page_wakeup(m);
1229 }
1230 m->hold_count++;
1231 return m;
1232 }
1233 /*
1234 * Here if the pte page isn't mapped, or if it has been deallocated.
1235 */
1236 return _pmap_allocpte(pmap, ptepindex);
1237}
1238
1239/************************************************************************
1240 * Managed pages in pmaps *
1241 ************************************************************************
1242 *
1243 * All pages entered into user pmaps and some pages entered into the kernel
1244 * pmap are managed, meaning that pmap_protect() and other related management
1245 * functions work on these pages.
1246 */
1247
1248/*
1249 * free the pv_entry back to the free list. This function may be
1250 * called from an interrupt.
1251 */
1252static __inline void
1253free_pv_entry(pv_entry_t pv)
1254{
1255 pv_entry_count--;
1256 zfree(&pvzone, pv);
1257}
1258
1259/*
1260 * get a new pv_entry, allocating a block from the system
1261 * when needed. This function may be called from an interrupt.
1262 */
1263static pv_entry_t
1264get_pv_entry(void)
1265{
1266 pv_entry_count++;
1267 if (pv_entry_high_water &&
1268 (pv_entry_count > pv_entry_high_water) &&
1269 (pmap_pagedaemon_waken == 0)) {
1270 pmap_pagedaemon_waken = 1;
1271 wakeup (&vm_pages_needed);
1272 }
1273 return zalloc(&pvzone);
1274}
1275
1276/*
1277 * This routine is very drastic, but can save the system
1278 * in a pinch.
1279 *
1280 * No requirements.
e4a473f1
MD
1281 */
1282void
1283pmap_collect(void)
1284{
1285 int i;
1286 vm_page_t m;
1287 static int warningdone=0;
1288
1289 if (pmap_pagedaemon_waken == 0)
1290 return;
1291 lwkt_gettoken(&vm_token);
1292 pmap_pagedaemon_waken = 0;
1293
1294 if (warningdone < 5) {
1295 kprintf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1296 warningdone++;
1297 }
1298
1299 for (i = 0; i < vm_page_array_size; i++) {
1300 m = &vm_page_array[i];
1301 if (m->wire_count || m->hold_count)
1302 continue;
1303 if (vm_page_busy_try(m, TRUE) == 0) {
1304 if (m->wire_count == 0 && m->hold_count == 0) {
1305 pmap_remove_all(m);
1306 }
1307 vm_page_wakeup(m);
1308 }
1309 }
1310 lwkt_reltoken(&vm_token);
1311}
1312
1313/*
1314 * If it is the first entry on the list, it is actually
1315 * in the header and we must copy the following entry up
1316 * to the header. Otherwise we must search the list for
1317 * the entry. In either case we free the now unused entry.
1318 *
1319 * caller must hold vm_token
1320 */
1321static int
1322pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
1323{
1324 pv_entry_t pv;
1325 int rtval;
1326
1327 crit_enter();
1328 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1329 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1330 if (pmap == pv->pv_pmap && va == pv->pv_va)
1331 break;
1332 }
1333 } else {
1334 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1335 if (va == pv->pv_va)
1336 break;
1337 }
1338 }
1339
1340 /*
1341 * Note that pv_ptem is NULL if the page table page itself is not
1342 * managed, even if the page being removed IS managed.
1343 */
1344 rtval = 0;
1345
1346 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1347 m->md.pv_list_count--;
1348 atomic_add_int(&m->object->agg_pv_list_count, -1);
1349 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1350 if (TAILQ_EMPTY(&m->md.pv_list))
1351 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1352 ++pmap->pm_generation;
1353 vm_object_hold(pmap->pm_pteobj);
1354 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1355 vm_object_drop(pmap->pm_pteobj);
1356 free_pv_entry(pv);
1357
1358 crit_exit();
1359 return rtval;
1360}
1361
1362/*
1363 * Create a pv entry for page at pa for (pmap, va). If the page table page
1364 * holding the VA is managed, mpte will be non-NULL.
1365 */
1366static void
1367pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
1368{
1369 pv_entry_t pv;
1370
1371 crit_enter();
1372 pv = get_pv_entry();
1373 pv->pv_va = va;
1374 pv->pv_pmap = pmap;
1375 pv->pv_ptem = mpte;
1376
1377 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1378 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1379 ++pmap->pm_generation;
1380 m->md.pv_list_count++;
1381 atomic_add_int(&m->object->agg_pv_list_count, 1);
1382
1383 crit_exit();
1384}
1385
1386/*
1387 * pmap_remove_pte: do the things to unmap a page in a process
1388 */
1389static int
1390pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va)
1391{
1392 vpte_t oldpte;
1393 vm_page_t m;
1394
1395 oldpte = pmap_inval_loadandclear(ptq, pmap, va);
1396 if (oldpte & VPTE_WIRED)
1397 --pmap->pm_stats.wired_count;
1398 KKASSERT(pmap->pm_stats.wired_count >= 0);
1399
1400#if 0
1401 /*
1402 * Machines that don't support invlpg, also don't support
1403 * VPTE_G. XXX VPTE_G is disabled for SMP so don't worry about
1404 * the SMP case.
1405 */
1406 if (oldpte & VPTE_G)
1407 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
1408#endif
1409 KKASSERT(pmap->pm_stats.resident_count > 0);
1410 --pmap->pm_stats.resident_count;
1411 if (oldpte & VPTE_MANAGED) {
1412 m = PHYS_TO_VM_PAGE(oldpte);
1413 if (oldpte & VPTE_M) {
1414#if defined(PMAP_DIAGNOSTIC)
1415 if (pmap_nw_modified((pt_entry_t) oldpte)) {
1416 kprintf(
1417 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1418 va, oldpte);
1419 }
1420#endif
1421 if (pmap_track_modified(pmap, va))
1422 vm_page_dirty(m);
1423 }
1424 if (oldpte & VPTE_A)
1425 vm_page_flag_set(m, PG_REFERENCED);
1426 return pmap_remove_entry(pmap, m, va);
1427 } else {
1428 return pmap_unuse_pt(pmap, va, NULL);
1429 }
1430
1431 return 0;
1432}
1433
1434/*
1435 * pmap_remove_page:
1436 *
1437 * Remove a single page from a process address space.
1438 *
1439 * This function may not be called from an interrupt if the pmap is
1440 * not kernel_pmap.
1441 */
1442static void
1443pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1444{
1445 vpte_t *ptq;
1446
1447 /*
1448 * if there is no pte for this address, just skip it!!! Otherwise
1449 * get a local va for mappings for this pmap and remove the entry.
1450 */
1451 if (*pmap_pde(pmap, va) != 0) {
1452 ptq = get_ptbase(pmap, va);
1453 if (*ptq) {
1454 pmap_remove_pte(pmap, ptq, va);
1455 }
1456 }
1457}
1458
1459/*
1460 * Remove the given range of addresses from the specified map.
1461 *
1462 * It is assumed that the start and end are properly rounded to the
1463 * page size.
1464 *
1465 * This function may not be called from an interrupt if the pmap is
1466 * not kernel_pmap.
1467 *
1468 * No requirements.
1469 */
1470void
1471pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
1472{
1473 vpte_t *ptbase;
1474 vm_offset_t pdnxt;
1475 vm_offset_t ptpaddr;
1476 vm_pindex_t sindex, eindex;
1477
1478 if (pmap == NULL)
1479 return;
1480
1481 vm_object_hold(pmap->pm_pteobj);
1482 lwkt_gettoken(&vm_token);
1483 KKASSERT(pmap->pm_stats.resident_count >= 0);
1484 if (pmap->pm_stats.resident_count == 0) {
1485 lwkt_reltoken(&vm_token);
1486 vm_object_drop(pmap->pm_pteobj);
1487 return;
1488 }
1489
1490 /*
1491 * special handling of removing one page. a very
1492 * common operation and easy to short circuit some
1493 * code.
1494 */
1495 if (((sva + PAGE_SIZE) == eva) &&
1496 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & VPTE_PS) == 0)) {
1497 pmap_remove_page(pmap, sva);
1498 lwkt_reltoken(&vm_token);
1499 vm_object_drop(pmap->pm_pteobj);
1500 return;
1501 }
1502
1503 /*
1504 * Get a local virtual address for the mappings that are being
1505 * worked with.
1506 *
1507 * XXX this is really messy because the kernel pmap is not relative
1508 * to address 0
1509 */
1510 sindex = (sva >> PAGE_SHIFT);
1511 eindex = (eva >> PAGE_SHIFT);
1512
1513 for (; sindex < eindex; sindex = pdnxt) {
1514 vpte_t pdirindex;
1515
1516 /*
1517 * Calculate index for next page table.
1518 */
1519 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1520 if (pmap->pm_stats.resident_count == 0)
1521 break;
1522
1523 pdirindex = sindex / NPDEPG;
1524 if (((ptpaddr = pmap->pm_pdir[pdirindex]) & VPTE_PS) != 0) {
1525 KKASSERT(pmap->pm_pdir[pdirindex] != 0);
1526 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1527 pmap_inval_pde(&pmap->pm_pdir[pdirindex], pmap,
1528 (vm_offset_t)pdirindex << SEG_SHIFT);
1529 continue;
1530 }
1531
1532 /*
1533 * Weed out invalid mappings. Note: we assume that the page
1534 * directory table is always allocated, and in kernel virtual.
1535 */
1536 if (ptpaddr == 0)
1537 continue;
1538
1539 /*
1540 * Limit our scan to either the end of the va represented
1541 * by the current page table page, or to the end of the
1542 * range being removed.
1543 */
1544 if (pdnxt > eindex)
1545 pdnxt = eindex;
1546
1547 /*
1548 * NOTE: pmap_remove_pte() can block.
1549 */
1550 for (; sindex != pdnxt; sindex++) {
1551 vm_offset_t va;
1552
1553 ptbase = get_ptbase(pmap, sindex << PAGE_SHIFT);
1554 if (*ptbase == 0)
1555 continue;
1556 va = i386_ptob(sindex);
1557 if (pmap_remove_pte(pmap, ptbase, va))
1558 break;
1559 }
1560 }
1561 lwkt_reltoken(&vm_token);
1562 vm_object_drop(pmap->pm_pteobj);
1563}
1564
1565/*
1566 * Removes this physical page from all physical maps in which it resides.
1567 * Reflects back modify bits to the pager.
1568 *
1569 * This routine may not be called from an interrupt.
1570 *
1571 * No requirements.
1572 */
1573static void
1574pmap_remove_all(vm_page_t m)
1575{
1576 vpte_t *pte, tpte;
1577 pv_entry_t pv;
1578
1579#if defined(PMAP_DIAGNOSTIC)
1580 /*
1581 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1582 * pages!
1583 */
1584 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1585 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m));
1586 }
1587#endif
1588
1589 lwkt_gettoken(&vm_token);
1590 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1591 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0);
1592 --pv->pv_pmap->pm_stats.resident_count;
1593
1594 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
1595 KKASSERT(pte != NULL);
1596
1597 tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va);
1598 if (tpte & VPTE_WIRED)
1599 --pv->pv_pmap->pm_stats.wired_count;
1600 KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0);
1601
1602 if (tpte & VPTE_A)
1603 vm_page_flag_set(m, PG_REFERENCED);
1604
1605 /*
1606 * Update the vm_page_t clean and reference bits.
1607 */
1608 if (tpte & VPTE_M) {
1609#if defined(PMAP_DIAGNOSTIC)
1610 if (pmap_nw_modified((pt_entry_t) tpte)) {
1611 kprintf(
1612 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1613 pv->pv_va, tpte);
1614 }
1615#endif
1616 if (pmap_track_modified(pv->pv_pmap, pv->pv_va))
1617 vm_page_dirty(m);
1618 }
1619 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1620 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1621 ++pv->pv_pmap->pm_generation;
1622 m->md.pv_list_count--;
1623 atomic_add_int(&m->object->agg_pv_list_count, -1);
1624 if (TAILQ_EMPTY(&m->md.pv_list))
1625 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1626 vm_object_hold(pv->pv_pmap->pm_pteobj);
1627 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1628 vm_object_drop(pv->pv_pmap->pm_pteobj);
1629 free_pv_entry(pv);
1630 }
1631 KKASSERT((m->flags & (PG_MAPPED | PG_WRITEABLE)) == 0);
1632 lwkt_reltoken(&vm_token);
1633}
1634
1635/*
1636 * Set the physical protection on the specified range of this map
1637 * as requested.
1638 *
1639 * This function may not be called from an interrupt if the map is
1640 * not the kernel_pmap.
1641 *
1642 * No requirements.
1643 */
1644void
1645pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1646{
1647 vpte_t *ptbase;
1648 vpte_t *ptep;
1649 vm_offset_t pdnxt, ptpaddr;
1650 vm_pindex_t sindex, eindex;
1651 vm_pindex_t sbase;
1652
1653 if (pmap == NULL)
1654 return;
1655
1656 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1657 pmap_remove(pmap, sva, eva);
1658 return;
1659 }
1660
1661 if (prot & VM_PROT_WRITE)
1662 return;
1663
1664 lwkt_gettoken(&vm_token);
1665 ptbase = get_ptbase(pmap, sva);
1666
1667 sindex = (sva >> PAGE_SHIFT);
1668 eindex = (eva >> PAGE_SHIFT);
1669 sbase = sindex;
1670
1671 for (; sindex < eindex; sindex = pdnxt) {
1672
1673 unsigned pdirindex;
1674
1675 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1676
1677 pdirindex = sindex / NPDEPG;
1678
1679 /*
1680 * Clear the modified and writable bits for a 4m page.
1681 * Throw away the modified bit (?)
1682 */
1683 if (((ptpaddr = pmap->pm_pdir[pdirindex]) & VPTE_PS) != 0) {
1684 pmap_clean_pde(&pmap->pm_pdir[pdirindex], pmap,
1685 (vm_offset_t)pdirindex << SEG_SHIFT);
1686 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1687 continue;
1688 }
1689
1690 /*
1691 * Weed out invalid mappings. Note: we assume that the page
1692 * directory table is always allocated, and in kernel virtual.
1693 */
1694 if (ptpaddr == 0)
1695 continue;
1696
1697 if (pdnxt > eindex) {
1698 pdnxt = eindex;
1699 }
1700
1701 for (; sindex != pdnxt; sindex++) {
1702 vpte_t pbits;
1703 vm_page_t m;
1704
1705 /*
1706 * Clean managed pages and also check the accessed
1707 * bit. Just remove write perms for unmanaged
1708 * pages. Be careful of races, turning off write
1709 * access will force a fault rather than setting
1710 * the modified bit at an unexpected time.
1711 */
1712 ptep = &ptbase[sindex - sbase];
1713 if (*ptep & VPTE_MANAGED) {
1714 pbits = pmap_clean_pte(ptep, pmap,
1715 i386_ptob(sindex));
1716 m = NULL;
1717 if (pbits & VPTE_A) {
1718 m = PHYS_TO_VM_PAGE(pbits);
1719 vm_page_flag_set(m, PG_REFERENCED);
1720 atomic_clear_long(ptep, VPTE_A);
1721 }
1722 if (pbits & VPTE_M) {
1723 if (pmap_track_modified(pmap, i386_ptob(sindex))) {
1724 if (m == NULL)
1725 m = PHYS_TO_VM_PAGE(pbits);
1726 vm_page_dirty(m);
1727 }
1728 }
1729 } else {
1730 pbits = pmap_setro_pte(ptep, pmap,
1731 i386_ptob(sindex));
1732 }
1733 }
1734 }
1735 lwkt_reltoken(&vm_token);
1736}
1737
1738/*
1739 * Enter a managed page into a pmap. If the page is not wired related pmap
1740 * data can be destroyed at any time for later demand-operation.
1741 *
1742 * Insert the vm_page (m) at virtual address (v) in (pmap), with the
1743 * specified protection, and wire the mapping if requested.
1744 *
1745 * NOTE: This routine may not lazy-evaluate or lose information. The
1746 * page must actually be inserted into the given map NOW.
1747 *
1748 * NOTE: When entering a page at a KVA address, the pmap must be the
1749 * kernel_pmap.
1750 *
1751 * No requirements.
1752 */
1753void
1754pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1755 boolean_t wired)
1756{
1757 vm_paddr_t pa;
1758 vpte_t *pte;
1759 vm_paddr_t opa;
1760 vpte_t origpte, newpte;
1761 vm_page_t mpte;
1762
1763 if (pmap == NULL)
1764 return;
1765
1766 va &= VPTE_FRAME;
1767
1768 vm_object_hold(pmap->pm_pteobj);
1769 lwkt_gettoken(&vm_token);
1770
1771 /*
1772 * Get the page table page. The kernel_pmap's page table pages
1773 * are preallocated and have no associated vm_page_t.
1774 */
1775 if (pmap == &kernel_pmap)
1776 mpte = NULL;
1777 else
1778 mpte = pmap_allocpte(pmap, va);
1779
1780 pte = pmap_pte(pmap, va);
1781
1782 /*
1783 * Page Directory table entry not valid, we need a new PT page
1784 * and pmap_allocpte() didn't give us one. Oops!
1785 */
1786 if (pte == NULL) {
1787 panic("pmap_enter: invalid page directory pmap=%p, va=0x%p\n",
1788 pmap, (void *)va);
1789 }
1790
1791 /*
1792 * Deal with races on the original mapping (though don't worry
1793 * about VPTE_A races) by cleaning it. This will force a fault
1794 * if an attempt is made to write to the page.
1795 */
1796 pa = VM_PAGE_TO_PHYS(m) & VPTE_FRAME;
1797 origpte = pmap_clean_pte(pte, pmap, va);
1798 opa = origpte & VPTE_FRAME;
1799
1800 if (origpte & VPTE_PS)
1801 panic("pmap_enter: attempted pmap_enter on 4MB page");
1802
1803 /*
1804 * Mapping has not changed, must be protection or wiring change.
1805 */
1806 if (origpte && (opa == pa)) {
1807 /*
1808 * Wiring change, just update stats. We don't worry about
1809 * wiring PT pages as they remain resident as long as there
1810 * are valid mappings in them. Hence, if a user page is wired,
1811 * the PT page will be also.
1812 */
e7f2d7de
MD
1813 if (wired && ((origpte & VPTE_WIRED) == 0))
1814 ++pmap->pm_stats.wired_count;
1815 else if (!wired && (origpte & VPTE_WIRED))
1816 --pmap->pm_stats.wired_count;
1817 KKASSERT(pmap->pm_stats.wired_count >= 0);
e4a473f1 1818
e4a473f1
MD
1819 /*
1820 * Remove the extra pte reference. Note that we cannot
1821 * optimize the RO->RW case because we have adjusted the
1822 * wiring count above and may need to adjust the wiring
1823 * bits below.
1824 */
1825 if (mpte)
1826 mpte->hold_count--;
1827
1828 /*
1829 * We might be turning off write access to the page,
1830 * so we go ahead and sense modify status.
1831 */
e7f2d7de 1832 if (origpte & VPTE_MANAGED) {
d5b116a0
MD
1833 if ((origpte & VPTE_M) &&
1834 pmap_track_modified(pmap, va)) {
e4a473f1
MD
1835 vm_page_t om;
1836 om = PHYS_TO_VM_PAGE(opa);
1837 vm_page_dirty(om);
1838 }
e7f2d7de 1839 pa |= VPTE_MANAGED;
17cde63e 1840 KKASSERT(m->flags & PG_MAPPED);
e4a473f1
MD
1841 }
1842 goto validate;
1843 }
1844 /*
1845 * Mapping has changed, invalidate old range and fall through to
1846 * handle validating new mapping.
1847 */
5926987a 1848 while (opa) {
e4a473f1 1849 int err;
d5b116a0 1850 err = pmap_remove_pte(pmap, pte, va);
e4a473f1 1851 if (err)
d557216f 1852 panic("pmap_enter: pte vanished, va: %p", (void *)va);
5926987a
MD
1853 pte = pmap_pte(pmap, va);
1854 origpte = pmap_clean_pte(pte, pmap, va);
1855 opa = origpte & VPTE_FRAME;
1856 if (opa) {
1857 kprintf("pmap_enter: Warning, raced pmap %p va %p\n",
1858 pmap, (void *)va);
1859 }
e4a473f1
MD
1860 }
1861
1862 /*
1863 * Enter on the PV list if part of our managed memory. Note that we
1864 * raise IPL while manipulating pv_table since pmap_enter can be
1865 * called at interrupt time.
1866 */
1867 if (pmap_initialized &&
1868 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
1869 pmap_insert_entry(pmap, va, mpte, m);
e7f2d7de 1870 pa |= VPTE_MANAGED;
17cde63e 1871 vm_page_flag_set(m, PG_MAPPED);
e4a473f1
MD
1872 }
1873
1874 /*
1875 * Increment counters
1876 */
eec2b734 1877 ++pmap->pm_stats.resident_count;
e4a473f1
MD
1878 if (wired)
1879 pmap->pm_stats.wired_count++;
1880
1881validate:
1882 /*
1883 * Now validate mapping with desired protection/wiring.
1884 */
1885 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | VPTE_V);
1886
1887 if (wired)
e7f2d7de 1888 newpte |= VPTE_WIRED;
17cde63e
MD
1889 if (pmap != &kernel_pmap)
1890 newpte |= VPTE_U;
e4a473f1
MD
1891
1892 /*
d5b116a0
MD
1893 * If the mapping or permission bits are different from the
1894 * (now cleaned) original pte, an update is needed. We've
1895 * already downgraded or invalidated the page so all we have
1896 * to do now is update the bits.
1897 *
1898 * XXX should we synchronize RO->RW changes to avoid another
1899 * fault?
e4a473f1 1900 */
d5b116a0 1901 if ((origpte & ~(VPTE_W|VPTE_M|VPTE_A)) != newpte) {
e4a473f1 1902 *pte = newpte | VPTE_A;
17cde63e
MD
1903 if (newpte & VPTE_W)
1904 vm_page_flag_set(m, PG_WRITEABLE);
e4a473f1 1905 }
17cde63e 1906 KKASSERT((newpte & VPTE_MANAGED) == 0 || m->flags & PG_MAPPED);
5bce55a9 1907 lwkt_reltoken(&vm_token);
b12defdc 1908 vm_object_drop(pmap->pm_pteobj);
e4a473f1
MD
1909}
1910
1911/*
17cde63e 1912 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired.
e4a473f1 1913 *
17cde63e 1914 * Currently this routine may only be used on user pmaps, not kernel_pmap.
e4a473f1 1915 */
1b9d3514 1916void
17cde63e 1917pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
e4a473f1
MD
1918{
1919 vpte_t *pte;
1920 vm_paddr_t pa;
17cde63e 1921 vm_page_t mpte;
135d7199
MD
1922 unsigned ptepindex;
1923 vm_offset_t ptepa;
e4a473f1
MD
1924
1925 KKASSERT(pmap != &kernel_pmap);
e4a473f1
MD
1926
1927 KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS);
1928
1929 /*
17cde63e
MD
1930 * Calculate pagetable page (mpte), allocating it if necessary.
1931 *
1932 * A held page table page (mpte), or NULL, is passed onto the
1933 * section following.
e4a473f1
MD
1934 */
1935 ptepindex = va >> PDRSHIFT;
17cde63e 1936
b12defdc 1937 vm_object_hold(pmap->pm_pteobj);
5bce55a9
MD
1938 lwkt_gettoken(&vm_token);
1939
17cde63e 1940 do {
e4a473f1
MD
1941 /*
1942 * Get the page directory entry
1943 */
1944 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1945
1946 /*
1947 * If the page table page is mapped, we just increment
1948 * the hold count, and activate it.
1949 */
1950 if (ptepa) {
1951 if (ptepa & VPTE_PS)
1952 panic("pmap_enter_quick: unexpected mapping into 4MB page");
1953 if (pmap->pm_ptphint &&
1954 (pmap->pm_ptphint->pindex == ptepindex)) {
1955 mpte = pmap->pm_ptphint;
1956 } else {
1957 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1958 pmap->pm_ptphint = mpte;
b12defdc 1959 vm_page_wakeup(mpte);
e4a473f1 1960 }
17cde63e
MD
1961 if (mpte)
1962 mpte->hold_count++;
e4a473f1
MD
1963 } else {
1964 mpte = _pmap_allocpte(pmap, ptepindex);
1965 }
17cde63e 1966 } while (mpte == NULL);
e4a473f1
MD
1967
1968 /*
1969 * Ok, now that the page table page has been validated, get the pte.
1970 * If the pte is already mapped undo mpte's hold_count and
1971 * just return.
1972 */
1973 pte = pmap_pte(pmap, va);
1974 if (*pte) {
17cde63e 1975 pmap_unwire_pte_hold(pmap, mpte);
5bce55a9 1976 lwkt_reltoken(&vm_token);
b12defdc 1977 vm_object_drop(pmap->pm_pteobj);
17cde63e 1978 return;
e4a473f1
MD
1979 }
1980
1981 /*
1982 * Enter on the PV list if part of our managed memory. Note that we
1983 * raise IPL while manipulating pv_table since pmap_enter can be
1984 * called at interrupt time.
1985 */
17cde63e 1986 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
e4a473f1 1987 pmap_insert_entry(pmap, va, mpte, m);
17cde63e
MD
1988 vm_page_flag_set(m, PG_MAPPED);
1989 }
e4a473f1
MD
1990
1991 /*
1992 * Increment counters
1993 */
eec2b734 1994 ++pmap->pm_stats.resident_count;
e4a473f1
MD
1995
1996 pa = VM_PAGE_TO_PHYS(m);
1997
1998 /*
1999 * Now validate mapping with RO protection
2000 */
2001 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
d5b116a0 2002 *pte = (vpte_t)pa | VPTE_V | VPTE_U;
e4a473f1 2003 else
d5b116a0 2004 *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED;
17cde63e
MD
2005 /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */
2006 /*pmap_inval_flush(&info); don't need for vkernel */
5bce55a9 2007 lwkt_reltoken(&vm_token);
b12defdc 2008 vm_object_drop(pmap->pm_pteobj);
e4a473f1
MD
2009}
2010
e7f2d7de
MD
2011/*
2012 * Extract the physical address for the translation at the specified
2013 * virtual address in the pmap.
5bce55a9
MD
2014 *
2015 * The caller must hold vm_token if non-blocking operation is desired.
2016 * No requirements.
e7f2d7de 2017 */
6f7b98e0
MD
2018vm_paddr_t
2019pmap_extract(pmap_t pmap, vm_offset_t va)
2020{
2021 vm_paddr_t rtval;
2022 vpte_t pte;
2023
5bce55a9 2024 lwkt_gettoken(&vm_token);
6f7b98e0
MD
2025 if (pmap && (pte = pmap->pm_pdir[va >> SEG_SHIFT]) != 0) {
2026 if (pte & VPTE_PS) {
2027 rtval = pte & ~((vpte_t)(1 << SEG_SHIFT) - 1);
2028 rtval |= va & SEG_MASK;
2029 } else {
71152ac6 2030 pte = *get_ptbase(pmap, va);
6f7b98e0
MD
2031 rtval = (pte & VPTE_FRAME) | (va & PAGE_MASK);
2032 }
5bce55a9
MD
2033 } else {
2034 rtval = 0;
6f7b98e0 2035 }
5bce55a9
MD
2036 lwkt_reltoken(&vm_token);
2037 return(rtval);
6f7b98e0
MD
2038}
2039
e4a473f1
MD
2040#define MAX_INIT_PT (96)
2041
2042/*
2043 * This routine preloads the ptes for a given object into the specified pmap.
2044 * This eliminates the blast of soft faults on process startup and
2045 * immediately after an mmap.
5bce55a9
MD
2046 *
2047 * No requirements.
e4a473f1
MD
2048 */
2049static int pmap_object_init_pt_callback(vm_page_t p, void *data);
2050
2051void
2052pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
2053 vm_object_t object, vm_pindex_t pindex,
2054 vm_size_t size, int limit)
2055{
2056 struct rb_vm_page_scan_info info;
287ebb09 2057 struct lwp *lp;
e4a473f1
MD
2058 int psize;
2059
2060 /*
2061 * We can't preinit if read access isn't set or there is no pmap
2062 * or object.
2063 */
2064 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL)
2065 return;
2066
2067 /*
2068 * We can't preinit if the pmap is not the current pmap
2069 */
287ebb09
MD
2070 lp = curthread->td_lwp;
2071 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace))
e4a473f1
MD
2072 return;
2073
2074 psize = size >> PAGE_SHIFT;
2075
2076 if ((object->type != OBJT_VNODE) ||
2077 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
2078 (object->resident_page_count > MAX_INIT_PT))) {
2079 return;
2080 }
2081
2082 if (psize + pindex > object->size) {
2083 if (object->size < pindex)
2084 return;
2085 psize = object->size - pindex;
2086 }
2087
2088 if (psize == 0)
2089 return;
2090
2091 /*
2092 * Use a red-black scan to traverse the requested range and load
2093 * any valid pages found into the pmap.
2094 *
2095 * We cannot safely scan the object's memq unless we are in a
2096 * critical section since interrupts can remove pages from objects.
2097 */
2098 info.start_pindex = pindex;
2099 info.end_pindex = pindex + psize - 1;
2100 info.limit = limit;
2101 info.mpte = NULL;
2102 info.addr = addr;
2103 info.pmap = pmap;
2104
b12defdc 2105 vm_object_hold(object);
e4a473f1
MD
2106 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
2107 pmap_object_init_pt_callback, &info);
b12defdc 2108 vm_object_drop(object);
e4a473f1
MD
2109}
2110
5bce55a9
MD
2111/*
2112 * The caller must hold vm_token.
2113 */
e4a473f1
MD
2114static
2115int
2116pmap_object_init_pt_callback(vm_page_t p, void *data)
2117{
2118 struct rb_vm_page_scan_info *info = data;
2119 vm_pindex_t rel_index;
b12defdc 2120
e4a473f1
MD
2121 /*
2122 * don't allow an madvise to blow away our really
2123 * free pages by allocating pv entries.
2124 */
2125 if ((info->limit & MAP_PREFAULT_MADVISE) &&
2126 vmstats.v_free_count < vmstats.v_free_reserved) {
2127 return(-1);
2128 }
0d987a03
MD
2129
2130 /*
2131 * Ignore list markers and ignore pages we cannot instantly
2132 * busy (while holding the object token).
2133 */
2134 if (p->flags & PG_MARKER)
2135 return 0;
b12defdc
MD
2136 if (vm_page_busy_try(p, TRUE))
2137 return 0;
e4a473f1 2138 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
b12defdc 2139 (p->flags & PG_FICTITIOUS) == 0) {
e4a473f1
MD
2140 if ((p->queue - p->pc) == PQ_CACHE)
2141 vm_page_deactivate(p);
e4a473f1 2142 rel_index = p->pindex - info->start_pindex;
17cde63e
MD
2143 pmap_enter_quick(info->pmap,
2144 info->addr + i386_ptob(rel_index), p);
e4a473f1 2145 }
b12defdc 2146 vm_page_wakeup(p);
e4a473f1
MD
2147 return(0);
2148}
2149
2150/*
1b9d3514
MD
2151 * Return TRUE if the pmap is in shape to trivially
2152 * pre-fault the specified address.
2153 *
2154 * Returns FALSE if it would be non-trivial or if a
2155 * pte is already loaded into the slot.
5bce55a9
MD
2156 *
2157 * No requirements.
e4a473f1 2158 */
1b9d3514
MD
2159int
2160pmap_prefault_ok(pmap_t pmap, vm_offset_t addr)
e4a473f1 2161{
1b9d3514 2162 vpte_t *pte;
5bce55a9 2163 int ret;
e4a473f1 2164
5bce55a9
MD
2165 lwkt_gettoken(&vm_token);
2166 if ((*pmap_pde(pmap, addr)) == 0) {
2167 ret = 0;
2168 } else {
2169 pte = get_ptbase(pmap, addr);
2170 ret = (*pte) ? 0 : 1;
2171 }
2172 lwkt_reltoken(&vm_token);
2173 return (ret);
e4a473f1
MD
2174}
2175
2176/*
5bce55a9
MD
2177 * Change the wiring attribute for a map/virtual-address pair.
2178 * The mapping must already exist in the pmap.
2179 *
2180 * No other requirements.
e4a473f1
MD
2181 */
2182void
2183pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2184{
2185 vpte_t *pte;
2186
2187 if (pmap == NULL)
2188 return;
2189
5bce55a9 2190 lwkt_gettoken(&vm_token);
71152ac6 2191 pte = get_ptbase(pmap, va);
e4a473f1 2192
e7f2d7de
MD
2193 if (wired && (*pte & VPTE_WIRED) == 0)
2194 ++pmap->pm_stats.wired_count;
2195 else if (!wired && (*pte & VPTE_WIRED))
2196 --pmap->pm_stats.wired_count;
2197 KKASSERT(pmap->pm_stats.wired_count >= 0);
e4a473f1
MD
2198
2199 /*
2200 * Wiring is not a hardware characteristic so there is no need to
2201 * invalidate TLB. However, in an SMP environment we must use
2202 * a locked bus cycle to update the pte (if we are not using
2203 * the pmap_inval_*() API that is)... it's ok to do this for simple
2204 * wiring changes.
2205 */
e4a473f1 2206 if (wired)
8608b858 2207 atomic_set_long(pte, VPTE_WIRED);
e4a473f1 2208 else
8608b858 2209 atomic_clear_long(pte, VPTE_WIRED);
5bce55a9 2210 lwkt_reltoken(&vm_token);
e4a473f1
MD
2211}
2212
2213/*
2214 * Copy the range specified by src_addr/len
2215 * from the source map to the range dst_addr/len
2216 * in the destination map.
2217 *
2218 * This routine is only advisory and need not do anything.
2219 */
2220void
2221pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2222 vm_size_t len, vm_offset_t src_addr)
2223{
e4a473f1
MD
2224 vm_offset_t addr;
2225 vm_offset_t end_addr = src_addr + len;
2226 vm_offset_t pdnxt;
2227 vpte_t *src_frame;
2228 vpte_t *dst_frame;
2229 vm_page_t m;
2230
17cde63e
MD
2231 /*
2232 * XXX BUGGY. Among other things srcmpte is assumed to remain
2233 * valid through blocking calls, and that's just not going to
2234 * be the case.
2235 *
2236 * FIXME!
2237 */
2238 return;
2239
e4a473f1
MD
2240 if (dst_addr != src_addr)
2241 return;
2242 if (dst_pmap->pm_pdir == NULL)
2243 return;
2244 if (src_pmap->pm_pdir == NULL)
2245 return;
2246
b12defdc 2247 lwkt_gettoken(&vm_token);
eec2b734 2248
71152ac6
MD
2249 src_frame = get_ptbase1(src_pmap, src_addr);
2250 dst_frame = get_ptbase2(dst_pmap, src_addr);
e4a473f1 2251
e4a473f1
MD
2252 /*
2253 * critical section protection is required to maintain the page/object
2254 * association; interrupts can free pages and remove them from
2255 * their objects.
2256 */
e4a473f1
MD
2257 for (addr = src_addr; addr < end_addr; addr = pdnxt) {
2258 vpte_t *src_pte, *dst_pte;
2259 vm_page_t dstmpte, srcmpte;
2260 vm_offset_t srcptepaddr;
2261 unsigned ptepindex;
2262
2263 if (addr >= VM_MAX_USER_ADDRESS)
2264 panic("pmap_copy: invalid to pmap_copy page tables\n");
2265
2266 /*
2267 * Don't let optional prefaulting of pages make us go
2268 * way below the low water mark of free pages or way
2269 * above high water mark of used pv entries.
2270 */
2271 if (vmstats.v_free_count < vmstats.v_free_reserved ||
2272 pv_entry_count > pv_entry_high_water)
2273 break;
2274
2275 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2276 ptepindex = addr >> PDRSHIFT;
2277
2278 srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2279 if (srcptepaddr == 0)
2280 continue;
2281
2282 if (srcptepaddr & VPTE_PS) {
2283 if (dst_pmap->pm_pdir[ptepindex] == 0) {
8608b858 2284 dst_pmap->pm_pdir[ptepindex] = (vpte_t)srcptepaddr;
e4a473f1
MD
2285 dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2286 }
2287 continue;
2288 }
2289
2290 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
17cde63e
MD
2291 if ((srcmpte == NULL) || (srcmpte->hold_count == 0) ||
2292 (srcmpte->flags & PG_BUSY)) {
e4a473f1 2293 continue;
17cde63e 2294 }
e4a473f1
MD
2295
2296 if (pdnxt > end_addr)
2297 pdnxt = end_addr;
2298
71152ac6
MD
2299 src_pte = src_frame + ((addr - src_addr) >> PAGE_SHIFT);
2300 dst_pte = dst_frame + ((addr - src_addr) >> PAGE_SHIFT);
e4a473f1
MD
2301 while (addr < pdnxt) {
2302 vpte_t ptetemp;
17cde63e 2303
e4a473f1
MD
2304 ptetemp = *src_pte;
2305 /*
2306 * we only virtual-copy managed pages
2307 */
e7f2d7de 2308 if ((ptetemp & VPTE_MANAGED) != 0) {
e4a473f1
MD
2309 /*
2310 * We have to check after allocpte for the
2311 * pte still being around... allocpte can
2312 * block.
eec2b734
MD
2313 *
2314 * Because pmap_allocpte can block, we must
2315 * also reload the page table bases afterwards.
e4a473f1
MD
2316 */
2317 dstmpte = pmap_allocpte(dst_pmap, addr);
eec2b734
MD
2318 src_frame = get_ptbase1(src_pmap, src_addr);
2319 dst_frame = get_ptbase2(dst_pmap, src_addr);
2320
17cde63e
MD
2321 if ((*dst_pte == 0) && (ptetemp = *src_pte) &&
2322 (ptetemp & VPTE_MANAGED) != 0) {
e4a473f1 2323 /*
70fc5283
MD
2324 * Clear the modified and accessed
2325 * (referenced) bits during the copy.
d6c96d4d 2326 *
70fc5283
MD
2327 * We do not have to clear the write
2328 * bit to force a fault-on-modify
2329 * because the real kernel's target
2330 * pmap is empty and will fault anyway.
e4a473f1
MD
2331 */
2332 m = PHYS_TO_VM_PAGE(ptetemp);
70fc5283 2333 *dst_pte = ptetemp & ~(VPTE_M | VPTE_A);
eec2b734 2334 ++dst_pmap->pm_stats.resident_count;
e4a473f1
MD
2335 pmap_insert_entry(dst_pmap, addr,
2336 dstmpte, m);
17cde63e 2337 KKASSERT(m->flags & PG_MAPPED);
e4a473f1 2338 } else {
d5b116a0 2339 pmap_unwire_pte_hold(dst_pmap, dstmpte);
e4a473f1
MD
2340 }
2341 if (dstmpte->hold_count >= srcmpte->hold_count)
2342 break;
2343 }
2344 addr += PAGE_SIZE;
2345 src_pte++;
2346 dst_pte++;
2347 }
2348 }
b12defdc 2349 lwkt_reltoken(&vm_token);
e4a473f1
MD
2350}
2351
2352/*
2353 * pmap_zero_page:
2354 *
2355 * Zero the specified PA by mapping the page into KVM and clearing its
2356 * contents.
2357 *
2358 * This function may be called from an interrupt and no locking is
2359 * required.
2360 */
2361void
2362pmap_zero_page(vm_paddr_t phys)
2363{
2364 struct mdglobaldata *gd = mdcpu;
2365
2366 crit_enter();
71152ac6 2367 if (*gd->gd_CMAP3)
e4a473f1 2368 panic("pmap_zero_page: CMAP3 busy");
a3c35df6 2369 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2370 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2371
2372 bzero(gd->gd_CADDR3, PAGE_SIZE);
71152ac6 2373 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2374 crit_exit();
2375}
2376
2377/*
2378 * pmap_page_assertzero:
2379 *
2380 * Assert that a page is empty, panic if it isn't.
2381 */
2382void
2383pmap_page_assertzero(vm_paddr_t phys)
2384{
2385 struct mdglobaldata *gd = mdcpu;
2386 int i;
2387
2388 crit_enter();
71152ac6 2389 if (*gd->gd_CMAP3)
e4a473f1 2390 panic("pmap_page_assertzero: CMAP3 busy");
71152ac6
MD
2391 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W |
2392 (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2393 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2394 for (i = 0; i < PAGE_SIZE; i += 4) {
2395 if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) {
2396 panic("pmap_page_assertzero() @ %p not zero!\n",
2397 (void *)gd->gd_CADDR3);
2398 }
2399 }
71152ac6 2400 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2401 crit_exit();
2402}
2403
2404/*
2405 * pmap_zero_page_area:
2406 *
2407 * Zero part of a physical page by mapping it into memory and clearing
2408 * its contents with bzero.
2409 *
2410 * off and size may not cover an area beyond a single hardware page.
2411 */
2412void
2413pmap_zero_page_area(vm_paddr_t phys, int off, int size)
2414{
2415 struct mdglobaldata *gd = mdcpu;
2416
2417 crit_enter();
71152ac6 2418 if (*gd->gd_CMAP3)
e4a473f1 2419 panic("pmap_zero_page_area: CMAP3 busy");
71152ac6
MD
2420 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W |
2421 (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2422 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2423
2424 bzero((char *)gd->gd_CADDR3 + off, size);
71152ac6 2425 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2426 crit_exit();
2427}
2428
2429/*
2430 * pmap_copy_page:
2431 *
2432 * Copy the physical page from the source PA to the target PA.
2433 * This function may be called from an interrupt. No locking
2434 * is required.
2435 */
2436void
2437pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
2438{
2439 struct mdglobaldata *gd = mdcpu;
2440
2441 crit_enter();
2442 if (*(int *) gd->gd_CMAP1)
2443 panic("pmap_copy_page: CMAP1 busy");
2444 if (*(int *) gd->gd_CMAP2)
2445 panic("pmap_copy_page: CMAP2 busy");
2446
4e7c41c5 2447 *(int *) gd->gd_CMAP1 = VPTE_V | VPTE_R | (src & PG_FRAME) | VPTE_A;
e4a473f1
MD
2448 *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M;
2449
6f7b98e0
MD
2450 madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL);
2451 madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2452
2453 bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE);
2454
2455 *(int *) gd->gd_CMAP1 = 0;
2456 *(int *) gd->gd_CMAP2 = 0;
2457 crit_exit();
2458}
2459
2460/*
2461 * pmap_copy_page_frag:
2462 *
2463 * Copy part of a physical page from the source PA to the target PA.
2464 * This function may be called from an interrupt. No locking
2465 * is required.
2466 */
2467void
2468pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
2469{
2470 struct mdglobaldata *gd = mdcpu;
2471
2472 crit_enter();
2473 if (*(int *) gd->gd_CMAP1)
2474 panic("pmap_copy_page_frag: CMAP1 busy");
2475 if (*(int *) gd->gd_CMAP2)
2476 panic("pmap_copy_page_frag: CMAP2 busy");
2477
2478 *(int *) gd->gd_CMAP1 = VPTE_V | (src & VPTE_FRAME) | VPTE_A;
2479 *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M;
2480
6f7b98e0
MD
2481 madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL);
2482 madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2483
2484 bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK),
2485 (char *)gd->gd_CADDR2 + (dst & PAGE_MASK),
2486 bytes);
2487
2488 *(int *) gd->gd_CMAP1 = 0;
2489 *(int *) gd->gd_CMAP2 = 0;
2490 crit_exit();
2491}
2492
2493/*
2494 * Returns true if the pmap's pv is one of the first
2495 * 16 pvs linked to from this page. This count may
2496 * be changed upwards or downwards in the future; it
2497 * is only necessary that true be returned for a small
2498 * subset of pmaps for proper page aging.
5bce55a9
MD
2499 *
2500 * No requirements.
e4a473f1
MD
2501 */
2502boolean_t
2503pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2504{
2505 pv_entry_t pv;
2506 int loops = 0;
2507
2508 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2509 return FALSE;
2510
2511 crit_enter();
5bce55a9 2512 lwkt_gettoken(&vm_token);
e4a473f1
MD
2513
2514 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2515 if (pv->pv_pmap == pmap) {
5bce55a9 2516 lwkt_reltoken(&vm_token);
e4a473f1
MD
2517 crit_exit();
2518 return TRUE;
2519 }
2520 loops++;
2521 if (loops >= 16)
2522 break;
2523 }
5bce55a9 2524 lwkt_reltoken(&vm_token);
e4a473f1
MD
2525 crit_exit();
2526 return (FALSE);
2527}
2528
2529/*
2530 * Remove all pages from the specified address space;
2531 * this aids process exit speeds. Also, this code
2532 * is special-cased for the current process only, but
2533 * can have the more generic (and slightly slower)
2534 * mode enabled. This is much faster than pmap_remove
2535 * in the case of running down an entire address space.
5bce55a9
MD
2536 *
2537 * No requirements.
e4a473f1
MD
2538 */
2539void
2540pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2541{
2542 vpte_t *pte, tpte;
2543 pv_entry_t pv, npv;
2544 vm_page_t m;
8790d7d8 2545 int32_t save_generation;
e4a473f1 2546
b12defdc
MD
2547 if (pmap->pm_pteobj)
2548 vm_object_hold(pmap->pm_pteobj);
5bce55a9 2549 lwkt_gettoken(&vm_token);
e4a473f1
MD
2550 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2551 if (pv->pv_va >= eva || pv->pv_va < sva) {
2552 npv = TAILQ_NEXT(pv, pv_plist);
2553 continue;
2554 }
2555
8790d7d8
MD
2556 KKASSERT(pmap == pv->pv_pmap);
2557
2558 pte = pmap_pte(pmap, pv->pv_va);
e4a473f1
MD
2559
2560 /*
2561 * We cannot remove wired pages from a process' mapping
2562 * at this time
2563 */
d5b116a0 2564 if (*pte & VPTE_WIRED) {
e4a473f1
MD
2565 npv = TAILQ_NEXT(pv, pv_plist);
2566 continue;
2567 }
d5b116a0 2568 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va);
e4a473f1
MD
2569
2570 m = PHYS_TO_VM_PAGE(tpte);
2571
2572 KASSERT(m < &vm_page_array[vm_page_array_size],
8608b858 2573 ("pmap_remove_pages: bad tpte %lx", tpte));
e4a473f1 2574
eec2b734
MD
2575 KKASSERT(pmap->pm_stats.resident_count > 0);
2576 --pmap->pm_stats.resident_count;
e4a473f1
MD
2577
2578 /*
2579 * Update the vm_page_t clean and reference bits.
2580 */
2581 if (tpte & VPTE_M) {
2582 vm_page_dirty(m);
2583 }
2584
e4a473f1 2585 npv = TAILQ_NEXT(pv, pv_plist);
8790d7d8
MD
2586 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
2587 save_generation = ++pmap->pm_generation;
e4a473f1
MD
2588
2589 m->md.pv_list_count--;
b12defdc 2590 atomic_add_int(&m->object->agg_pv_list_count, -1);
e4a473f1 2591 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
17cde63e 2592 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
e4a473f1 2593 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
e4a473f1 2594
d5b116a0 2595 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem);
e4a473f1 2596 free_pv_entry(pv);
8790d7d8
MD
2597
2598 /*
2599 * Restart the scan if we blocked during the unuse or free
2600 * calls and other removals were made.
2601 */
2602 if (save_generation != pmap->pm_generation) {
2603 kprintf("Warning: pmap_remove_pages race-A avoided\n");
cd2a0876 2604 npv = TAILQ_FIRST(&pmap->pm_pvlist);
8790d7d8 2605 }
e4a473f1 2606 }
5bce55a9 2607 lwkt_reltoken(&vm_token);
b12defdc
MD
2608 if (pmap->pm_pteobj)
2609 vm_object_drop(pmap->pm_pteobj);
e4a473f1
MD
2610}
2611
2612/*
d5b116a0 2613 * pmap_testbit tests bits in active mappings of a VM page.
5bce55a9
MD
2614 *
2615 * The caller must hold vm_token
e4a473f1
MD
2616 */
2617static boolean_t
2618pmap_testbit(vm_page_t m, int bit)
2619{
2620 pv_entry_t pv;
2621 vpte_t *pte;
2622
2623 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2624 return FALSE;
2625
2626 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2627 return FALSE;
2628
2629 crit_enter();
2630
2631 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2632 /*
2633 * If the bit being tested is the modified or accessed
2634 * bit, skip mappings whose modifications we do not
2635 * track.
2636 */
2637 if (bit & (VPTE_A|VPTE_M)) {
d6c96d4d 2638 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2639 continue;
2640 }
2641
2642#if defined(PMAP_DIAGNOSTIC)
2643 if (!pv->pv_pmap) {
2644 kprintf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2645 continue;
2646 }
2647#endif
2648 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2649 if (*pte & bit) {
2650 crit_exit();
2651 return TRUE;
2652 }
2653 }
2654 crit_exit();
2655 return (FALSE);
2656}
2657
2658/*
70fc5283
MD
2659 * This routine is used to clear bits in ptes. Certain bits require special
2660 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit.
d5b116a0
MD
2661 *
2662 * This routine is only called with certain VPTE_* bit combinations.
5bce55a9
MD
2663 *
2664 * The caller must hold vm_token
e4a473f1
MD
2665 */
2666static __inline void
d6c96d4d 2667pmap_clearbit(vm_page_t m, int bit)
e4a473f1 2668{
e4a473f1
MD
2669 pv_entry_t pv;
2670 vpte_t *pte;
d6c96d4d 2671 vpte_t pbits;
e4a473f1
MD
2672
2673 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2674 return;
2675
e4a473f1
MD
2676 crit_enter();
2677
2678 /*
2679 * Loop over all current mappings, setting/clearing as appropriate. If
2680 * setting RO, do we need to clear the VAC?
2681 */
2682 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2683 /*
2684 * don't write protect pager mappings
2685 */
d6c96d4d
MD
2686 if (bit == VPTE_W) {
2687 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2688 continue;
2689 }
2690
2691#if defined(PMAP_DIAGNOSTIC)
2692 if (!pv->pv_pmap) {
2693 kprintf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2694 continue;
2695 }
2696#endif
2697
2698 /*
2699 * Careful here. We can use a locked bus instruction to
2700 * clear VPTE_A or VPTE_M safely but we need to synchronize
2701 * with the target cpus when we mess with VPTE_W.
d6c96d4d 2702 *
70fc5283
MD
2703 * On virtual kernels we must force a new fault-on-write
2704 * in the real kernel if we clear the Modify bit ourselves,
2705 * otherwise the real kernel will not get a new fault and
2706 * will never set our Modify bit again.
e4a473f1
MD
2707 */
2708 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
d5b116a0 2709 if (*pte & bit) {
d6c96d4d 2710 if (bit == VPTE_W) {
d5b116a0
MD
2711 /*
2712 * We must also clear VPTE_M when clearing
2713 * VPTE_W
2714 */
2715 pbits = pmap_clean_pte(pte, pv->pv_pmap,
2716 pv->pv_va);
2717 if (pbits & VPTE_M)
d6c96d4d 2718 vm_page_dirty(m);
d6c96d4d
MD
2719 } else if (bit == VPTE_M) {
2720 /*
70fc5283
MD
2721 * We do not have to make the page read-only
2722 * when clearing the Modify bit. The real
2723 * kernel will make the real PTE read-only
2724 * or otherwise detect the write and set
2725 * our VPTE_M again simply by us invalidating
2726 * the real kernel VA for the pmap (as we did
2727 * above). This allows the real kernel to
2728 * handle the write fault without forwarding
2729 * the fault to us.
d6c96d4d 2730 */
8608b858 2731 atomic_clear_long(pte, VPTE_M);
d5b116a0
MD
2732 } else if ((bit & (VPTE_W|VPTE_M)) == (VPTE_W|VPTE_M)) {
2733 /*
2734 * We've been asked to clear W & M, I guess
2735 * the caller doesn't want us to update
2736 * the dirty status of the VM page.
2737 */
2738 pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va);
d6c96d4d 2739 } else {
d5b116a0
MD
2740 /*
2741 * We've been asked to clear bits that do
2742 * not interact with hardware.
2743 */
8608b858 2744 atomic_clear_long(pte, bit);
e4a473f1
MD
2745 }
2746 }
2747 }
e4a473f1
MD
2748 crit_exit();
2749}
2750
2751/*
5bce55a9 2752 * Lower the permission for all mappings to a given page.
e4a473f1 2753 *
5bce55a9 2754 * No requirements.
e4a473f1
MD
2755 */
2756void
2757pmap_page_protect(vm_page_t m, vm_prot_t prot)
2758{
2759 if ((prot & VM_PROT_WRITE) == 0) {
5bce55a9 2760 lwkt_gettoken(&vm_token);
e4a473f1 2761 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
d6c96d4d 2762 pmap_clearbit(m, VPTE_W);
17cde63e 2763 vm_page_flag_clear(m, PG_WRITEABLE);
e4a473f1
MD
2764 } else {
2765 pmap_remove_all(m);
2766 }
5bce55a9 2767 lwkt_reltoken(&vm_token);
e4a473f1
MD
2768 }
2769}
2770
2771vm_paddr_t
cfd17028 2772pmap_phys_address(vm_pindex_t ppn)
e4a473f1
MD
2773{
2774 return (i386_ptob(ppn));
2775}
2776
2777/*
5bce55a9
MD
2778 * Return a count of reference bits for a page, clearing those bits.
2779 * It is not necessary for every reference bit to be cleared, but it
2780 * is necessary that 0 only be returned when there are truly no
2781 * reference bits set.
e4a473f1 2782 *
5bce55a9
MD
2783 * XXX: The exact number of bits to check and clear is a matter that
2784 * should be tested and standardized at some point in the future for
2785 * optimal aging of shared pages.
e4a473f1 2786 *
5bce55a9 2787 * No requirements.
e4a473f1
MD
2788 */
2789int
2790pmap_ts_referenced(vm_page_t m)
2791{
2792 pv_entry_t pv, pvf, pvn;
2793 vpte_t *pte;
2794 int rtval = 0;
2795
2796 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2797 return (rtval);
2798
2799 crit_enter();
5bce55a9 2800 lwkt_gettoken(&vm_token);
e4a473f1
MD
2801
2802 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
2803
2804 pvf = pv;
2805
2806 do {
2807 pvn = TAILQ_NEXT(pv, pv_list);
2808
2809 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2810
2811 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2812
d6c96d4d 2813 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2814 continue;
2815
2816 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2817
2818 if (pte && (*pte & VPTE_A)) {
2819#ifdef SMP
8608b858 2820 atomic_clear_long(pte, VPTE_A);
e4a473f1 2821#else
8608b858 2822 atomic_clear_long_nonlocked(pte, VPTE_A);
e4a473f1
MD
2823#endif
2824 rtval++;
2825 if (rtval > 4) {
2826 break;
2827 }
2828 }
2829 } while ((pv = pvn) != NULL && pv != pvf);
2830 }
5bce55a9 2831 lwkt_reltoken(&vm_token);
e4a473f1
MD
2832 crit_exit();
2833
2834 return (rtval);
2835}
2836
2837/*
5bce55a9
MD
2838 * Return whether or not the specified physical page was modified
2839 * in any physical maps.
e4a473f1 2840 *
5bce55a9 2841 * No requirements.
e4a473f1
MD
2842 */
2843boolean_t
2844pmap_is_modified(vm_page_t m)
2845{
5bce55a9
MD
2846 boolean_t res;
2847
2848 lwkt_gettoken(&vm_token);
2849 res = pmap_testbit(m, VPTE_M);
2850 lwkt_reltoken(&vm_token);
2851 return (res);
e4a473f1
MD
2852}
2853
2854/*
5bce55a9
MD
2855 * Clear the modify bits on the specified physical page.
2856 *
2857 * No requirements.
e4a473f1
MD
2858 */
2859void
2860pmap_clear_modify(vm_page_t m)
2861{
5bce55a9 2862 lwkt_gettoken(&vm_token);
d6c96d4d 2863 pmap_clearbit(m, VPTE_M);
5bce55a9 2864 lwkt_reltoken(&vm_token);
e4a473f1
MD
2865}
2866
2867/*
5bce55a9 2868 * Clear the reference bit on the specified physical page.
e4a473f1 2869 *
5bce55a9 2870 * No requirements.
e4a473f1
MD
2871 */
2872void
2873pmap_clear_reference(vm_page_t m)
2874{
5bce55a9 2875 lwkt_gettoken(&vm_token);
d6c96d4d 2876 pmap_clearbit(m, VPTE_A);
5bce55a9 2877 lwkt_reltoken(&vm_token);
e4a473f1
MD
2878}
2879
2880/*
2881 * Miscellaneous support routines follow
2882 */
2883
2884static void
2885i386_protection_init(void)
2886{
2887 int *kp, prot;
2888
2889 kp = protection_codes;
2890 for (prot = 0; prot < 8; prot++) {
2891 if (prot & VM_PROT_READ)
2892 *kp |= VPTE_R;
2893 if (prot & VM_PROT_WRITE)
2894 *kp |= VPTE_W;
2895 if (prot & VM_PROT_EXECUTE)
2896 *kp |= VPTE_X;
2897 ++kp;
2898 }
2899}
2900
d5b116a0
MD
2901#if 0
2902
e4a473f1
MD
2903/*
2904 * Map a set of physical memory pages into the kernel virtual
2905 * address space. Return a pointer to where it is mapped. This
2906 * routine is intended to be used for mapping device memory,
2907 * NOT real memory.
2908 *
2909 * NOTE: we can't use pgeflag unless we invalidate the pages one at
2910 * a time.
2911 */
2912void *
2913pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2914{
2915 vm_offset_t va, tmpva, offset;
2916 vpte_t *pte;
2917
2918 offset = pa & PAGE_MASK;
2919 size = roundup(offset + size, PAGE_SIZE);
2920
9388fcaa 2921 va = kmem_alloc_nofault(&kernel_map, size, PAGE_SIZE);
e4a473f1
MD
2922 if (!va)
2923 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2924
2925 pa = pa & VPTE_FRAME;
2926 for (tmpva = va; size > 0;) {
2927 pte = KernelPTA + (tmpva >> PAGE_SHIFT);
2928 *pte = pa | VPTE_R | VPTE_W | VPTE_V; /* | pgeflag; */
2929 size -= PAGE_SIZE;
2930 tmpva += PAGE_SIZE;
2931 pa += PAGE_SIZE;
2932 }
2933 cpu_invltlb();
2934 smp_invltlb();
2935
2936 return ((void *)(va + offset));
2937}
2938
2939void
2940pmap_unmapdev(vm_offset_t va, vm_size_t size)
2941{
2942 vm_offset_t base, offset;
2943
2944 base = va & VPTE_FRAME;
2945 offset = va & PAGE_MASK;
2946 size = roundup(offset + size, PAGE_SIZE);
2947 pmap_qremove(va, size >> PAGE_SHIFT);
2948 kmem_free(&kernel_map, base, size);
2949}
2950
d5b116a0
MD
2951#endif
2952
e4a473f1 2953/*
5bce55a9
MD
2954 * Perform the pmap work for mincore
2955 *
2956 * No requirements.
e4a473f1
MD
2957 */
2958int
2959pmap_mincore(pmap_t pmap, vm_offset_t addr)
2960{
2961 vpte_t *ptep, pte;
2962 vm_page_t m;
2963 int val = 0;
5bce55a9
MD
2964
2965 lwkt_gettoken(&vm_token);
e4a473f1
MD
2966
2967 ptep = pmap_pte(pmap, addr);
2968 if (ptep == 0) {
5bce55a9 2969 lwkt_reltoken(&vm_token);
e4a473f1
MD
2970 return 0;
2971 }
2972
2973 if ((pte = *ptep) != 0) {
8608b858 2974 vm_paddr_t pa;
e4a473f1
MD
2975
2976 val = MINCORE_INCORE;
2977 if ((pte & VPTE_MANAGED) == 0)
5bce55a9 2978 goto done;
e4a473f1
MD
2979
2980 pa = pte & VPTE_FRAME;
2981
2982 m = PHYS_TO_VM_PAGE(pa);
2983
2984 /*
2985 * Modified by us
2986 */
2987 if (pte & VPTE_M)
2988 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2989 /*
2990 * Modified by someone
2991 */
2992 else if (m->dirty || pmap_is_modified(m))
2993 val |= MINCORE_MODIFIED_OTHER;
2994 /*
2995 * Referenced by us
2996 */
2997 if (pte & VPTE_A)
2998 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2999
3000 /*
3001 * Referenced by someone
3002 */
3003 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
3004 val |= MINCORE_REFERENCED_OTHER;
3005 vm_page_flag_set(m, PG_REFERENCED);
3006 }
3007 }
5bce55a9
MD
3008done:
3009 lwkt_reltoken(&vm_token);
e4a473f1
MD
3010 return val;
3011}
3012
b12defdc
MD
3013/*
3014 * Caller must hold vmspace->vm_map.token for oldvm and newvm
3015 */
e4a473f1 3016void
e3161323 3017pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
e4a473f1 3018{
e3161323 3019 struct vmspace *oldvm;
287ebb09 3020 struct lwp *lp;
e4a473f1 3021
e3161323 3022 oldvm = p->p_vmspace;
287ebb09 3023 crit_enter();
e3161323 3024 if (oldvm != newvm) {
e3161323 3025 p->p_vmspace = newvm;
287ebb09 3026 KKASSERT(p->p_nthreads == 1);
3e291793 3027 lp = RB_ROOT(&p->p_lwp_tree);
287ebb09
MD
3028 pmap_setlwpvm(lp, newvm);
3029 if (adjrefs) {
3030 sysref_get(&newvm->vm_sysref);
3031 sysref_put(&oldvm->vm_sysref);
3032 }
3033 }
3034 crit_exit();
3035}
3036
3037void
3038pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
3039{
3040 struct vmspace *oldvm;
3041 struct pmap *pmap;
3042
3043 crit_enter();
3044 oldvm = lp->lwp_vmspace;
3045
3046 if (oldvm != newvm) {
3047 lp->lwp_vmspace = newvm;
3048 if (curthread->td_lwp == lp) {
e3161323 3049 pmap = vmspace_pmap(newvm);
e4a473f1 3050#if defined(SMP)
da23a592 3051 atomic_set_cpumask(&pmap->pm_active, mycpu->gd_cpumask);
e4a473f1 3052#else
e3161323 3053 pmap->pm_active |= 1;
e4a473f1
MD
3054#endif
3055#if defined(SWTCH_OPTIM_STATS)
e3161323 3056 tlb_flush_count++;
e4a473f1 3057#endif
e3161323 3058 pmap = vmspace_pmap(oldvm);
e4a473f1 3059#if defined(SMP)
da23a592 3060 atomic_clear_cpumask(&pmap->pm_active, mycpu->gd_cpumask);
e4a473f1 3061#else
da23a592 3062 pmap->pm_active &= ~(cpumask_t)1;
e4a473f1 3063#endif
e3161323 3064 }
e3161323
MD
3065 }
3066 crit_exit();
e4a473f1
MD
3067}
3068
287ebb09 3069
e4a473f1
MD
3070vm_offset_t
3071pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3072{
3073
3074 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3075 return addr;
3076 }
3077
3078 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3079 return addr;
3080}
3081
722871d3
MD
3082/*
3083 * Used by kmalloc/kfree, page already exists at va
3084 */
3085vm_page_t
3086pmap_kvtom(vm_offset_t va)
3087{
3088 vpte_t *ptep;
3089
3090 KKASSERT(va >= KvaStart && va < KvaEnd);
3091 ptep = KernelPTA + (va >> PAGE_SHIFT);
3092 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME));
3093}