kernel: Make SMP support default (and non-optional).
[dragonfly.git] / sys / platform / vkernel / platform / pmap.c
e4a473f1 1/*
2 * (MPSAFE)
3 *
4 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
5 * Copyright (c) 1991 Regents of the University of California.
6 * All rights reserved.
7 * Copyright (c) 1994 John S. Dyson
8 * All rights reserved.
9 * Copyright (c) 1994 David Greenman
10 * All rights reserved.
11 * Copyright (c) 2004-2006 Matthew Dillon
12 * All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 * 3. Neither the name of The DragonFly Project nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific, prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
36 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
37 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
38 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
42 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
43 */
44/*
45 * NOTE: PMAP_INVAL_ADD: In pc32 this function is called prior to adjusting
46 * the PTE in the page table, because a cpu synchronization might be required.
47 * The actual invalidation is delayed until the following call or flush. In
48 * the VKERNEL build this function is called prior to adjusting the PTE and
49 * invalidates the table synchronously (not delayed), and is not SMP safe
50 * as a consequence.
51 */
52
53#include <sys/types.h>
54#include <sys/systm.h>
55#include <sys/kernel.h>
56#include <sys/stat.h>
57#include <sys/mman.h>
58#include <sys/vkernel.h>
59#include <sys/proc.h>
60#include <sys/thread.h>
61#include <sys/user.h>
135d7199 62#include <sys/vmspace.h>
63
64#include <vm/pmap.h>
65#include <vm/vm_page.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_kern.h>
68#include <vm/vm_object.h>
69#include <vm/vm_zone.h>
70#include <vm/vm_pageout.h>
71
72#include <machine/md_var.h>
73#include <machine/pcb.h>
74#include <machine/pmap_inval.h>
75#include <machine/globaldata.h>
76
e3161323 77#include <sys/sysref2.h>
b12defdc 78#include <sys/spinlock2.h>
e3161323 79
80#include <assert.h>
81
82struct pmap kernel_pmap;
83
84static struct vm_zone pvzone;
85static struct vm_object pvzone_obj;
86static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list);
87static int pv_entry_count;
88static int pv_entry_max;
89static int pv_entry_high_water;
90static int pmap_pagedaemon_waken;
91static boolean_t pmap_initialized = FALSE;
92static int protection_codes[8];
93
94static void i386_protection_init(void);
95static void pmap_remove_all(vm_page_t m);
96static int pmap_release_free_page(struct pmap *pmap, vm_page_t p);
97
98#define MINPV 2048
99#ifndef PMAP_SHPGPERPROC
100#define PMAP_SHPGPERPROC 200
101#endif
102
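/*
 * Return a pointer to the page directory entry in pmap (m) covering
 * virtual address (v).
 */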
103#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
104
105#define pte_prot(m, p) \
106 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
107
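/*
 * Initialize the pmap module: set up the per-page pv lists and bootstrap
 * the pv_entry zone from a boot-time kernel_map allocation.
 */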
108void
109pmap_init(void)
110{
111 int i;
112 struct pv_entry *pvinit;
113
114 for (i = 0; i < vm_page_array_size; i++) {
115 vm_page_t m;
116
117 m = &vm_page_array[i];
118 TAILQ_INIT(&m->md.pv_list);
119 m->md.pv_list_count = 0;
120 }
121
122 i = vm_page_array_size;
123 if (i < MINPV)
124 i = MINPV;
125 pvinit = (struct pv_entry *)kmem_alloc(&kernel_map, i*sizeof(*pvinit));
126 zbootinit(&pvzone, "PV ENTRY", sizeof(*pvinit), pvinit, i);
127 pmap_initialized = TRUE;
128}
129
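/*
 * Finish pmap initialization: size the pv_entry zone from the
 * vm.pmap.shpgperproc and vm.pmap.pv_entries tunables and enable it
 * for interrupt-time allocations.
 */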
130void
131pmap_init2(void)
132{
133 int shpgperproc = PMAP_SHPGPERPROC;
134
135 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
136 pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
137 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
138 pv_entry_high_water = 9 * (pv_entry_max / 10);
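	/*
	 * Example with hypothetical numbers: shpgperproc = 200 and
	 * maxproc = 1000 yield a base of ~200000 pv entries plus one per
	 * managed page; the high water mark is 90% of that maximum.
	 */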
139 zinitna(&pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
140}
141
142/*
143 * Bootstrap the kernel_pmap so it can be used with pmap_enter().
144 *
145 * NOTE! pm_pdir for the kernel pmap is offset so VA's translate
146 * directly into PTD indexes (PTA is also offset for the same reason).
147 * This is necessary because, for now, KVA is not mapped at address 0.
148 *
149 * Page table pages are not managed like they are in normal pmaps, so
150 * no pteobj is needed.
151 */
152void
153pmap_bootstrap(void)
154{
71152ac6 155 vm_pindex_t i = (vm_offset_t)KernelPTD >> PAGE_SHIFT;
e4a473f1 156
157 /*
158 * The kernel_pmap's pm_pteobj is used only for locking and not
159 * for mmu pages.
160 */
71152ac6 161 kernel_pmap.pm_pdir = KernelPTD - (KvaStart >> SEG_SHIFT);
162 kernel_pmap.pm_pdirpte = KernelPTA[i];
163 kernel_pmap.pm_count = 1;
c2fb025d 164 kernel_pmap.pm_active = (cpumask_t)-1 & ~CPUMASK_LOCK;
b12defdc 165 kernel_pmap.pm_pteobj = &kernel_object;
e4a473f1 166 TAILQ_INIT(&kernel_pmap.pm_pvlist);
167 TAILQ_INIT(&kernel_pmap.pm_pvlist_free);
168 spin_init(&kernel_pmap.pm_spin);
169 lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok");
170 i386_protection_init();
171}
172
173/*
174 * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we
175 * just dummy it up so it works well enough for fork().
176 *
177 * In DragonFly, process pmaps may only be used to manipulate user address
178 * space, never kernel address space.
179 */
180void
181pmap_pinit0(struct pmap *pmap)
182{
183 pmap_pinit(pmap);
184}
185
186/************************************************************************
187 * Procedures to manage whole physical maps *
188 ************************************************************************
189 *
190 * Initialize a preallocated and zeroed pmap structure,
191 * such as one in a vmspace structure.
192 */
193void
194pmap_pinit(struct pmap *pmap)
195{
196 vm_page_t ptdpg;
197 int npages;
198
199 /*
200 * No need to allocate page table space yet but we do need a valid
201 * page directory table.
202 */
203 if (pmap->pm_pdir == NULL) {
204 pmap->pm_pdir =
8608b858 205 (vpte_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE);
206 }
207
208 /*
209 * allocate object for the pte array and page directory
210 */
211 npages = VPTE_PAGETABLE_SIZE +
212 (VM_MAX_USER_ADDRESS / PAGE_SIZE) * sizeof(vpte_t);
213 npages = (npages + PAGE_MASK) / PAGE_SIZE;
214
215 if (pmap->pm_pteobj == NULL)
216 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, npages);
217 pmap->pm_pdindex = npages - 1;
218
219 /*
220 * allocate the page directory page
221 */
222 ptdpg = vm_page_grab(pmap->pm_pteobj, pmap->pm_pdindex,
d2d8515b 223 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_ZERO);
54341a3b 224 vm_page_wire(ptdpg);
225
226 /* not usually mapped */
227 vm_page_flag_clear(ptdpg, PG_MAPPED);
228 vm_page_wakeup(ptdpg);
229
230 pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
135d7199 231 pmap->pm_pdirpte = KernelPTA[(vm_offset_t)pmap->pm_pdir >> PAGE_SHIFT];
232
233 pmap->pm_count = 1;
234 pmap->pm_active = 0;
235 pmap->pm_ptphint = NULL;
24eb47e0 236 pmap->pm_cpucachemask = 0;
e4a473f1 237 TAILQ_INIT(&pmap->pm_pvlist);
238 TAILQ_INIT(&pmap->pm_pvlist_free);
239 spin_init(&pmap->pm_spin);
240 lwkt_token_init(&pmap->pm_token, "pmap_tok");
e4a473f1 241 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
eec2b734 242 pmap->pm_stats.resident_count = 1;
243}
244
245/*
246 * Clean up a pmap structure so it can be physically freed
247 *
248 * No requirements.
249 */
250void
251pmap_puninit(pmap_t pmap)
252{
253 if (pmap->pm_pdir) {
254 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pdir, PAGE_SIZE);
255 pmap->pm_pdir = NULL;
256 }
257 if (pmap->pm_pteobj) {
258 vm_object_deallocate(pmap->pm_pteobj);
259 pmap->pm_pteobj = NULL;
260 }
261}
262
263
264/*
265 * Wire in kernel global address entries. To avoid a race condition
266 * between pmap initialization and pmap_growkernel, this procedure
267 * adds the pmap to the master list (which growkernel scans to update),
268 * then copies the template.
269 *
270 * In a virtual kernel there are no kernel global address entries.
271 *
272 * No requirements.
273 */
274void
275pmap_pinit2(struct pmap *pmap)
276{
b12defdc 277 spin_lock(&pmap_spin);
e4a473f1 278 TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode);
b12defdc 279 spin_unlock(&pmap_spin);
280}
281
282/*
283 * Release all resources held by the given physical map.
284 *
285 * Should only be called if the map contains no valid mappings.
5bce55a9 286 *
b12defdc 287 * Caller must hold pmap->pm_token
288 */
289static int pmap_release_callback(struct vm_page *p, void *data);
290
291void
292pmap_release(struct pmap *pmap)
293{
aaf8b91f 294 struct mdglobaldata *gd = mdcpu;
295 vm_object_t object = pmap->pm_pteobj;
296 struct rb_vm_page_scan_info info;
297
298 KKASSERT(pmap != &kernel_pmap);
299
300#if defined(DIAGNOSTIC)
301 if (object->ref_count != 1)
302 panic("pmap_release: pteobj reference count != 1");
303#endif
304 /*
305 * Once we destroy the page table, the mapping becomes invalid.
306 * Don't waste time doing a madvise to invalidate the mapping, just
307 * set cpucachemask to 0.
308 */
309 if (pmap->pm_pdir == gd->gd_PT1pdir) {
310 gd->gd_PT1pdir = NULL;
311 *gd->gd_PT1pde = 0;
312 /* madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL); */
313 }
314 if (pmap->pm_pdir == gd->gd_PT2pdir) {
315 gd->gd_PT2pdir = NULL;
316 *gd->gd_PT2pde = 0;
317 /* madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL); */
318 }
319 if (pmap->pm_pdir == gd->gd_PT3pdir) {
320 gd->gd_PT3pdir = NULL;
321 *gd->gd_PT3pde = 0;
322 /* madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL); */
323 }
324
325 info.pmap = pmap;
326 info.object = object;
327
328 spin_lock(&pmap_spin);
e4a473f1 329 TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode);
b12defdc 330 spin_unlock(&pmap_spin);
e4a473f1 331
b12defdc 332 vm_object_hold(object);
e4a473f1 333 do {
334 info.error = 0;
335 info.mpte = NULL;
336 info.limit = object->generation;
337
338 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
339 pmap_release_callback, &info);
340 if (info.error == 0 && info.mpte) {
341 if (!pmap_release_free_page(pmap, info.mpte))
342 info.error = 1;
343 }
e4a473f1 344 } while (info.error);
b12defdc 345 vm_object_drop(object);
346
347 /*
348 * Leave the KVA reservation for pm_pdir cached for later reuse.
349 */
e7f2d7de 350 pmap->pm_pdirpte = 0;
24eb47e0 351 pmap->pm_cpucachemask = 0;
352}
353
354/*
355 * Callback to release a page table page backing a directory
356 * entry.
357 */
358static int
359pmap_release_callback(struct vm_page *p, void *data)
360{
361 struct rb_vm_page_scan_info *info = data;
362
363 if (p->pindex == info->pmap->pm_pdindex) {
364 info->mpte = p;
365 return(0);
366 }
367 if (!pmap_release_free_page(info->pmap, p)) {
368 info->error = 1;
369 return(-1);
370 }
371 if (info->object->generation != info->limit) {
372 info->error = 1;
373 return(-1);
374 }
375 return(0);
376}
377
e4a473f1
MD
378/*
379 * Add a reference to the specified pmap.
5bce55a9
MD
380 *
381 * No requirements.
e4a473f1
MD
382 */
383void
384pmap_reference(pmap_t pmap)
385{
5bce55a9
MD
386 if (pmap) {
387 lwkt_gettoken(&vm_token);
388 ++pmap->pm_count;
389 lwkt_reltoken(&vm_token);
e4a473f1
MD
390 }
391}
392
135d7199
MD
393/************************************************************************
394 * VMSPACE MANAGEMENT *
395 ************************************************************************
396 *
397 * The VMSPACE management we do in our virtual kernel must be reflected
398 * in the real kernel. This is accomplished by making vmspace system
399 * calls to the real kernel.
400 */
401void
402cpu_vmspace_alloc(struct vmspace *vm)
403{
404 int r;
405 void *rp;
406
407#define LAST_EXTENT (VM_MAX_USER_ADDRESS - 0x80000000)
408
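	/*
	 * Back the user address space with the memory image file in three
	 * extents (0-1G, 1G-2G, 2G-LAST_EXTENT), each mapped MAP_VPAGETABLE
	 * so the real kernel consults our virtual page tables, and then
	 * point each extent at this pmap's page directory with MADV_SETMAP.
	 */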
4e7c41c5 409 if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0)
135d7199
MD
410 panic("vmspace_create() failed");
411
4e7c41c5 412 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
135d7199
MD
413 PROT_READ|PROT_WRITE,
414 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
415 MemImageFd, 0);
416 if (rp == MAP_FAILED)
417 panic("vmspace_mmap: failed1");
571989b5
MD
418 vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
419 MADV_NOSYNC, 0);
4e7c41c5 420 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
135d7199
MD
421 PROT_READ|PROT_WRITE,
422 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
423 MemImageFd, 0x40000000);
424 if (rp == MAP_FAILED)
425 panic("vmspace_mmap: failed2");
571989b5
MD
426 vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
427 MADV_NOSYNC, 0);
4e7c41c5 428 rp = vmspace_mmap(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
135d7199
MD
429 PROT_READ|PROT_WRITE,
430 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
431 MemImageFd, 0x80000000);
571989b5
MD
432 vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
433 MADV_NOSYNC, 0);
135d7199
MD
434 if (rp == MAP_FAILED)
435 panic("vmspace_mmap: failed3");
436
4e7c41c5
MD
437 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000,
438 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
135d7199
MD
439 if (r < 0)
440 panic("vmspace_mcontrol: failed1");
4e7c41c5
MD
441 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000,
442 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
135d7199
MD
443 if (r < 0)
444 panic("vmspace_mcontrol: failed2");
4e7c41c5
MD
445 r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT,
446 MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte);
135d7199
MD
447 if (r < 0)
448 panic("vmspace_mcontrol: failed3");
449}
450
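/*
 * Destroy the real-kernel vmspace backing this virtual kernel vmspace.
 */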
451void
452cpu_vmspace_free(struct vmspace *vm)
453{
4e7c41c5 454 if (vmspace_destroy(&vm->vm_pmap) < 0)
135d7199
MD
455 panic("vmspace_destroy() failed");
456}
457
e4a473f1
MD
458/************************************************************************
459 * Procedures which operate directly on the kernel PMAP *
460 ************************************************************************/
461
462/*
463 * This maps the requested page table and gives us access to it.
eec2b734
MD
464 *
465 * This routine can be called from a potentially preempting interrupt
466 * thread or from a normal thread.
e4a473f1
MD
467 */
468static vpte_t *
71152ac6 469get_ptbase(struct pmap *pmap, vm_offset_t va)
e4a473f1
MD
470{
471 struct mdglobaldata *gd = mdcpu;
472
473 if (pmap == &kernel_pmap) {
71152ac6
MD
474 KKASSERT(va >= KvaStart && va < KvaEnd);
475 return(KernelPTA + (va >> PAGE_SHIFT));
e4a473f1 476 } else if (pmap->pm_pdir == gd->gd_PT1pdir) {
24eb47e0
MD
477 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
478 *gd->gd_PT1pde = pmap->pm_pdirpte;
479 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
da23a592
MD
480 atomic_set_cpumask(&pmap->pm_cpucachemask,
481 gd->mi.gd_cpumask);
24eb47e0 482 }
71152ac6 483 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
e4a473f1 484 } else if (pmap->pm_pdir == gd->gd_PT2pdir) {
24eb47e0
MD
485 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
486 *gd->gd_PT2pde = pmap->pm_pdirpte;
487 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
da23a592
MD
488 atomic_set_cpumask(&pmap->pm_cpucachemask,
489 gd->mi.gd_cpumask);
24eb47e0 490 }
71152ac6 491 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
e4a473f1
MD
492 }
493
494 /*
eec2b734
MD
495 * If we aren't running from a potentially preempting interrupt,
496 * load a new page table directory into the page table cache
e4a473f1 497 */
eec2b734
MD
498 if (gd->mi.gd_intr_nesting_level == 0 &&
499 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0) {
500 /*
501 * Choose one or the other and map the page table
502 * in the KVA space reserved for it.
503 */
504 if ((gd->gd_PTflip = 1 - gd->gd_PTflip) == 0) {
505 gd->gd_PT1pdir = pmap->pm_pdir;
506 *gd->gd_PT1pde = pmap->pm_pdirpte;
507 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
da23a592
MD
508 atomic_set_cpumask(&pmap->pm_cpucachemask,
509 gd->mi.gd_cpumask);
eec2b734
MD
510 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
511 } else {
512 gd->gd_PT2pdir = pmap->pm_pdir;
513 *gd->gd_PT2pde = pmap->pm_pdirpte;
514 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
da23a592
MD
515 atomic_set_cpumask(&pmap->pm_cpucachemask,
516 gd->mi.gd_cpumask);
eec2b734
MD
517 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
518 }
519 }
e4a473f1 520
eec2b734
MD
521 /*
522 * If we are running from a preempting interrupt use a private
523 * map. The caller must be in a critical section.
524 */
525 KKASSERT(IN_CRITICAL_SECT(curthread));
526 if (pmap->pm_pdir == gd->gd_PT3pdir) {
527 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
528 *gd->gd_PT3pde = pmap->pm_pdirpte;
529 madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL);
da23a592
MD
530 atomic_set_cpumask(&pmap->pm_cpucachemask,
531 gd->mi.gd_cpumask);
eec2b734 532 }
e4a473f1 533 } else {
eec2b734
MD
534 gd->gd_PT3pdir = pmap->pm_pdir;
535 *gd->gd_PT3pde = pmap->pm_pdirpte;
536 madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL);
da23a592
MD
537 atomic_set_cpumask(&pmap->pm_cpucachemask,
538 gd->mi.gd_cpumask);
e4a473f1 539 }
eec2b734 540 return(gd->gd_PT3map + (va >> PAGE_SHIFT));
e4a473f1
MD
541}
542
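/*
 * Like get_ptbase() but always uses the first (PT1) kernel mapping slot.
 * The caller must not be running from a preempting interrupt thread if
 * a new page directory has to be loaded.
 */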
543static vpte_t *
71152ac6 544get_ptbase1(struct pmap *pmap, vm_offset_t va)
e4a473f1
MD
545{
546 struct mdglobaldata *gd = mdcpu;
547
548 if (pmap == &kernel_pmap) {
71152ac6
MD
549 KKASSERT(va >= KvaStart && va < KvaEnd);
550 return(KernelPTA + (va >> PAGE_SHIFT));
e4a473f1 551 } else if (pmap->pm_pdir == gd->gd_PT1pdir) {
d5b116a0
MD
552 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
553 *gd->gd_PT1pde = pmap->pm_pdirpte;
554 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
da23a592
MD
555 atomic_set_cpumask(&pmap->pm_cpucachemask,
556 gd->mi.gd_cpumask);
d5b116a0 557 }
71152ac6 558 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
e4a473f1
MD
559 }
560 KKASSERT(gd->mi.gd_intr_nesting_level == 0 &&
561 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0);
562 gd->gd_PT1pdir = pmap->pm_pdir;
563 *gd->gd_PT1pde = pmap->pm_pdirpte;
564 madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL);
71152ac6 565 return(gd->gd_PT1map + (va >> PAGE_SHIFT));
e4a473f1
MD
566}
567
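/*
 * Like get_ptbase() but always uses the second (PT2) kernel mapping slot,
 * so a second pmap can remain mapped at the same time. The caller must
 * not be running from a preempting interrupt thread if a new page
 * directory has to be loaded.
 */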
568static vpte_t *
71152ac6 569get_ptbase2(struct pmap *pmap, vm_offset_t va)
e4a473f1
MD
570{
571 struct mdglobaldata *gd = mdcpu;
572
573 if (pmap == &kernel_pmap) {
71152ac6
MD
574 KKASSERT(va >= KvaStart && va < KvaEnd);
575 return(KernelPTA + (va >> PAGE_SHIFT));
e4a473f1 576 } else if (pmap->pm_pdir == gd->gd_PT2pdir) {
d5b116a0
MD
577 if ((pmap->pm_cpucachemask & gd->mi.gd_cpumask) == 0) {
578 *gd->gd_PT2pde = pmap->pm_pdirpte;
579 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
da23a592
MD
580 atomic_set_cpumask(&pmap->pm_cpucachemask,
581 gd->mi.gd_cpumask);
d5b116a0 582 }
71152ac6 583 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
e4a473f1
MD
584 }
585 KKASSERT(gd->mi.gd_intr_nesting_level == 0 &&
586 (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0);
587 gd->gd_PT2pdir = pmap->pm_pdir;
588 *gd->gd_PT2pde = pmap->pm_pdirpte;
589 madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL);
71152ac6 590 return(gd->gd_PT2map + (va >> PAGE_SHIFT));
e4a473f1
MD
591}
592
593/*
594 * Return a pointer to the page table entry for the specified va in the
595 * specified pmap. NULL is returned if there is no valid page table page
596 * for the VA.
597 */
598static __inline vpte_t *
599pmap_pte(struct pmap *pmap, vm_offset_t va)
600{
601 vpte_t *ptep;
602
71152ac6 603 ptep = &pmap->pm_pdir[va >> SEG_SHIFT];
e4a473f1
MD
604 if (*ptep & VPTE_PS)
605 return(ptep);
606 if (*ptep)
71152ac6 607 return (get_ptbase(pmap, va));
e4a473f1
MD
608 return(NULL);
609}
610
611
612/*
613 * Enter a mapping into kernel_pmap. Mappings created in this fashion
d5b116a0
MD
614 * are not managed. Mappings must be immediately accessible on all cpus.
615 *
616 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the
617 * real pmap and handle related races before storing the new vpte.
e4a473f1
MD
618 */
619void
620pmap_kenter(vm_offset_t va, vm_paddr_t pa)
621{
622 vpte_t *ptep;
623 vpte_t npte;
e4a473f1
MD
624
625 KKASSERT(va >= KvaStart && va < KvaEnd);
626 npte = (vpte_t)pa | VPTE_R | VPTE_W | VPTE_V;
71152ac6 627 ptep = KernelPTA + (va >> PAGE_SHIFT);
d5b116a0
MD
628 if (*ptep & VPTE_V)
629 pmap_inval_pte(ptep, &kernel_pmap, va);
630 *ptep = npte;
e4a473f1
MD
631}
632
d5b116a0
MD
633/*
 634 * Synchronize a kvm mapping originally made for private use on
635 * some other cpu so it can be used on all cpus.
636 *
637 * XXX add MADV_RESYNC to improve performance.
638 */
6f7b98e0
MD
639void
640pmap_kenter_sync(vm_offset_t va)
641{
d5b116a0 642 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
6f7b98e0
MD
643}
644
d5b116a0
MD
645/*
 646 * Synchronize a kvm mapping originally made for private use on
647 * some other cpu so it can be used on our cpu. Turns out to be the
648 * same madvise() call, because we have to sync the real pmaps anyway.
649 *
650 * XXX add MADV_RESYNC to improve performance.
651 */
6f7b98e0
MD
652void
653pmap_kenter_sync_quick(vm_offset_t va)
654{
655 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
656}
657
d5b116a0 658#if 0
9ad680a3 659/*
d5b116a0
MD
660 * Make a previously read-only kernel mapping R+W (not implemented by
661 * virtual kernels).
9ad680a3
MD
662 */
663void
664pmap_kmodify_rw(vm_offset_t va)
665{
666 *pmap_kpte(va) |= VPTE_R | VPTE_W;
667 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
668}
669
d5b116a0
MD
670/*
671 * Make a kernel mapping non-cacheable (not applicable to virtual kernels)
672 */
9ad680a3
MD
673void
674pmap_kmodify_nc(vm_offset_t va)
675{
9ad680a3
MD
676 *pmap_kpte(va) |= VPTE_N;
677 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
9ad680a3
MD
678}
679
d5b116a0
MD
680#endif
681
6f7b98e0
MD
682/*
 683 * Map a contiguous range of physical memory into KVM and return the starting KVA.
684 */
685vm_offset_t
8e5e6f1b 686pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot)
6f7b98e0 687{
8e5e6f1b
AH
688 vm_offset_t sva, virt;
689
690 sva = virt = *virtp;
6f7b98e0
MD
691 while (start < end) {
692 pmap_kenter(virt, start);
693 virt += PAGE_SIZE;
694 start += PAGE_SIZE;
695 }
8e5e6f1b
AH
696 *virtp = virt;
697 return (sva);
6f7b98e0
MD
698}
699
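/*
 * Return a pointer to the kernel page table entry (in KernelPTA) for
 * the specified kernel virtual address.
 */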
700vpte_t *
701pmap_kpte(vm_offset_t va)
702{
703 vpte_t *ptep;
704
705 KKASSERT(va >= KvaStart && va < KvaEnd);
71152ac6 706 ptep = KernelPTA + (va >> PAGE_SHIFT);
6f7b98e0
MD
707 return(ptep);
708}
709
e4a473f1 710/*
d5b116a0
MD
711 * Enter an unmanaged KVA mapping for the private use of the current
712 * cpu only. pmap_kenter_sync() may be called to make the mapping usable
713 * by other cpus.
714 *
 715 * It is illegal for the mapping to be accessed by other cpus unless
716 * pmap_kenter_sync*() is called.
e4a473f1
MD
717 */
718void
719pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
720{
721 vpte_t *ptep;
722 vpte_t npte;
723
724 KKASSERT(va >= KvaStart && va < KvaEnd);
725
726 npte = (vpte_t)pa | VPTE_R | VPTE_W | VPTE_V;
71152ac6 727 ptep = KernelPTA + (va >> PAGE_SHIFT);
d5b116a0
MD
728 if (*ptep & VPTE_V)
729 pmap_inval_pte_quick(ptep, &kernel_pmap, va);
730 *ptep = npte;
e4a473f1
MD
731}
732
733/*
734 * Make a temporary mapping for a physical address. This is only intended
735 * to be used for panic dumps.
fb8345e6
MD
736 *
737 * The caller is responsible for calling smp_invltlb().
e4a473f1
MD
738 */
739void *
8e5ea5f7 740pmap_kenter_temporary(vm_paddr_t pa, long i)
e4a473f1 741{
fb8345e6 742 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa);
e4a473f1
MD
743 return ((void *)crashdumpmap);
744}
745
746/*
747 * Remove an unmanaged mapping created with pmap_kenter*().
748 */
749void
750pmap_kremove(vm_offset_t va)
751{
752 vpte_t *ptep;
e4a473f1
MD
753
754 KKASSERT(va >= KvaStart && va < KvaEnd);
755
71152ac6 756 ptep = KernelPTA + (va >> PAGE_SHIFT);
d5b116a0
MD
757 if (*ptep & VPTE_V)
758 pmap_inval_pte(ptep, &kernel_pmap, va);
759 *ptep = 0;
e4a473f1
MD
760}
761
762/*
d5b116a0
MD
763 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize
764 * only with this cpu.
765 *
766 * Unfortunately because we optimize new entries by testing VPTE_V later
767 * on, we actually still have to synchronize with all the cpus. XXX maybe
768 * store a junk value and test against 0 in the other places instead?
e4a473f1
MD
769 */
770void
771pmap_kremove_quick(vm_offset_t va)
772{
773 vpte_t *ptep;
774
775 KKASSERT(va >= KvaStart && va < KvaEnd);
776
71152ac6 777 ptep = KernelPTA + (va >> PAGE_SHIFT);
d5b116a0
MD
778 if (*ptep & VPTE_V)
779 pmap_inval_pte(ptep, &kernel_pmap, va); /* NOT _quick */
780 *ptep = 0;
e4a473f1
MD
781}
782
783/*
784 * Extract the physical address from the kernel_pmap that is associated
785 * with the specified virtual address.
786 */
787vm_paddr_t
788pmap_kextract(vm_offset_t va)
789{
790 vpte_t *ptep;
791 vm_paddr_t pa;
792
793 KKASSERT(va >= KvaStart && va < KvaEnd);
794
71152ac6 795 ptep = KernelPTA + (va >> PAGE_SHIFT);
e4a473f1
MD
796 pa = (vm_paddr_t)(*ptep & VPTE_FRAME) | (va & PAGE_MASK);
797 return(pa);
798}
799
800/*
801 * Map a set of unmanaged VM pages into KVM.
802 */
803void
804pmap_qenter(vm_offset_t va, struct vm_page **m, int count)
805{
806 KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd);
807 while (count) {
808 vpte_t *ptep;
809
71152ac6 810 ptep = KernelPTA + (va >> PAGE_SHIFT);
e4a473f1 811 if (*ptep & VPTE_V)
d5b116a0 812 pmap_inval_pte(ptep, &kernel_pmap, va);
e4a473f1
MD
813 *ptep = (vpte_t)(*m)->phys_addr | VPTE_R | VPTE_W | VPTE_V;
814 --count;
815 ++m;
816 va += PAGE_SIZE;
817 }
e4a473f1
MD
818}
819
e4a473f1
MD
820/*
821 * Undo the effects of pmap_qenter*().
822 */
823void
824pmap_qremove(vm_offset_t va, int count)
825{
826 KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd);
827 while (count) {
828 vpte_t *ptep;
829
71152ac6 830 ptep = KernelPTA + (va >> PAGE_SHIFT);
e4a473f1 831 if (*ptep & VPTE_V)
d5b116a0 832 pmap_inval_pte(ptep, &kernel_pmap, va);
e4a473f1
MD
833 *ptep = 0;
834 --count;
835 va += PAGE_SIZE;
836 }
e4a473f1
MD
837}
838
839/************************************************************************
 840 * Misc support glue called by machine independent code *
 841 ************************************************************************
 842 *
 843 * These routines are called by machine independent code to operate on
 844 * certain machine-dependent aspects of processes, threads, and pmaps.
845 */
846
847/*
848 * Initialize MD portions of the thread structure.
849 */
850void
851pmap_init_thread(thread_t td)
852{
853 /* enforce pcb placement */
854 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1;
855 td->td_savefpu = &td->td_pcb->pcb_save;
856 td->td_sp = (char *)td->td_pcb - 16;
857}
858
859/*
13d13d89 860 * This routine directly affects fork performance for a process.
e4a473f1
MD
861 */
862void
13d13d89 863pmap_init_proc(struct proc *p)
e4a473f1 864{
e4a473f1
MD
865}
866
e4a473f1
MD
867/*
868 * We pre-allocate all page table pages for kernel virtual memory so
869 * this routine will only be called if KVM has been exhausted.
5bce55a9
MD
870 *
871 * No requirements.
e4a473f1
MD
872 */
873void
a8cf2878 874pmap_growkernel(vm_offset_t kstart, vm_offset_t kend)
e4a473f1 875{
a8cf2878
MD
876 vm_offset_t addr;
877
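	/*
	 * Round the new end of KVA up to a page table (segment) boundary
	 * before checking it against virtual_end.
	 */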
878 addr = (kend + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
00835518 879
5bce55a9 880 lwkt_gettoken(&vm_token);
00835518
MD
881 if (addr > virtual_end - SEG_SIZE)
882 panic("KVM exhausted");
883 kernel_vm_end = addr;
5bce55a9 884 lwkt_reltoken(&vm_token);
e4a473f1
MD
885}
886
887/*
 888 * The modification bit is not tracked for any pages in this range. XXX
 889 * such pages in this map should always use pmap_k*() functions and not
 890 * be managed anyhow.
 891 *
 892 * XXX User and kernel address spaces are independent for virtual kernels;
 893 * this function only applies to the kernel pmap.
e4a473f1
MD
894 */
895static int
d6c96d4d 896pmap_track_modified(pmap_t pmap, vm_offset_t va)
e4a473f1 897{
d6c96d4d
MD
898 if (pmap != &kernel_pmap)
899 return 1;
e4a473f1
MD
900 if ((va < clean_sva) || (va >= clean_eva))
901 return 1;
902 else
903 return 0;
904}
905
906/************************************************************************
907 * Procedures supporting managed page table pages *
908 ************************************************************************
909 *
910 * These procedures are used to track managed page table pages. These pages
911 * use the page table page's vm_page_t to track PTEs in the page. The
912 * page table pages themselves are arranged in a VM object, pmap->pm_pteobj.
913 *
914 * This allows the system to throw away page table pages for user processes
915 * at will and reinstantiate them on demand.
916 */
917
918/*
919 * This routine works like vm_page_lookup() but also blocks as long as the
920 * page is busy. This routine does not busy the page it returns.
921 *
 922 * Unless the caller is managing objects whose pages are in a known state,
923 * the call should be made with a critical section held so the page's object
924 * association remains valid on return.
925 */
926static vm_page_t
927pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
928{
929 vm_page_t m;
930
b12defdc
MD
931 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
932 m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp");
933
e4a473f1
MD
934 return(m);
935}
936
937/*
938 * This routine unholds page table pages, and if the hold count
939 * drops to zero, then it decrements the wire count.
eec2b734
MD
940 *
941 * We must recheck that this is the last hold reference after busy-sleeping
942 * on the page.
e4a473f1
MD
943 */
944static int
d5b116a0 945_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
e4a473f1 946{
b12defdc 947 vm_page_busy_wait(m, FALSE, "pmuwpt");
eec2b734
MD
948 KASSERT(m->queue == PQ_NONE,
949 ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m));
e4a473f1 950
eec2b734 951 if (m->hold_count == 1) {
e4a473f1 952 /*
d5b116a0 953 * Unmap the page table page.
e4a473f1 954 */
eec2b734 955 KKASSERT(pmap->pm_pdir[m->pindex] != 0);
d5b116a0
MD
956 pmap_inval_pde(&pmap->pm_pdir[m->pindex], pmap,
957 (vm_offset_t)m->pindex << SEG_SHIFT);
eec2b734 958 KKASSERT(pmap->pm_stats.resident_count > 0);
e4a473f1
MD
959 --pmap->pm_stats.resident_count;
960
961 if (pmap->pm_ptphint == m)
962 pmap->pm_ptphint = NULL;
963
964 /*
eec2b734
MD
965 * This was our last hold, the page had better be unwired
966 * after we decrement wire_count.
967 *
968 * FUTURE NOTE: shared page directory page could result in
969 * multiple wire counts.
e4a473f1 970 */
eec2b734 971 vm_page_unhold(m);
e4a473f1 972 --m->wire_count;
eec2b734 973 KKASSERT(m->wire_count == 0);
b12defdc 974 atomic_add_int(&vmstats.v_wire_count, -1);
17cde63e 975 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
eec2b734
MD
976 vm_page_flash(m);
977 vm_page_free_zero(m);
e4a473f1
MD
978 return 1;
979 }
17cde63e 980 KKASSERT(m->hold_count > 1);
eec2b734 981 vm_page_unhold(m);
b12defdc
MD
982 vm_page_wakeup(m);
983
e4a473f1
MD
984 return 0;
985}
986
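/*
 * Drop one hold on a page table page. If it was the last hold, fall
 * through to _pmap_unwire_pte_hold() to unmap and free the page.
 */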
987static __inline int
d5b116a0 988pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
e4a473f1 989{
eec2b734
MD
990 KKASSERT(m->hold_count > 0);
991 if (m->hold_count > 1) {
992 vm_page_unhold(m);
e4a473f1 993 return 0;
eec2b734
MD
994 } else {
995 return _pmap_unwire_pte_hold(pmap, m);
996 }
e4a473f1
MD
997}
998
999/*
1000 * After removing a page table entry, this routine is used to
1001 * conditionally free the page, and manage the hold/wire counts.
1002 */
1003static int
d5b116a0 1004pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
e4a473f1
MD
1005{
1006 unsigned ptepindex;
1007
b12defdc
MD
1008 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj));
1009
e4a473f1
MD
1010 if (mpte == NULL) {
1011 /*
1012 * page table pages in the kernel_pmap are not managed.
1013 */
1014 if (pmap == &kernel_pmap)
1015 return(0);
1016 ptepindex = (va >> PDRSHIFT);
1017 if (pmap->pm_ptphint &&
1018 (pmap->pm_ptphint->pindex == ptepindex)) {
1019 mpte = pmap->pm_ptphint;
1020 } else {
b12defdc 1021 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
e4a473f1 1022 pmap->pm_ptphint = mpte;
b12defdc 1023 vm_page_wakeup(mpte);
e4a473f1
MD
1024 }
1025 }
d5b116a0 1026 return pmap_unwire_pte_hold(pmap, mpte);
e4a473f1
MD
1027}
1028
1029/*
eec2b734
MD
1030 * Attempt to release and free the vm_page backing a page directory page
1031 * in a pmap. Returns 1 on success, 0 on failure (if the procedure had
1032 * to sleep).
e4a473f1
MD
1033 */
1034static int
1035pmap_release_free_page(struct pmap *pmap, vm_page_t p)
1036{
1037 vpte_t *pde = pmap->pm_pdir;
eec2b734 1038
e4a473f1
MD
1039 /*
1040 * This code optimizes the case of freeing non-busy
1041 * page-table pages. Those pages are zero now, and
1042 * might as well be placed directly into the zero queue.
1043 */
b12defdc
MD
1044 if (vm_page_busy_try(p, FALSE)) {
1045 vm_page_sleep_busy(p, FALSE, "pmaprl");
e4a473f1 1046 return 0;
b12defdc 1047 }
eec2b734
MD
1048 KKASSERT(pmap->pm_stats.resident_count > 0);
1049 --pmap->pm_stats.resident_count;
e4a473f1
MD
1050
1051 if (p->hold_count) {
1052 panic("pmap_release: freeing held page table page");
1053 }
1054 /*
1055 * Page directory pages need to have the kernel stuff cleared, so
1056 * they can go into the zero queue also.
1057 *
1058 * In virtual kernels there is no 'kernel stuff'. For the moment
1059 * I just make sure the whole thing has been zero'd even though
1060 * it should already be completely zero'd.
d6c96d4d
MD
1061 *
1062 * pmaps for vkernels do not self-map because they do not share
1063 * their address space with the vkernel. Clearing of pde[] thus
1064 * only applies to page table pages and not to the page directory
1065 * page.
e4a473f1
MD
1066 */
1067 if (p->pindex == pmap->pm_pdindex) {
1068 bzero(pde, VPTE_PAGETABLE_SIZE);
1069 pmap_kremove((vm_offset_t)pmap->pm_pdir);
d6c96d4d 1070 } else {
eec2b734 1071 KKASSERT(pde[p->pindex] != 0);
d5b116a0
MD
1072 pmap_inval_pde(&pde[p->pindex], pmap,
1073 (vm_offset_t)p->pindex << SEG_SHIFT);
e4a473f1
MD
1074 }
1075
1076 /*
1077 * Clear the matching hint
1078 */
1079 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1080 pmap->pm_ptphint = NULL;
1081
1082 /*
1083 * And throw the page away. The page is completely zero'd out so
1084 * optimize the free call.
1085 */
1086 p->wire_count--;
b12defdc 1087 atomic_add_int(&vmstats.v_wire_count, -1);
e4a473f1
MD
1088 vm_page_free_zero(p);
1089 return 1;
1090}
1091
1092/*
1093 * This routine is called if the page table page is not mapped in the page
1094 * table directory.
1095 *
1096 * The routine is broken up into two parts for readability.
eec2b734
MD
1097 *
1098 * It must return a held mpte and map the page directory page as required.
1099 * Because vm_page_grab() can block, we must re-check pm_pdir[ptepindex]
e4a473f1
MD
1100 */
1101static vm_page_t
1102_pmap_allocpte(pmap_t pmap, unsigned ptepindex)
1103{
1104 vm_paddr_t ptepa;
1105 vm_page_t m;
1106
1107 /*
eec2b734
MD
1108 * Find or fabricate a new pagetable page. A busied page will be
1109 * returned. This call may block.
e4a473f1
MD
1110 */
1111 m = vm_page_grab(pmap->pm_pteobj, ptepindex,
1112 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
b12defdc
MD
1113 vm_page_flag_set(m, PG_MAPPED);
1114
e4a473f1
MD
1115 KASSERT(m->queue == PQ_NONE,
1116 ("_pmap_allocpte: %p->queue != PQ_NONE", m));
1117
e4a473f1 1118 /*
eec2b734
MD
1119 * Increment the hold count for the page we will be returning to
1120 * the caller.
e4a473f1
MD
1121 */
1122 m->hold_count++;
1123
eec2b734
MD
1124 /*
1125 * It is possible that someone else got in and mapped by the page
1126 * directory page while we were blocked, if so just unbusy and
1127 * return the held page.
1128 */
1129 if ((ptepa = pmap->pm_pdir[ptepindex]) != 0) {
eec2b734
MD
1130 KKASSERT((ptepa & VPTE_FRAME) == VM_PAGE_TO_PHYS(m));
1131 vm_page_wakeup(m);
1132 return(m);
1133 }
54341a3b 1134 vm_page_wire(m);
eec2b734 1135
e4a473f1
MD
1136 /*
1137 * Map the pagetable page into the process address space, if
1138 * it isn't already there.
1139 */
eec2b734 1140 ++pmap->pm_stats.resident_count;
e4a473f1
MD
1141
1142 ptepa = VM_PAGE_TO_PHYS(m);
1143 pmap->pm_pdir[ptepindex] = (vpte_t)ptepa | VPTE_R | VPTE_W | VPTE_V |
1144 VPTE_A | VPTE_M;
1145
1146 /*
1147 * We are likely about to access this page table page, so set the
1148 * page table hint to reduce overhead.
1149 */
1150 pmap->pm_ptphint = m;
1151
e4a473f1
MD
1152 vm_page_wakeup(m);
1153
1154 return (m);
1155}
1156
1157/*
1158 * Determine the page table page required to access the VA in the pmap
1159 * and allocate it if necessary. Return a held vm_page_t for the page.
1160 *
1161 * Only used with user pmaps.
1162 */
1163static vm_page_t
1164pmap_allocpte(pmap_t pmap, vm_offset_t va)
1165{
1166 unsigned ptepindex;
1167 vm_offset_t ptepa;
1168 vm_page_t m;
1169
b12defdc
MD
1170 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj));
1171
e4a473f1
MD
1172 /*
1173 * Calculate pagetable page index
1174 */
1175 ptepindex = va >> PDRSHIFT;
1176
1177 /*
1178 * Get the page directory entry
1179 */
1180 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1181
1182 /*
1183 * This supports switching from a 4MB page to a
1184 * normal 4K page.
1185 */
1186 if (ptepa & VPTE_PS) {
eec2b734 1187 KKASSERT(pmap->pm_pdir[ptepindex] != 0);
d5b116a0
MD
1188 pmap_inval_pde(&pmap->pm_pdir[ptepindex], pmap,
1189 (vm_offset_t)ptepindex << SEG_SHIFT);
e4a473f1 1190 ptepa = 0;
e4a473f1
MD
1191 }
1192
1193 /*
1194 * If the page table page is mapped, we just increment the
1195 * hold count, and activate it.
1196 */
1197 if (ptepa) {
1198 /*
1199 * In order to get the page table page, try the
1200 * hint first.
1201 */
1202 if (pmap->pm_ptphint &&
1203 (pmap->pm_ptphint->pindex == ptepindex)) {
1204 m = pmap->pm_ptphint;
1205 } else {
b12defdc 1206 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
e4a473f1 1207 pmap->pm_ptphint = m;
b12defdc 1208 vm_page_wakeup(m);
e4a473f1
MD
1209 }
1210 m->hold_count++;
1211 return m;
1212 }
1213 /*
1214 * Here if the pte page isn't mapped, or if it has been deallocated.
1215 */
1216 return _pmap_allocpte(pmap, ptepindex);
1217}
1218
1219/************************************************************************
1220 * Managed pages in pmaps *
1221 ************************************************************************
1222 *
1223 * All pages entered into user pmaps and some pages entered into the kernel
1224 * pmap are managed, meaning that pmap_protect() and other related management
1225 * functions work on these pages.
1226 */
1227
1228/*
1229 * free the pv_entry back to the free list. This function may be
1230 * called from an interrupt.
1231 */
1232static __inline void
1233free_pv_entry(pv_entry_t pv)
1234{
1235 pv_entry_count--;
1236 zfree(&pvzone, pv);
1237}
1238
1239/*
1240 * get a new pv_entry, allocating a block from the system
1241 * when needed. This function may be called from an interrupt.
1242 */
1243static pv_entry_t
1244get_pv_entry(void)
1245{
1246 pv_entry_count++;
1247 if (pv_entry_high_water &&
20479584
MD
1248 (pv_entry_count > pv_entry_high_water) &&
1249 (pmap_pagedaemon_waken == 0)) {
e4a473f1
MD
1250 pmap_pagedaemon_waken = 1;
1251 wakeup (&vm_pages_needed);
1252 }
1253 return zalloc(&pvzone);
1254}
1255
1256/*
1257 * This routine is very drastic, but can save the system
1258 * in a pinch.
5bce55a9
MD
1259 *
1260 * No requirements.
e4a473f1
MD
1261 */
1262void
1263pmap_collect(void)
1264{
1265 int i;
1266 vm_page_t m;
1267 static int warningdone=0;
1268
1269 if (pmap_pagedaemon_waken == 0)
1270 return;
5bce55a9 1271 lwkt_gettoken(&vm_token);
20479584 1272 pmap_pagedaemon_waken = 0;
e4a473f1
MD
1273
1274 if (warningdone < 5) {
1275 kprintf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1276 warningdone++;
1277 }
1278
b12defdc 1279 for (i = 0; i < vm_page_array_size; i++) {
e4a473f1 1280 m = &vm_page_array[i];
b12defdc 1281 if (m->wire_count || m->hold_count)
e4a473f1 1282 continue;
b12defdc
MD
1283 if (vm_page_busy_try(m, TRUE) == 0) {
1284 if (m->wire_count == 0 && m->hold_count == 0) {
1285 pmap_remove_all(m);
1286 }
1287 vm_page_wakeup(m);
1288 }
e4a473f1 1289 }
5bce55a9 1290 lwkt_reltoken(&vm_token);
e4a473f1
MD
1291}
1292
1293/*
1294 * If it is the first entry on the list, it is actually
1295 * in the header and we must copy the following entry up
1296 * to the header. Otherwise we must search the list for
1297 * the entry. In either case we free the now unused entry.
b12defdc
MD
1298 *
1299 * caller must hold vm_token
e4a473f1
MD
1300 */
1301static int
d5b116a0 1302pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
e4a473f1
MD
1303{
1304 pv_entry_t pv;
1305 int rtval;
1306
1307 crit_enter();
1308 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1309 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1310 if (pmap == pv->pv_pmap && va == pv->pv_va)
1311 break;
1312 }
1313 } else {
1314 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1315 if (va == pv->pv_va)
1316 break;
1317 }
1318 }
1319
1320 /*
1321 * Note that pv_ptem is NULL if the page table page itself is not
1322 * managed, even if the page being removed IS managed.
1323 */
1324 rtval = 0;
5926987a
MD
1325
1326 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1327 m->md.pv_list_count--;
b12defdc 1328 atomic_add_int(&m->object->agg_pv_list_count, -1);
5926987a
MD
1329 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1330 if (TAILQ_EMPTY(&m->md.pv_list))
1331 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1332 ++pmap->pm_generation;
b12defdc 1333 vm_object_hold(pmap->pm_pteobj);
5926987a 1334 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
b12defdc 1335 vm_object_drop(pmap->pm_pteobj);
5926987a
MD
1336 free_pv_entry(pv);
1337
e4a473f1
MD
1338 crit_exit();
1339 return rtval;
1340}
1341
1342/*
1343 * Create a pv entry for page at pa for (pmap, va). If the page table page
1344 * holding the VA is managed, mpte will be non-NULL.
1345 */
1346static void
1347pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
1348{
1349 pv_entry_t pv;
1350
1351 crit_enter();
1352 pv = get_pv_entry();
1353 pv->pv_va = va;
1354 pv->pv_pmap = pmap;
1355 pv->pv_ptem = mpte;
1356
1357 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1358 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
5926987a 1359 ++pmap->pm_generation;
e4a473f1 1360 m->md.pv_list_count++;
b12defdc 1361 atomic_add_int(&m->object->agg_pv_list_count, 1);
e4a473f1
MD
1362
1363 crit_exit();
1364}
1365
1366/*
 1367 * pmap_remove_pte: unmap a single page from a process address space.
1368 */
1369static int
d5b116a0 1370pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va)
e4a473f1
MD
1371{
1372 vpte_t oldpte;
1373 vm_page_t m;
1374
d5b116a0 1375 oldpte = pmap_inval_loadandclear(ptq, pmap, va);
e7f2d7de
MD
1376 if (oldpte & VPTE_WIRED)
1377 --pmap->pm_stats.wired_count;
1378 KKASSERT(pmap->pm_stats.wired_count >= 0);
d6c96d4d
MD
1379
1380#if 0
e4a473f1
MD
1381 /*
 1382 * Machines that don't support invlpg also don't support
1383 * VPTE_G. XXX VPTE_G is disabled for SMP so don't worry about
1384 * the SMP case.
1385 */
1386 if (oldpte & VPTE_G)
6f7b98e0 1387 madvise((void *)va, PAGE_SIZE, MADV_INVAL);
d6c96d4d 1388#endif
eec2b734
MD
1389 KKASSERT(pmap->pm_stats.resident_count > 0);
1390 --pmap->pm_stats.resident_count;
e7f2d7de 1391 if (oldpte & VPTE_MANAGED) {
e4a473f1
MD
1392 m = PHYS_TO_VM_PAGE(oldpte);
1393 if (oldpte & VPTE_M) {
1394#if defined(PMAP_DIAGNOSTIC)
1395 if (pmap_nw_modified((pt_entry_t) oldpte)) {
1396 kprintf(
1397 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1398 va, oldpte);
1399 }
1400#endif
d6c96d4d 1401 if (pmap_track_modified(pmap, va))
e4a473f1
MD
1402 vm_page_dirty(m);
1403 }
1404 if (oldpte & VPTE_A)
1405 vm_page_flag_set(m, PG_REFERENCED);
d5b116a0 1406 return pmap_remove_entry(pmap, m, va);
e4a473f1 1407 } else {
d5b116a0 1408 return pmap_unuse_pt(pmap, va, NULL);
e4a473f1
MD
1409 }
1410
1411 return 0;
1412}
1413
1414/*
1415 * pmap_remove_page:
1416 *
1417 * Remove a single page from a process address space.
1418 *
1419 * This function may not be called from an interrupt if the pmap is
1420 * not kernel_pmap.
1421 */
1422static void
d5b116a0 1423pmap_remove_page(struct pmap *pmap, vm_offset_t va)
e4a473f1
MD
1424{
1425 vpte_t *ptq;
1426
1427 /*
1428 * if there is no pte for this address, just skip it!!! Otherwise
1429 * get a local va for mappings for this pmap and remove the entry.
1430 */
1431 if (*pmap_pde(pmap, va) != 0) {
71152ac6 1432 ptq = get_ptbase(pmap, va);
e4a473f1 1433 if (*ptq) {
d5b116a0 1434 pmap_remove_pte(pmap, ptq, va);
e4a473f1
MD
1435 }
1436 }
1437}
1438
1439/*
5bce55a9 1440 * Remove the given range of addresses from the specified map.
e4a473f1 1441 *
5bce55a9
MD
1442 * It is assumed that the start and end are properly rounded to the
1443 * page size.
e4a473f1 1444 *
5bce55a9
MD
1445 * This function may not be called from an interrupt if the pmap is
1446 * not kernel_pmap.
e4a473f1 1447 *
5bce55a9 1448 * No requirements.
e4a473f1
MD
1449 */
1450void
1451pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
1452{
1453 vpte_t *ptbase;
1454 vm_offset_t pdnxt;
1455 vm_offset_t ptpaddr;
71152ac6 1456 vm_pindex_t sindex, eindex;
e4a473f1
MD
1457
1458 if (pmap == NULL)
1459 return;
1460
b12defdc 1461 vm_object_hold(pmap->pm_pteobj);
5bce55a9 1462 lwkt_gettoken(&vm_token);
d6c96d4d 1463 KKASSERT(pmap->pm_stats.resident_count >= 0);
5bce55a9
MD
1464 if (pmap->pm_stats.resident_count == 0) {
1465 lwkt_reltoken(&vm_token);
b12defdc 1466 vm_object_drop(pmap->pm_pteobj);
e4a473f1 1467 return;
5bce55a9 1468 }
e4a473f1 1469
e4a473f1
MD
1470 /*
1471 * special handling of removing one page. a very
1472 * common operation and easy to short circuit some
1473 * code.
1474 */
1475 if (((sva + PAGE_SIZE) == eva) &&
1476 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & VPTE_PS) == 0)) {
d5b116a0 1477 pmap_remove_page(pmap, sva);
5bce55a9 1478 lwkt_reltoken(&vm_token);
b12defdc 1479 vm_object_drop(pmap->pm_pteobj);
e4a473f1
MD
1480 return;
1481 }
1482
1483 /*
1484 * Get a local virtual address for the mappings that are being
1485 * worked with.
71152ac6
MD
1486 *
1487 * XXX this is really messy because the kernel pmap is not relative
1488 * to address 0
e4a473f1 1489 */
e4a473f1
MD
1490 sindex = (sva >> PAGE_SHIFT);
1491 eindex = (eva >> PAGE_SHIFT);
1492
1493 for (; sindex < eindex; sindex = pdnxt) {
1494 vpte_t pdirindex;
1495
1496 /*
1497 * Calculate index for next page table.
1498 */
1499 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1500 if (pmap->pm_stats.resident_count == 0)
1501 break;
1502
1503 pdirindex = sindex / NPDEPG;
1504 if (((ptpaddr = pmap->pm_pdir[pdirindex]) & VPTE_PS) != 0) {
eec2b734 1505 KKASSERT(pmap->pm_pdir[pdirindex] != 0);
e4a473f1 1506 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
d5b116a0
MD
1507 pmap_inval_pde(&pmap->pm_pdir[pdirindex], pmap,
1508 (vm_offset_t)pdirindex << SEG_SHIFT);
e4a473f1
MD
1509 continue;
1510 }
1511
1512 /*
1513 * Weed out invalid mappings. Note: we assume that the page
1514 * directory table is always allocated, and in kernel virtual.
1515 */
1516 if (ptpaddr == 0)
1517 continue;
1518
1519 /*
1520 * Limit our scan to either the end of the va represented
1521 * by the current page table page, or to the end of the
1522 * range being removed.
1523 */
e7f2d7de 1524 if (pdnxt > eindex)
e4a473f1 1525 pdnxt = eindex;
e4a473f1 1526
8790d7d8
MD
1527 /*
1528 * NOTE: pmap_remove_pte() can block.
1529 */
e4a473f1
MD
1530 for (; sindex != pdnxt; sindex++) {
1531 vm_offset_t va;
8790d7d8
MD
1532
1533 ptbase = get_ptbase(pmap, sindex << PAGE_SHIFT);
1534 if (*ptbase == 0)
e4a473f1
MD
1535 continue;
1536 va = i386_ptob(sindex);
d5b116a0 1537 if (pmap_remove_pte(pmap, ptbase, va))
e4a473f1
MD
1538 break;
1539 }
1540 }
5bce55a9 1541 lwkt_reltoken(&vm_token);
b12defdc 1542 vm_object_drop(pmap->pm_pteobj);
e4a473f1
MD
1543}
1544
1545/*
e4a473f1
MD
1546 * Removes this physical page from all physical maps in which it resides.
1547 * Reflects back modify bits to the pager.
1548 *
1549 * This routine may not be called from an interrupt.
5bce55a9
MD
1550 *
1551 * No requirements.
e4a473f1
MD
1552 */
1553static void
1554pmap_remove_all(vm_page_t m)
1555{
e4a473f1
MD
1556 vpte_t *pte, tpte;
1557 pv_entry_t pv;
1558
1559#if defined(PMAP_DIAGNOSTIC)
1560 /*
1561 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1562 * pages!
1563 */
1564 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1565 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m));
1566 }
1567#endif
1568
5bce55a9 1569 lwkt_gettoken(&vm_token);
e4a473f1 1570 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
eec2b734
MD
1571 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0);
1572 --pv->pv_pmap->pm_stats.resident_count;
e4a473f1
MD
1573
1574 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
e7f2d7de
MD
1575 KKASSERT(pte != NULL);
1576
d5b116a0 1577 tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va);
e7f2d7de
MD
1578 if (tpte & VPTE_WIRED)
1579 --pv->pv_pmap->pm_stats.wired_count;
1580 KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0);
e4a473f1
MD
1581
1582 if (tpte & VPTE_A)
1583 vm_page_flag_set(m, PG_REFERENCED);
1584
1585 /*
1586 * Update the vm_page_t clean and reference bits.
1587 */
1588 if (tpte & VPTE_M) {
1589#if defined(PMAP_DIAGNOSTIC)
1590 if (pmap_nw_modified((pt_entry_t) tpte)) {
1591 kprintf(
1592 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1593 pv->pv_va, tpte);
1594 }
1595#endif
d6c96d4d 1596 if (pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
1597 vm_page_dirty(m);
1598 }
e4a473f1 1599 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
8790d7d8
MD
1600 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1601 ++pv->pv_pmap->pm_generation;
e4a473f1 1602 m->md.pv_list_count--;
b12defdc 1603 atomic_add_int(&m->object->agg_pv_list_count, -1);
17cde63e
MD
1604 if (TAILQ_EMPTY(&m->md.pv_list))
1605 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
b12defdc 1606 vm_object_hold(pv->pv_pmap->pm_pteobj);
d5b116a0 1607 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
b12defdc 1608 vm_object_drop(pv->pv_pmap->pm_pteobj);
e4a473f1
MD
1609 free_pv_entry(pv);
1610 }
17cde63e 1611 KKASSERT((m->flags & (PG_MAPPED | PG_WRITEABLE)) == 0);
5bce55a9 1612 lwkt_reltoken(&vm_token);
e4a473f1
MD
1613}
1614
1615/*
5bce55a9
MD
1616 * Set the physical protection on the specified range of this map
1617 * as requested.
e4a473f1 1618 *
5bce55a9
MD
1619 * This function may not be called from an interrupt if the map is
1620 * not the kernel_pmap.
e4a473f1 1621 *
5bce55a9 1622 * No requirements.
e4a473f1
MD
1623 */
1624void
1625pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1626{
1627 vpte_t *ptbase;
d5b116a0 1628 vpte_t *ptep;
e4a473f1
MD
1629 vm_offset_t pdnxt, ptpaddr;
1630 vm_pindex_t sindex, eindex;
71152ac6 1631 vm_pindex_t sbase;
e4a473f1
MD
1632
1633 if (pmap == NULL)
1634 return;
1635
1636 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1637 pmap_remove(pmap, sva, eva);
1638 return;
1639 }
1640
1641 if (prot & VM_PROT_WRITE)
1642 return;
1643
5bce55a9 1644 lwkt_gettoken(&vm_token);
71152ac6 1645 ptbase = get_ptbase(pmap, sva);
e4a473f1
MD
1646
1647 sindex = (sva >> PAGE_SHIFT);
1648 eindex = (eva >> PAGE_SHIFT);
71152ac6 1649 sbase = sindex;
e4a473f1
MD
1650
1651 for (; sindex < eindex; sindex = pdnxt) {
1652
1653 unsigned pdirindex;
1654
1655 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1656
1657 pdirindex = sindex / NPDEPG;
d5b116a0
MD
1658
1659 /*
1660 * Clear the modified and writable bits for a 4m page.
1661 * Throw away the modified bit (?)
1662 */
e4a473f1 1663 if (((ptpaddr = pmap->pm_pdir[pdirindex]) & VPTE_PS) != 0) {
d5b116a0
MD
1664 pmap_clean_pde(&pmap->pm_pdir[pdirindex], pmap,
1665 (vm_offset_t)pdirindex << SEG_SHIFT);
e4a473f1
MD
1666 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1667 continue;
1668 }
1669
1670 /*
1671 * Weed out invalid mappings. Note: we assume that the page
1672 * directory table is always allocated, and in kernel virtual.
1673 */
1674 if (ptpaddr == 0)
1675 continue;
1676
1677 if (pdnxt > eindex) {
1678 pdnxt = eindex;
1679 }
1680
1681 for (; sindex != pdnxt; sindex++) {
d6c96d4d 1682 vpte_t pbits;
e4a473f1
MD
1683 vm_page_t m;
1684
d5b116a0
MD
1685 /*
1686 * Clean managed pages and also check the accessed
1687 * bit. Just remove write perms for unmanaged
 1688 * pages. Be careful of races; turning off write
 1689 * access will force a fault rather than setting
1690 * the modified bit at an unexpected time.
1691 */
1692 ptep = &ptbase[sindex - sbase];
1693 if (*ptep & VPTE_MANAGED) {
1694 pbits = pmap_clean_pte(ptep, pmap,
1695 i386_ptob(sindex));
e4a473f1
MD
1696 m = NULL;
1697 if (pbits & VPTE_A) {
1698 m = PHYS_TO_VM_PAGE(pbits);
1699 vm_page_flag_set(m, PG_REFERENCED);
8608b858 1700 atomic_clear_long(ptep, VPTE_A);
e4a473f1
MD
1701 }
1702 if (pbits & VPTE_M) {
d6c96d4d 1703 if (pmap_track_modified(pmap, i386_ptob(sindex))) {
e4a473f1
MD
1704 if (m == NULL)
1705 m = PHYS_TO_VM_PAGE(pbits);
1706 vm_page_dirty(m);
e4a473f1
MD
1707 }
1708 }
d5b116a0
MD
1709 } else {
1710 pbits = pmap_setro_pte(ptep, pmap,
1711 i386_ptob(sindex));
e4a473f1
MD
1712 }
1713 }
1714 }
5bce55a9 1715 lwkt_reltoken(&vm_token);
e4a473f1
MD
1716}
1717
1718/*
1719 * Enter a managed page into a pmap. If the page is not wired related pmap
1720 * data can be destroyed at any time for later demand-operation.
1721 *
1722 * Insert the vm_page (m) at virtual address (v) in (pmap), with the
1723 * specified protection, and wire the mapping if requested.
1724 *
1725 * NOTE: This routine may not lazy-evaluate or lose information. The
1726 * page must actually be inserted into the given map NOW.
1727 *
1728 * NOTE: When entering a page at a KVA address, the pmap must be the
1729 * kernel_pmap.
5bce55a9
MD
1730 *
1731 * No requirements.
e4a473f1
MD
1732 */
1733void
1734pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
921c891e 1735 boolean_t wired, vm_map_entry_t entry __unused)
e4a473f1
MD
1736{
1737 vm_paddr_t pa;
1738 vpte_t *pte;
1739 vm_paddr_t opa;
8608b858 1740 vpte_t origpte, newpte;
e4a473f1 1741 vm_page_t mpte;
e4a473f1
MD
1742
1743 if (pmap == NULL)
1744 return;
1745
1746 va &= VPTE_FRAME;
1747
b12defdc 1748 vm_object_hold(pmap->pm_pteobj);
5bce55a9
MD
1749 lwkt_gettoken(&vm_token);
1750
e4a473f1
MD
1751 /*
1752 * Get the page table page. The kernel_pmap's page table pages
1753 * are preallocated and have no associated vm_page_t.
1754 */
1755 if (pmap == &kernel_pmap)
1756 mpte = NULL;
1757 else
1758 mpte = pmap_allocpte(pmap, va);
1759
e4a473f1
MD
1760 pte = pmap_pte(pmap, va);
1761
1762 /*
1763 * Page Directory table entry not valid, we need a new PT page
1764 * and pmap_allocpte() didn't give us one. Oops!
1765 */
1766 if (pte == NULL) {
ed20d0e3 1767 panic("pmap_enter: invalid page directory pmap=%p, va=0x%p",
e4a473f1
MD
1768 pmap, (void *)va);
1769 }
1770
d5b116a0
MD
1771 /*
1772 * Deal with races on the original mapping (though don't worry
1773 * about VPTE_A races) by cleaning it. This will force a fault
1774 * if an attempt is made to write to the page.
1775 */
e4a473f1 1776 pa = VM_PAGE_TO_PHYS(m) & VPTE_FRAME;
d5b116a0 1777 origpte = pmap_clean_pte(pte, pmap, va);
e4a473f1
MD
1778 opa = origpte & VPTE_FRAME;
1779
1780 if (origpte & VPTE_PS)
1781 panic("pmap_enter: attempted pmap_enter on 4MB page");
1782
1783 /*
1784 * Mapping has not changed, must be protection or wiring change.
1785 */
1786 if (origpte && (opa == pa)) {
1787 /*
1788 * Wiring change, just update stats. We don't worry about
1789 * wiring PT pages as they remain resident as long as there
1790 * are valid mappings in them. Hence, if a user page is wired,
1791 * the PT page will be also.
1792 */
e7f2d7de
MD
1793 if (wired && ((origpte & VPTE_WIRED) == 0))
1794 ++pmap->pm_stats.wired_count;
1795 else if (!wired && (origpte & VPTE_WIRED))
1796 --pmap->pm_stats.wired_count;
1797 KKASSERT(pmap->pm_stats.wired_count >= 0);
e4a473f1 1798
e4a473f1
MD
1799 /*
1800 * Remove the extra pte reference. Note that we cannot
1801 * optimize the RO->RW case because we have adjusted the
1802 * wiring count above and may need to adjust the wiring
1803 * bits below.
1804 */
1805 if (mpte)
1806 mpte->hold_count--;
1807
1808 /*
1809 * We might be turning off write access to the page,
1810 * so we go ahead and sense modify status.
1811 */
e7f2d7de 1812 if (origpte & VPTE_MANAGED) {
d5b116a0
MD
1813 if ((origpte & VPTE_M) &&
1814 pmap_track_modified(pmap, va)) {
e4a473f1
MD
1815 vm_page_t om;
1816 om = PHYS_TO_VM_PAGE(opa);
1817 vm_page_dirty(om);
1818 }
e7f2d7de 1819 pa |= VPTE_MANAGED;
17cde63e 1820 KKASSERT(m->flags & PG_MAPPED);
e4a473f1
MD
1821 }
1822 goto validate;
1823 }
1824 /*
1825 * Mapping has changed, invalidate old range and fall through to
1826 * handle validating new mapping.
1827 */
5926987a 1828 while (opa) {
e4a473f1 1829 int err;
d5b116a0 1830 err = pmap_remove_pte(pmap, pte, va);
e4a473f1 1831 if (err)
d557216f 1832 panic("pmap_enter: pte vanished, va: %p", (void *)va);
5926987a
MD
1833 pte = pmap_pte(pmap, va);
1834 origpte = pmap_clean_pte(pte, pmap, va);
1835 opa = origpte & VPTE_FRAME;
1836 if (opa) {
1837 kprintf("pmap_enter: Warning, raced pmap %p va %p\n",
1838 pmap, (void *)va);
1839 }
e4a473f1
MD
1840 }
1841
1842 /*
1843 * Enter on the PV list if part of our managed memory. Note that we
1844 * raise IPL while manipulating pv_table since pmap_enter can be
1845 * called at interrupt time.
1846 */
1847 if (pmap_initialized &&
1848 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
1849 pmap_insert_entry(pmap, va, mpte, m);
e7f2d7de 1850 pa |= VPTE_MANAGED;
17cde63e 1851 vm_page_flag_set(m, PG_MAPPED);
e4a473f1
MD
1852 }
1853
1854 /*
1855 * Increment counters
1856 */
eec2b734 1857 ++pmap->pm_stats.resident_count;
e4a473f1
MD
1858 if (wired)
1859 pmap->pm_stats.wired_count++;
1860
1861validate:
1862 /*
1863 * Now validate mapping with desired protection/wiring.
1864 */
1865 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | VPTE_V);
1866
1867 if (wired)
e7f2d7de 1868 newpte |= VPTE_WIRED;
17cde63e
MD
1869 if (pmap != &kernel_pmap)
1870 newpte |= VPTE_U;
e4a473f1
MD
1871
1872 /*
d5b116a0
MD
1873 * If the mapping or permission bits are different from the
1874 * (now cleaned) original pte, an update is needed. We've
1875 * already downgraded or invalidated the page so all we have
1876 * to do now is update the bits.
1877 *
1878 * XXX should we synchronize RO->RW changes to avoid another
1879 * fault?
e4a473f1 1880 */
d5b116a0 1881 if ((origpte & ~(VPTE_W|VPTE_M|VPTE_A)) != newpte) {
e4a473f1 1882 *pte = newpte | VPTE_A;
17cde63e
MD
1883 if (newpte & VPTE_W)
1884 vm_page_flag_set(m, PG_WRITEABLE);
e4a473f1 1885 }
17cde63e 1886 KKASSERT((newpte & VPTE_MANAGED) == 0 || m->flags & PG_MAPPED);
5bce55a9 1887 lwkt_reltoken(&vm_token);
b12defdc 1888 vm_object_drop(pmap->pm_pteobj);
e4a473f1
MD
1889}
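
/*
 * Illustrative sketch only, kept under #if 0 like the other non-compiled
 * code in this file: a hypothetical caller entering a single managed page
 * read/write and wired.  The helper name is made up for the example.
 */
#if 0
static void
pmap_enter_example(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE, TRUE, NULL);
}
#endif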
1890
1891/*
17cde63e 1892 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired.
e4a473f1 1893 *
17cde63e 1894 * Currently this routine may only be used on user pmaps, not kernel_pmap.
e4a473f1 1895 */
1b9d3514 1896void
17cde63e 1897pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
e4a473f1
MD
1898{
1899 vpte_t *pte;
1900 vm_paddr_t pa;
17cde63e 1901 vm_page_t mpte;
135d7199
MD
1902 unsigned ptepindex;
1903 vm_offset_t ptepa;
e4a473f1
MD
1904
1905 KKASSERT(pmap != &kernel_pmap);
e4a473f1
MD
1906
1907 KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS);
1908
1909 /*
17cde63e
MD
1910 * Calculate pagetable page (mpte), allocating it if necessary.
1911 *
1912 * A held page table page (mpte), or NULL, is passed on to the
1913 * section following.
e4a473f1
MD
1914 */
1915 ptepindex = va >> PDRSHIFT;
17cde63e 1916
b12defdc 1917 vm_object_hold(pmap->pm_pteobj);
5bce55a9
MD
1918 lwkt_gettoken(&vm_token);
1919
17cde63e 1920 do {
e4a473f1
MD
1921 /*
1922 * Get the page directory entry
1923 */
1924 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1925
1926 /*
1927 * If the page table page is mapped, we just increment
1928 * the hold count, and activate it.
1929 */
1930 if (ptepa) {
1931 if (ptepa & VPTE_PS)
1932 panic("pmap_enter_quick: unexpected mapping into 4MB page");
1933 if (pmap->pm_ptphint &&
1934 (pmap->pm_ptphint->pindex == ptepindex)) {
1935 mpte = pmap->pm_ptphint;
1936 } else {
1937 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1938 pmap->pm_ptphint = mpte;
b12defdc 1939 vm_page_wakeup(mpte);
e4a473f1 1940 }
17cde63e
MD
1941 if (mpte)
1942 mpte->hold_count++;
e4a473f1
MD
1943 } else {
1944 mpte = _pmap_allocpte(pmap, ptepindex);
1945 }
17cde63e 1946 } while (mpte == NULL);
e4a473f1
MD
1947
1948 /*
1949 * Ok, now that the page table page has been validated, get the pte.
1950 * If the pte is already mapped undo mpte's hold_count and
1951 * just return.
1952 */
1953 pte = pmap_pte(pmap, va);
1954 if (*pte) {
17cde63e 1955 pmap_unwire_pte_hold(pmap, mpte);
5bce55a9 1956 lwkt_reltoken(&vm_token);
b12defdc 1957 vm_object_drop(pmap->pm_pteobj);
17cde63e 1958 return;
e4a473f1
MD
1959 }
1960
1961 /*
1962 * Enter on the PV list if part of our managed memory. Note that we
1963 * raise IPL while manipulating pv_table since pmap_enter can be
1964 * called at interrupt time.
1965 */
17cde63e 1966 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
e4a473f1 1967 pmap_insert_entry(pmap, va, mpte, m);
17cde63e
MD
1968 vm_page_flag_set(m, PG_MAPPED);
1969 }
e4a473f1
MD
1970
1971 /*
1972 * Increment counters
1973 */
eec2b734 1974 ++pmap->pm_stats.resident_count;
e4a473f1
MD
1975
1976 pa = VM_PAGE_TO_PHYS(m);
1977
1978 /*
1979 * Now validate mapping with RO protection
1980 */
1981 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
d5b116a0 1982 *pte = (vpte_t)pa | VPTE_V | VPTE_U;
e4a473f1 1983 else
d5b116a0 1984 *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED;
17cde63e
MD
1985 /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */
1986 /*pmap_inval_flush(&info); don't need for vkernel */
5bce55a9 1987 lwkt_reltoken(&vm_token);
b12defdc 1988 vm_object_drop(pmap->pm_pteobj);
e4a473f1
MD
1989}
1990
e7f2d7de
MD
1991/*
1992 * Extract the physical address for the translation at the specified
1993 * virtual address in the pmap.
5bce55a9
MD
1994 *
1995 * The caller must hold vm_token if non-blocking operation is desired.
1996 * No requirements.
e7f2d7de 1997 */
6f7b98e0
MD
1998vm_paddr_t
1999pmap_extract(pmap_t pmap, vm_offset_t va)
2000{
2001 vm_paddr_t rtval;
2002 vpte_t pte;
2003
5bce55a9 2004 lwkt_gettoken(&vm_token);
6f7b98e0
MD
2005 if (pmap && (pte = pmap->pm_pdir[va >> SEG_SHIFT]) != 0) {
2006 if (pte & VPTE_PS) {
2007 rtval = pte & ~((vpte_t)(1 << SEG_SHIFT) - 1);
2008 rtval |= va & SEG_MASK;
2009 } else {
71152ac6 2010 pte = *get_ptbase(pmap, va);
6f7b98e0
MD
2011 rtval = (pte & VPTE_FRAME) | (va & PAGE_MASK);
2012 }
5bce55a9
MD
2013 } else {
2014 rtval = 0;
6f7b98e0 2015 }
5bce55a9
MD
2016 lwkt_reltoken(&vm_token);
2017 return(rtval);
6f7b98e0
MD
2018}
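
/*
 * Illustrative sketch only (under #if 0): recovering the vm_page_t backing
 * a mapping via pmap_extract().  The helper name is hypothetical and the
 * zero test is only a rough validity check.
 */
#if 0
static vm_page_t
pmap_extract_page_example(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa;

	pa = pmap_extract(pmap, va);	/* 0 when the pde is empty */
	if (pa == 0)
		return (NULL);
	return (PHYS_TO_VM_PAGE(pa & VPTE_FRAME));
}
#endif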
2019
e4a473f1
MD
2020#define MAX_INIT_PT (96)
2021
2022/*
2023 * This routine preloads the ptes for a given object into the specified pmap.
2024 * This eliminates the blast of soft faults on process startup and
2025 * immediately after an mmap.
5bce55a9
MD
2026 *
2027 * No requirements.
e4a473f1
MD
2028 */
2029static int pmap_object_init_pt_callback(vm_page_t p, void *data);
2030
2031void
2032pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
2033 vm_object_t object, vm_pindex_t pindex,
2034 vm_size_t size, int limit)
2035{
2036 struct rb_vm_page_scan_info info;
287ebb09 2037 struct lwp *lp;
e4a473f1
MD
2038 int psize;
2039
2040 /*
2041 * We can't preinit if read access isn't set or there is no pmap
2042 * or object.
2043 */
2044 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL)
2045 return;
2046
2047 /*
2048 * We can't preinit if the pmap is not the current pmap
2049 */
287ebb09
MD
2050 lp = curthread->td_lwp;
2051 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace))
e4a473f1
MD
2052 return;
2053
2054 psize = size >> PAGE_SHIFT;
2055
2056 if ((object->type != OBJT_VNODE) ||
2057 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
2058 (object->resident_page_count > MAX_INIT_PT))) {
2059 return;
2060 }
2061
2062 if (psize + pindex > object->size) {
2063 if (object->size < pindex)
2064 return;
2065 psize = object->size - pindex;
2066 }
2067
2068 if (psize == 0)
2069 return;
2070
2071 /*
2072 * Use a red-black scan to traverse the requested range and load
2073 * any valid pages found into the pmap.
2074 *
2075 * We cannot safely scan the object's memq unless we are in a
2076 * critical section since interrupts can remove pages from objects.
2077 */
2078 info.start_pindex = pindex;
2079 info.end_pindex = pindex + psize - 1;
2080 info.limit = limit;
2081 info.mpte = NULL;
2082 info.addr = addr;
2083 info.pmap = pmap;
2084
b12defdc 2085 vm_object_hold(object);
e4a473f1
MD
2086 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
2087 pmap_object_init_pt_callback, &info);
b12defdc 2088 vm_object_drop(object);
e4a473f1
MD
2089}
2090
5bce55a9
MD
2091/*
2092 * The caller must hold vm_token.
2093 */
e4a473f1
MD
2094static
2095int
2096pmap_object_init_pt_callback(vm_page_t p, void *data)
2097{
2098 struct rb_vm_page_scan_info *info = data;
2099 vm_pindex_t rel_index;
b12defdc 2100
e4a473f1
MD
2101 /*
2102 * Don't allow an madvise to blow away our really
2103 * free pages by allocating pv entries.
2104 */
2105 if ((info->limit & MAP_PREFAULT_MADVISE) &&
2106 vmstats.v_free_count < vmstats.v_free_reserved) {
2107 return(-1);
2108 }
0d987a03
MD
2109
2110 /*
2111 * Ignore list markers and ignore pages we cannot instantly
2112 * busy (while holding the object token).
2113 */
2114 if (p->flags & PG_MARKER)
2115 return 0;
b12defdc
MD
2116 if (vm_page_busy_try(p, TRUE))
2117 return 0;
e4a473f1 2118 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
b12defdc 2119 (p->flags & PG_FICTITIOUS) == 0) {
e4a473f1
MD
2120 if ((p->queue - p->pc) == PQ_CACHE)
2121 vm_page_deactivate(p);
e4a473f1 2122 rel_index = p->pindex - info->start_pindex;
17cde63e
MD
2123 pmap_enter_quick(info->pmap,
2124 info->addr + i386_ptob(rel_index), p);
e4a473f1 2125 }
b12defdc 2126 vm_page_wakeup(p);
e4a473f1
MD
2127 return(0);
2128}
2129
2130/*
1b9d3514
MD
2131 * Return TRUE if the pmap is in shape to trivially
2132 * pre-fault the specified address.
2133 *
2134 * Returns FALSE if it would be non-trivial or if a
2135 * pte is already loaded into the slot.
5bce55a9
MD
2136 *
2137 * No requirements.
e4a473f1 2138 */
1b9d3514
MD
2139int
2140pmap_prefault_ok(pmap_t pmap, vm_offset_t addr)
e4a473f1 2141{
1b9d3514 2142 vpte_t *pte;
5bce55a9 2143 int ret;
e4a473f1 2144
5bce55a9
MD
2145 lwkt_gettoken(&vm_token);
2146 if ((*pmap_pde(pmap, addr)) == 0) {
2147 ret = 0;
2148 } else {
2149 pte = get_ptbase(pmap, addr);
2150 ret = (*pte) ? 0 : 1;
2151 }
2152 lwkt_reltoken(&vm_token);
2153 return (ret);
e4a473f1
MD
2154}
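
/*
 * Illustrative sketch only (under #if 0): a hypothetical prefault path
 * would test pmap_prefault_ok() and only then enter the page with
 * pmap_enter_quick().  The helper name is made up for the example.
 */
#if 0
static void
pmap_prefault_example(pmap_t pmap, vm_offset_t addr, vm_page_t m)
{
	if (pmap_prefault_ok(pmap, addr))
		pmap_enter_quick(pmap, addr, m);
}
#endif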
2155
2156/*
5bce55a9
MD
2157 * Change the wiring attribute for a map/virtual-address pair.
2158 * The mapping must already exist in the pmap.
2159 *
2160 * No other requirements.
e4a473f1
MD
2161 */
2162void
921c891e
MD
2163pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired,
2164 vm_map_entry_t entry __unused)
e4a473f1
MD
2165{
2166 vpte_t *pte;
2167
2168 if (pmap == NULL)
2169 return;
2170
5bce55a9 2171 lwkt_gettoken(&vm_token);
71152ac6 2172 pte = get_ptbase(pmap, va);
e4a473f1 2173
e7f2d7de
MD
2174 if (wired && (*pte & VPTE_WIRED) == 0)
2175 ++pmap->pm_stats.wired_count;
2176 else if (!wired && (*pte & VPTE_WIRED))
2177 --pmap->pm_stats.wired_count;
2178 KKASSERT(pmap->pm_stats.wired_count >= 0);
e4a473f1
MD
2179
2180 /*
2181 * Wiring is not a hardware characteristic so there is no need to
2182 * invalidate TLB. However, in an SMP environment we must use
2183 * a locked bus cycle to update the pte (if we are not using
2184 * the pmap_inval_*() API that is)... it's ok to do this for simple
2185 * wiring changes.
2186 */
e4a473f1 2187 if (wired)
8608b858 2188 atomic_set_long(pte, VPTE_WIRED);
e4a473f1 2189 else
8608b858 2190 atomic_clear_long(pte, VPTE_WIRED);
5bce55a9 2191 lwkt_reltoken(&vm_token);
e4a473f1
MD
2192}
2193
2194/*
2195 * Copy the range specified by src_addr/len
2196 * from the source map to the range dst_addr/len
2197 * in the destination map.
2198 *
2199 * This routine is only advisory and need not do anything.
2200 */
2201void
2202pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2203 vm_size_t len, vm_offset_t src_addr)
2204{
e4a473f1
MD
2205 vm_offset_t addr;
2206 vm_offset_t end_addr = src_addr + len;
2207 vm_offset_t pdnxt;
2208 vpte_t *src_frame;
2209 vpte_t *dst_frame;
2210 vm_page_t m;
2211
17cde63e
MD
2212 /*
2213 * XXX BUGGY. Among other things, srcmpte is assumed to remain
2214 * valid through blocking calls, and that's just not going to
2215 * be the case.
2216 *
2217 * FIXME!
2218 */
2219 return;
2220
e4a473f1
MD
2221 if (dst_addr != src_addr)
2222 return;
2223 if (dst_pmap->pm_pdir == NULL)
2224 return;
2225 if (src_pmap->pm_pdir == NULL)
2226 return;
2227
b12defdc 2228 lwkt_gettoken(&vm_token);
eec2b734 2229
71152ac6
MD
2230 src_frame = get_ptbase1(src_pmap, src_addr);
2231 dst_frame = get_ptbase2(dst_pmap, src_addr);
e4a473f1 2232
e4a473f1
MD
2233 /*
2234 * critical section protection is required to maintain the page/object
2235 * association; interrupts can free pages and remove them from
2236 * their objects.
2237 */
e4a473f1
MD
2238 for (addr = src_addr; addr < end_addr; addr = pdnxt) {
2239 vpte_t *src_pte, *dst_pte;
2240 vm_page_t dstmpte, srcmpte;
2241 vm_offset_t srcptepaddr;
2242 unsigned ptepindex;
2243
2244 if (addr >= VM_MAX_USER_ADDRESS)
ed20d0e3 2245 panic("pmap_copy: invalid to pmap_copy page tables");
e4a473f1
MD
2246
2247 /*
2248 * Don't let optional prefaulting of pages make us go
2249 * way below the low water mark of free pages or way
2250 * above the high water mark of used pv entries.
2251 */
2252 if (vmstats.v_free_count < vmstats.v_free_reserved ||
2253 pv_entry_count > pv_entry_high_water)
2254 break;
2255
2256 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2257 ptepindex = addr >> PDRSHIFT;
2258
2259 srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2260 if (srcptepaddr == 0)
2261 continue;
2262
2263 if (srcptepaddr & VPTE_PS) {
2264 if (dst_pmap->pm_pdir[ptepindex] == 0) {
8608b858 2265 dst_pmap->pm_pdir[ptepindex] = (vpte_t)srcptepaddr;
e4a473f1
MD
2266 dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2267 }
2268 continue;
2269 }
2270
2271 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
17cde63e
MD
2272 if ((srcmpte == NULL) || (srcmpte->hold_count == 0) ||
2273 (srcmpte->flags & PG_BUSY)) {
e4a473f1 2274 continue;
17cde63e 2275 }
e4a473f1
MD
2276
2277 if (pdnxt > end_addr)
2278 pdnxt = end_addr;
2279
71152ac6
MD
2280 src_pte = src_frame + ((addr - src_addr) >> PAGE_SHIFT);
2281 dst_pte = dst_frame + ((addr - src_addr) >> PAGE_SHIFT);
e4a473f1
MD
2282 while (addr < pdnxt) {
2283 vpte_t ptetemp;
17cde63e 2284
e4a473f1
MD
2285 ptetemp = *src_pte;
2286 /*
2287 * we only virtual copy managed pages
2288 */
e7f2d7de 2289 if ((ptetemp & VPTE_MANAGED) != 0) {
e4a473f1
MD
2290 /*
2291 * We have to check after allocpte for the
2292 * pte still being around... allocpte can
2293 * block.
eec2b734
MD
2294 *
2295 * pmap_allocpte() can block; unfortunately we
2296 * then have to reload the page table bases.
e4a473f1
MD
2297 */
2298 dstmpte = pmap_allocpte(dst_pmap, addr);
eec2b734
MD
2299 src_frame = get_ptbase1(src_pmap, src_addr);
2300 dst_frame = get_ptbase2(dst_pmap, src_addr);
2301
17cde63e
MD
2302 if ((*dst_pte == 0) && (ptetemp = *src_pte) &&
2303 (ptetemp & VPTE_MANAGED) != 0) {
e4a473f1 2304 /*
70fc5283
MD
2305 * Clear the modified and accessed
2306 * (referenced) bits during the copy.
d6c96d4d 2307 *
70fc5283
MD
2308 * We do not have to clear the write
2309 * bit to force a fault-on-modify
2310 * because the real kernel's target
2311 * pmap is empty and will fault anyway.
e4a473f1
MD
2312 */
2313 m = PHYS_TO_VM_PAGE(ptetemp);
70fc5283 2314 *dst_pte = ptetemp & ~(VPTE_M | VPTE_A);
eec2b734 2315 ++dst_pmap->pm_stats.resident_count;
e4a473f1
MD
2316 pmap_insert_entry(dst_pmap, addr,
2317 dstmpte, m);
17cde63e 2318 KKASSERT(m->flags & PG_MAPPED);
e4a473f1 2319 } else {
d5b116a0 2320 pmap_unwire_pte_hold(dst_pmap, dstmpte);
e4a473f1
MD
2321 }
2322 if (dstmpte->hold_count >= srcmpte->hold_count)
2323 break;
2324 }
2325 addr += PAGE_SIZE;
2326 src_pte++;
2327 dst_pte++;
2328 }
2329 }
b12defdc 2330 lwkt_reltoken(&vm_token);
e4a473f1
MD
2331}
2332
2333/*
2334 * pmap_zero_page:
2335 *
2336 * Zero the specified PA by mapping the page into KVM and clearing its
2337 * contents.
2338 *
2339 * This function may be called from an interrupt and no locking is
2340 * required.
2341 */
2342void
2343pmap_zero_page(vm_paddr_t phys)
2344{
2345 struct mdglobaldata *gd = mdcpu;
2346
2347 crit_enter();
71152ac6 2348 if (*gd->gd_CMAP3)
e4a473f1 2349 panic("pmap_zero_page: CMAP3 busy");
a3c35df6 2350 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2351 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2352
2353 bzero(gd->gd_CADDR3, PAGE_SIZE);
71152ac6 2354 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2355 crit_exit();
2356}
2357
2358/*
2359 * pmap_page_assertzero:
2360 *
2361 * Assert that a page is empty, panic if it isn't.
2362 */
2363void
2364pmap_page_assertzero(vm_paddr_t phys)
2365{
2366 struct mdglobaldata *gd = mdcpu;
2367 int i;
2368
2369 crit_enter();
71152ac6 2370 if (*gd->gd_CMAP3)
e4a473f1 2371 panic("pmap_page_assertzero: CMAP3 busy");
71152ac6
MD
2372 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W |
2373 (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2374 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2375 for (i = 0; i < PAGE_SIZE; i += 4) {
2376 if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) {
ed20d0e3 2377 panic("pmap_page_assertzero() @ %p not zero!",
e4a473f1
MD
2378 (void *)gd->gd_CADDR3);
2379 }
2380 }
71152ac6 2381 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2382 crit_exit();
2383}
2384
2385/*
2386 * pmap_zero_page_area:
2387 *
2388 * Zero part of a physical page by mapping it into memory and clearing
2389 * its contents with bzero.
2390 *
2391 * off and size may not cover an area beyond a single hardware page.
2392 */
2393void
2394pmap_zero_page_area(vm_paddr_t phys, int off, int size)
2395{
2396 struct mdglobaldata *gd = mdcpu;
2397
2398 crit_enter();
71152ac6 2399 if (*gd->gd_CMAP3)
e4a473f1 2400 panic("pmap_zero_page_area: CMAP3 busy");
71152ac6
MD
2401 *gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W |
2402 (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
6f7b98e0 2403 madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2404
2405 bzero((char *)gd->gd_CADDR3 + off, size);
71152ac6 2406 *gd->gd_CMAP3 = 0;
e4a473f1
MD
2407 crit_exit();
2408}
2409
2410/*
2411 * pmap_copy_page:
2412 *
2413 * Copy the physical page from the source PA to the target PA.
2414 * This function may be called from an interrupt. No locking
2415 * is required.
2416 */
2417void
2418pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
2419{
2420 struct mdglobaldata *gd = mdcpu;
2421
2422 crit_enter();
2423 if (*(int *) gd->gd_CMAP1)
2424 panic("pmap_copy_page: CMAP1 busy");
2425 if (*(int *) gd->gd_CMAP2)
2426 panic("pmap_copy_page: CMAP2 busy");
2427
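	/*
	 * Map the source page read-only at the per-cpu CADDR1 and the
	 * destination read/write at CADDR2, then invalidate the real
	 * kernel's view of those addresses before doing the copy.
	 */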
4e7c41c5 2428 *(int *) gd->gd_CMAP1 = VPTE_V | VPTE_R | (src & PG_FRAME) | VPTE_A;
e4a473f1
MD
2429 *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M;
2430
6f7b98e0
MD
2431 madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL);
2432 madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2433
2434 bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE);
2435
2436 *(int *) gd->gd_CMAP1 = 0;
2437 *(int *) gd->gd_CMAP2 = 0;
2438 crit_exit();
2439}
2440
2441/*
2442 * pmap_copy_page_frag:
2443 *
2444 * Copy a fragment of a physical page from the source PA to the target
2445 * PA; the low bits of src/dst give the in-page offsets and bytes gives
2446 * the length. May be called from an interrupt; no locking is required.
2447 */
2448void
2449pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
2450{
2451 struct mdglobaldata *gd = mdcpu;
2452
2453 crit_enter();
2454 if (*(int *) gd->gd_CMAP1)
2455 panic("pmap_copy_page_frag: CMAP1 busy");
2456 if (*(int *) gd->gd_CMAP2)
2457 panic("pmap_copy_page_frag: CMAP2 busy");
2458
2459 *(int *) gd->gd_CMAP1 = VPTE_V | (src & VPTE_FRAME) | VPTE_A;
2460 *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M;
2461
6f7b98e0
MD
2462 madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL);
2463 madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL);
e4a473f1
MD
2464
2465 bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK),
2466 (char *)gd->gd_CADDR2 + (dst & PAGE_MASK),
2467 bytes);
2468
2469 *(int *) gd->gd_CMAP1 = 0;
2470 *(int *) gd->gd_CMAP2 = 0;
2471 crit_exit();
2472}
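
/*
 * Illustrative sketch only (under #if 0): copying one 512-byte sector
 * between two physical pages.  The in-page offsets ride in the low bits
 * of the physical addresses; as with pmap_zero_page_area() the region
 * presumably may not cross a page boundary.  The helper is hypothetical.
 */
#if 0
static void
pmap_copy_sector_example(vm_paddr_t src_pa, vm_paddr_t dst_pa, int sector)
{
	pmap_copy_page_frag(src_pa + sector * 512, dst_pa + sector * 512, 512);
}
#endif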
2473
2474/*
2475 * Returns true if the pmap's pv is one of the first
2476 * 16 pvs linked to from this page. This count may
2477 * be changed upwards or downwards in the future; it
2478 * is only necessary that true be returned for a small
2479 * subset of pmaps for proper page aging.
5bce55a9
MD
2480 *
2481 * No requirements.
e4a473f1
MD
2482 */
2483boolean_t
2484pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2485{
2486 pv_entry_t pv;
2487 int loops = 0;
2488
2489 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2490 return FALSE;
2491
2492 crit_enter();
5bce55a9 2493 lwkt_gettoken(&vm_token);
e4a473f1
MD
2494
2495 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2496 if (pv->pv_pmap == pmap) {
5bce55a9 2497 lwkt_reltoken(&vm_token);
e4a473f1
MD
2498 crit_exit();
2499 return TRUE;
2500 }
2501 loops++;
2502 if (loops >= 16)
2503 break;
2504 }
5bce55a9 2505 lwkt_reltoken(&vm_token);
e4a473f1
MD
2506 crit_exit();
2507 return (FALSE);
2508}
2509
2510/*
2511 * Remove all pages from the specified address space;
2512 * this aids process exit speeds. Also, this code
2513 * is special-cased for the current process only, but
2514 * can have the more generic (and slightly slower)
2515 * mode enabled. This is much faster than pmap_remove
2516 * in the case of running down an entire address space.
5bce55a9
MD
2517 *
2518 * No requirements.
e4a473f1
MD
2519 */
2520void
2521pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2522{
2523 vpte_t *pte, tpte;
2524 pv_entry_t pv, npv;
2525 vm_page_t m;
8790d7d8 2526 int32_t save_generation;
e4a473f1 2527
b12defdc
MD
2528 if (pmap->pm_pteobj)
2529 vm_object_hold(pmap->pm_pteobj);
5bce55a9 2530 lwkt_gettoken(&vm_token);
e4a473f1
MD
2531 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2532 if (pv->pv_va >= eva || pv->pv_va < sva) {
2533 npv = TAILQ_NEXT(pv, pv_plist);
2534 continue;
2535 }
2536
8790d7d8
MD
2537 KKASSERT(pmap == pv->pv_pmap);
2538
2539 pte = pmap_pte(pmap, pv->pv_va);
e4a473f1
MD
2540
2541 /*
2542 * We cannot remove wired pages from a process' mapping
2543 * at this time
2544 */
d5b116a0 2545 if (*pte & VPTE_WIRED) {
e4a473f1
MD
2546 npv = TAILQ_NEXT(pv, pv_plist);
2547 continue;
2548 }
d5b116a0 2549 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va);
e4a473f1
MD
2550
2551 m = PHYS_TO_VM_PAGE(tpte);
2552
2553 KASSERT(m < &vm_page_array[vm_page_array_size],
8608b858 2554 ("pmap_remove_pages: bad tpte %lx", tpte));
e4a473f1 2555
eec2b734
MD
2556 KKASSERT(pmap->pm_stats.resident_count > 0);
2557 --pmap->pm_stats.resident_count;
e4a473f1
MD
2558
2559 /*
2560 * Update the vm_page_t clean and reference bits.
2561 */
2562 if (tpte & VPTE_M) {
2563 vm_page_dirty(m);
2564 }
2565
e4a473f1 2566 npv = TAILQ_NEXT(pv, pv_plist);
8790d7d8
MD
2567 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
2568 save_generation = ++pmap->pm_generation;
e4a473f1
MD
2569
2570 m->md.pv_list_count--;
b12defdc 2571 atomic_add_int(&m->object->agg_pv_list_count, -1);
e4a473f1 2572 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
17cde63e 2573 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
e4a473f1 2574 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
e4a473f1 2575
d5b116a0 2576 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem);
e4a473f1 2577 free_pv_entry(pv);
8790d7d8
MD
2578
2579 /*
2580 * Restart the scan if we blocked during the unuse or free
2581 * calls and other removals were made.
2582 */
2583 if (save_generation != pmap->pm_generation) {
2584 kprintf("Warning: pmap_remove_pages race-A avoided\n");
cd2a0876 2585 npv = TAILQ_FIRST(&pmap->pm_pvlist);
8790d7d8 2586 }
e4a473f1 2587 }
5bce55a9 2588 lwkt_reltoken(&vm_token);
b12defdc
MD
2589 if (pmap->pm_pteobj)
2590 vm_object_drop(pmap->pm_pteobj);
e4a473f1
MD
2591}
2592
2593/*
d5b116a0 2594 * pmap_testbit tests bits in active mappings of a VM page.
5bce55a9
MD
2595 *
2596 * The caller must hold vm_token
e4a473f1
MD
2597 */
2598static boolean_t
2599pmap_testbit(vm_page_t m, int bit)
2600{
2601 pv_entry_t pv;
2602 vpte_t *pte;
2603
2604 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2605 return FALSE;
2606
2607 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2608 return FALSE;
2609
2610 crit_enter();
2611
2612 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2613 /*
2614 * If the bit being tested is the modified or accessed
2615 * bit, skip pages whose modified state we do not
2616 * track (pmap_track_modified()).
2617 */
2618 if (bit & (VPTE_A|VPTE_M)) {
d6c96d4d 2619 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2620 continue;
2621 }
2622
2623#if defined(PMAP_DIAGNOSTIC)
2624 if (!pv->pv_pmap) {
2625 kprintf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2626 continue;
2627 }
2628#endif
2629 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2630 if (*pte & bit) {
2631 crit_exit();
2632 return TRUE;
2633 }
2634 }
2635 crit_exit();
2636 return (FALSE);
2637}
2638
2639/*
70fc5283
MD
2640 * This routine is used to clear bits in ptes. Certain bits require special
2641 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit.
d5b116a0
MD
2642 *
2643 * This routine is only called with certain VPTE_* bit combinations.
5bce55a9
MD
2644 *
2645 * The caller must hold vm_token
e4a473f1
MD
2646 */
2647static __inline void
d6c96d4d 2648pmap_clearbit(vm_page_t m, int bit)
e4a473f1 2649{
e4a473f1
MD
2650 pv_entry_t pv;
2651 vpte_t *pte;
d6c96d4d 2652 vpte_t pbits;
e4a473f1
MD
2653
2654 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2655 return;
2656
e4a473f1
MD
2657 crit_enter();
2658
2659 /*
2660 * Loop over all current mappings, setting/clearing as appropriate.
2661 * If setting RO, do we need to clear the VAC?
2662 */
2663 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2664 /*
2665 * don't write protect pager mappings
2666 */
d6c96d4d
MD
2667 if (bit == VPTE_W) {
2668 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2669 continue;
2670 }
2671
2672#if defined(PMAP_DIAGNOSTIC)
2673 if (!pv->pv_pmap) {
2674 kprintf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2675 continue;
2676 }
2677#endif
2678
2679 /*
2680 * Careful here. We can use a locked bus instruction to
2681 * clear VPTE_A or VPTE_M safely but we need to synchronize
2682 * with the target cpus when we mess with VPTE_W.
d6c96d4d 2683 *
70fc5283
MD
2684 * On virtual kernels we must force a new fault-on-write
2685 * in the real kernel if we clear the Modify bit ourselves,
2686 * otherwise the real kernel will not get a new fault and
2687 * will never set our Modify bit again.
e4a473f1
MD
2688 */
2689 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
d5b116a0 2690 if (*pte & bit) {
d6c96d4d 2691 if (bit == VPTE_W) {
d5b116a0
MD
2692 /*
2693 * We must also clear VPTE_M when clearing
2694 * VPTE_W
2695 */
2696 pbits = pmap_clean_pte(pte, pv->pv_pmap,
2697 pv->pv_va);
2698 if (pbits & VPTE_M)
d6c96d4d 2699 vm_page_dirty(m);
d6c96d4d
MD
2700 } else if (bit == VPTE_M) {
2701 /*
70fc5283
MD
2702 * We do not have to make the page read-only
2703 * when clearing the Modify bit. The real
2704 * kernel will make the real PTE read-only
2705 * or otherwise detect the write and set
2706 * our VPTE_M again simply by us invalidating
2707 * the real kernel VA for the pmap (as we did
2708 * above). This allows the real kernel to
2709 * handle the write fault without forwarding
2710 * the fault to us.
d6c96d4d 2711 */
8608b858 2712 atomic_clear_long(pte, VPTE_M);
d5b116a0
MD
2713 } else if ((bit & (VPTE_W|VPTE_M)) == (VPTE_W|VPTE_M)) {
2714 /*
2715 * We've been asked to clear W & M; presumably
2716 * the caller doesn't want us to update
2717 * the dirty status of the VM page.
2718 */
2719 pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va);
d6c96d4d 2720 } else {
d5b116a0
MD
2721 /*
2722 * We've been asked to clear bits that do
2723 * not interact with hardware.
2724 */
8608b858 2725 atomic_clear_long(pte, bit);
e4a473f1
MD
2726 }
2727 }
2728 }
e4a473f1
MD
2729 crit_exit();
2730}
2731
2732/*
5bce55a9 2733 * Lower the permission for all mappings to a given page.
e4a473f1 2734 *
5bce55a9 2735 * No requirements.
e4a473f1
MD
2736 */
2737void
2738pmap_page_protect(vm_page_t m, vm_prot_t prot)
2739{
2740 if ((prot & VM_PROT_WRITE) == 0) {
5bce55a9 2741 lwkt_gettoken(&vm_token);
e4a473f1 2742 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
d6c96d4d 2743 pmap_clearbit(m, VPTE_W);
17cde63e 2744 vm_page_flag_clear(m, PG_WRITEABLE);
e4a473f1
MD
2745 } else {
2746 pmap_remove_all(m);
2747 }
5bce55a9 2748 lwkt_reltoken(&vm_token);
e4a473f1
MD
2749 }
2750}
2751
2752vm_paddr_t
cfd17028 2753pmap_phys_address(vm_pindex_t ppn)
e4a473f1
MD
2754{
2755 return (i386_ptob(ppn));
2756}
2757
2758/*
5bce55a9
MD
2759 * Return a count of reference bits for a page, clearing those bits.
2760 * It is not necessary for every reference bit to be cleared, but it
2761 * is necessary that 0 only be returned when there are truly no
2762 * reference bits set.
e4a473f1 2763 *
5bce55a9
MD
2764 * XXX: The exact number of bits to check and clear is a matter that
2765 * should be tested and standardized at some point in the future for
2766 * optimal aging of shared pages.
e4a473f1 2767 *
5bce55a9 2768 * No requirements.
e4a473f1
MD
2769 */
2770int
2771pmap_ts_referenced(vm_page_t m)
2772{
2773 pv_entry_t pv, pvf, pvn;
2774 vpte_t *pte;
2775 int rtval = 0;
2776
2777 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2778 return (rtval);
2779
2780 crit_enter();
5bce55a9 2781 lwkt_gettoken(&vm_token);
e4a473f1
MD
2782
2783 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
2784
2785 pvf = pv;
2786
2787 do {
2788 pvn = TAILQ_NEXT(pv, pv_list);
2789
2790 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2791
2792 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2793
d6c96d4d 2794 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
e4a473f1
MD
2795 continue;
2796
2797 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2798
2799 if (pte && (*pte & VPTE_A)) {
8608b858 2800 atomic_clear_long(pte, VPTE_A);
e4a473f1
MD
2801 rtval++;
2802 if (rtval > 4) {
2803 break;
2804 }
2805 }
2806 } while ((pv = pvn) != NULL && pv != pvf);
2807 }
5bce55a9 2808 lwkt_reltoken(&vm_token);
e4a473f1
MD
2809 crit_exit();
2810
2811 return (rtval);
2812}
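
/*
 * Note: pmap_ts_referenced() is intended for page-aging consumers such as
 * the pageout code; clearing only a handful of VPTE_A bits per call (the
 * rtval > 4 cutoff above) is sufficient for that purpose.
 */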
2813
2814/*
5bce55a9
MD
2815 * Return whether or not the specified physical page was modified
2816 * in any physical maps.
e4a473f1 2817 *
5bce55a9 2818 * No requirements.
e4a473f1
MD
2819 */
2820boolean_t
2821pmap_is_modified(vm_page_t m)
2822{
5bce55a9
MD
2823 boolean_t res;
2824
2825 lwkt_gettoken(&vm_token);
2826 res = pmap_testbit(m, VPTE_M);
2827 lwkt_reltoken(&vm_token);
2828 return (res);
e4a473f1
MD
2829}
2830
2831/*
5bce55a9
MD
2832 * Clear the modify bits on the specified physical page.
2833 *
2834 * No requirements.
e4a473f1
MD
2835 */
2836void
2837pmap_clear_modify(vm_page_t m)
2838{
5bce55a9 2839 lwkt_gettoken(&vm_token);
d6c96d4d 2840 pmap_clearbit(m, VPTE_M);
5bce55a9 2841 lwkt_reltoken(&vm_token);
e4a473f1
MD
2842}
2843
2844/*
5bce55a9 2845 * Clear the reference bit on the specified physical page.
e4a473f1 2846 *
5bce55a9 2847 * No requirements.
e4a473f1
MD
2848 */
2849void
2850pmap_clear_reference(vm_page_t m)
2851{
5bce55a9 2852 lwkt_gettoken(&vm_token);
d6c96d4d 2853 pmap_clearbit(m, VPTE_A);
5bce55a9 2854 lwkt_reltoken(&vm_token);
e4a473f1
MD
2855}
2856
2857/*
2858 * Miscellaneous support routines follow
2859 */
2860
2861static void
2862i386_protection_init(void)
2863{
2864 int *kp, prot;
2865
2866 kp = protection_codes;
2867 for (prot = 0; prot < 8; prot++) {
2868 if (prot & VM_PROT_READ)
2869 *kp |= VPTE_R;
2870 if (prot & VM_PROT_WRITE)
2871 *kp |= VPTE_W;
2872 if (prot & VM_PROT_EXECUTE)
2873 *kp |= VPTE_X;
2874 ++kp;
2875 }
2876}
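
/*
 * Illustrative sketch only (under #if 0): after i386_protection_init()
 * runs, protection_codes[] maps a VM_PROT_* combination to its VPTE bits,
 * e.g. a read/write request yields VPTE_R | VPTE_W.  The check below is
 * hypothetical and assumes protection_codes[] is the static table filled
 * in above.
 */
#if 0
static void
protection_codes_example(void)
{
	KKASSERT(protection_codes[VM_PROT_READ | VM_PROT_WRITE] ==
		 (VPTE_R | VPTE_W));
}
#endif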
2877
d5b116a0
MD
2878#if 0
2879
e4a473f1
MD
2880/*
2881 * Map a set of physical memory pages into the kernel virtual
2882 * address space. Return a pointer to where it is mapped. This
2883 * routine is intended to be used for mapping device memory,
2884 * NOT real memory.
2885 *
2886 * NOTE: we can't use pgeflag unless we invalidate the pages one at
2887 * a time.
2888 */
2889void *
2890pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2891{
2892 vm_offset_t va, tmpva, offset;
2893 vpte_t *pte;
2894
2895 offset = pa & PAGE_MASK;
2896 size = roundup(offset + size, PAGE_SIZE);
2897
9388fcaa 2898 va = kmem_alloc_nofault(&kernel_map, size, PAGE_SIZE);
e4a473f1
MD
2899 if (!va)
2900 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2901
2902 pa = pa & VPTE_FRAME;
2903 for (tmpva = va; size > 0;) {
2904 pte = KernelPTA + (tmpva >> PAGE_SHIFT);
2905 *pte = pa | VPTE_R | VPTE_W | VPTE_V; /* | pgeflag; */
2906 size -= PAGE_SIZE;
2907 tmpva += PAGE_SIZE;
2908 pa += PAGE_SIZE;
2909 }
2910 cpu_invltlb();
2911 smp_invltlb();
2912
2913 return ((void *)(va + offset));
2914}
2915
2916void
2917pmap_unmapdev(vm_offset_t va, vm_size_t size)
2918{
2919 vm_offset_t base, offset;
2920
2921 base = va & VPTE_FRAME;
2922 offset = va & PAGE_MASK;
2923 size = roundup(offset + size, PAGE_SIZE);
2924 pmap_qremove(va, size >> PAGE_SHIFT);
2925 kmem_free(&kernel_map, base, size);
2926}
2927
d5b116a0
MD
2928#endif
2929
e4a473f1 2930/*
5bce55a9
MD
2931 * Perform the pmap work for mincore
2932 *
2933 * No requirements.
e4a473f1
MD
2934 */
2935int
2936pmap_mincore(pmap_t pmap, vm_offset_t addr)
2937{
2938 vpte_t *ptep, pte;
2939 vm_page_t m;
2940 int val = 0;
5bce55a9
MD
2941
2942 lwkt_gettoken(&vm_token);
e4a473f1
MD
2943
2944 ptep = pmap_pte(pmap, addr);
4090d6ff 2945 if (ptep == NULL) {
5bce55a9 2946 lwkt_reltoken(&vm_token);
e4a473f1
MD
2947 return 0;
2948 }
2949
2950 if ((pte = *ptep) != 0) {
8608b858 2951 vm_paddr_t pa;
e4a473f1
MD
2952
2953 val = MINCORE_INCORE;
2954 if ((pte & VPTE_MANAGED) == 0)
5bce55a9 2955 goto done;
e4a473f1
MD
2956
2957 pa = pte & VPTE_FRAME;
2958
2959 m = PHYS_TO_VM_PAGE(pa);
2960
2961 /*
2962 * Modified by us
2963 */
2964 if (pte & VPTE_M)
2965 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2966 /*
2967 * Modified by someone
2968 */
2969 else if (m->dirty || pmap_is_modified(m))
2970 val |= MINCORE_MODIFIED_OTHER;
2971 /*
2972 * Referenced by us
2973 */
2974 if (pte & VPTE_A)
2975 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2976
2977 /*
2978 * Referenced by someone
2979 */
2980 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
2981 val |= MINCORE_REFERENCED_OTHER;
2982 vm_page_flag_set(m, PG_REFERENCED);
2983 }
2984 }
5bce55a9
MD
2985done:
2986 lwkt_reltoken(&vm_token);
e4a473f1
MD
2987 return val;
2988}
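
/*
 * Illustrative sketch only (under #if 0): a hypothetical caller decoding
 * the pmap_mincore() result, e.g. on behalf of mincore(2).
 */
#if 0
static char
pmap_mincore_char_example(pmap_t pmap, vm_offset_t addr)
{
	int val = pmap_mincore(pmap, addr);

	if ((val & MINCORE_INCORE) == 0)
		return ('.');		/* not resident in this pmap */
	if (val & (MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER))
		return ('M');		/* resident and dirty */
	return ('R');			/* resident */
}
#endif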
2989
b12defdc
MD
2990/*
2991 * Caller must hold vmspace->vm_map.token for oldvm and newvm
2992 */
e4a473f1 2993void
e3161323 2994pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
e4a473f1 2995{
e3161323 2996 struct vmspace *oldvm;
287ebb09 2997 struct lwp *lp;
e4a473f1 2998
e3161323 2999 oldvm = p->p_vmspace;
287ebb09 3000 crit_enter();
e3161323 3001 if (oldvm != newvm) {
e3161323 3002 p->p_vmspace = newvm;
287ebb09 3003 KKASSERT(p->p_nthreads == 1);
3e291793 3004 lp = RB_ROOT(&p->p_lwp_tree);
287ebb09
MD
3005 pmap_setlwpvm(lp, newvm);
3006 if (adjrefs) {
3007 sysref_get(&newvm->vm_sysref);
3008 sysref_put(&oldvm->vm_sysref);
3009 }
3010 }
3011 crit_exit();
3012}
3013
3014void
3015pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
3016{
3017 struct vmspace *oldvm;
3018 struct pmap *pmap;
3019
3020 crit_enter();
3021 oldvm = lp->lwp_vmspace;
3022
3023 if (oldvm != newvm) {
3024 lp->lwp_vmspace = newvm;
3025 if (curthread->td_lwp == lp) {
e3161323 3026 pmap = vmspace_pmap(newvm);
da23a592 3027 atomic_set_cpumask(&pmap->pm_active, mycpu->gd_cpumask);
e4a473f1 3028#if defined(SWTCH_OPTIM_STATS)
e3161323 3029 tlb_flush_count++;
6f7b98e0 3030#endif
e3161323 3031 pmap = vmspace_pmap(oldvm);
da23a592 3032 atomic_clear_cpumask(&pmap->pm_active, mycpu->gd_cpumask);
e3161323 3033 }
e3161323
MD
3034 }
3035 crit_exit();
e4a473f1
MD
3036}
3037
287ebb09 3038
e4a473f1
MD
3039vm_offset_t
3040pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3041{
3042
3043 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3044 return addr;
3045 }
3046
3047 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3048 return addr;
3049}
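
/*
 * Worked example of the rounding above, assuming NBPDR is 4MB as the
 * 4MB-page checks elsewhere in this file suggest: a hint of 0x00401000
 * for a large OBJT_DEVICE object is advanced to the next page-directory
 * boundary, 0x00800000.
 */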
3050
722871d3
MD
3051/*
3052 * Used by kmalloc/kfree; the page already exists at va
3053 */
3054vm_page_t
3055pmap_kvtom(vm_offset_t va)
3056{
3057 vpte_t *ptep;
3058
3059 KKASSERT(va >= KvaStart && va < KvaEnd);
3060 ptep = KernelPTA + (va >> PAGE_SHIFT);
3061 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME));
3062}
921c891e
MD
3063
3064void
3065pmap_object_init(vm_object_t object)
3066{
3067 /* empty */
3068}
3069
3070void
3071pmap_object_free(vm_object_t object)
3072{
3073 /* empty */
3074}