Commit | Line | Data |
---|---|---|
984263bc | 1 | /* |
4107b0c0 MD |
2 | * (MPSAFE) |
3 | * | |
984263bc MD |
4 | * Copyright (c) 1991 Regents of the University of California. |
5 | * All rights reserved. | |
6 | * Copyright (c) 1994 John S. Dyson | |
7 | * All rights reserved. | |
8 | * Copyright (c) 1994 David Greenman | |
9 | * All rights reserved. | |
10 | * | |
11 | * This code is derived from software contributed to Berkeley by | |
12 | * the Systems Programming Group of the University of Utah Computer | |
13 | * Science Department and William Jolitz of UUNET Technologies Inc. | |
14 | * | |
15 | * Redistribution and use in source and binary forms, with or without | |
16 | * modification, are permitted provided that the following conditions | |
17 | * are met: | |
18 | * 1. Redistributions of source code must retain the above copyright | |
19 | * notice, this list of conditions and the following disclaimer. | |
20 | * 2. Redistributions in binary form must reproduce the above copyright | |
21 | * notice, this list of conditions and the following disclaimer in the | |
22 | * documentation and/or other materials provided with the distribution. | |
23 | * 3. All advertising materials mentioning features or use of this software | |
24 | * must display the following acknowledgement: | |
25 | * This product includes software developed by the University of | |
26 | * California, Berkeley and its contributors. | |
27 | * 4. Neither the name of the University nor the names of its contributors | |
28 | * may be used to endorse or promote products derived from this software | |
29 | * without specific prior written permission. | |
30 | * | |
31 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
32 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
33 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
34 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
35 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
36 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
37 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
38 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
39 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
40 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
41 | * SUCH DAMAGE. | |
42 | * | |
43 | * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 | |
44 | * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ | |
45 | */ | |
46 | ||
47 | /* | |
4107b0c0 | 48 | * Manages physical address maps. |
984263bc | 49 | * |
b12defdc | 50 | * In most cases we hold page table pages busy in order to manipulate them. |
984263bc | 51 | */ |
5926987a MD |
52 | /* |
53 | * PMAP_DEBUG - see platform/pc32/include/pmap.h | |
54 | */ | |
984263bc MD |
55 | |
56 | #include "opt_disable_pse.h" | |
57 | #include "opt_pmap.h" | |
58 | #include "opt_msgbuf.h" | |
984263bc MD |
59 | |
60 | #include <sys/param.h> | |
61 | #include <sys/systm.h> | |
62 | #include <sys/kernel.h> | |
63 | #include <sys/proc.h> | |
64 | #include <sys/msgbuf.h> | |
65 | #include <sys/vmmeter.h> | |
66 | #include <sys/mman.h> | |
b12defdc | 67 | #include <sys/thread.h> |
984263bc MD |
68 | |
69 | #include <vm/vm.h> | |
70 | #include <vm/vm_param.h> | |
71 | #include <sys/sysctl.h> | |
72 | #include <sys/lock.h> | |
73 | #include <vm/vm_kern.h> | |
74 | #include <vm/vm_page.h> | |
75 | #include <vm/vm_map.h> | |
76 | #include <vm/vm_object.h> | |
77 | #include <vm/vm_extern.h> | |
78 | #include <vm/vm_pageout.h> | |
79 | #include <vm/vm_pager.h> | |
80 | #include <vm/vm_zone.h> | |
81 | ||
82 | #include <sys/user.h> | |
e0e69b7d | 83 | #include <sys/thread2.h> |
e3161323 | 84 | #include <sys/sysref2.h> |
b12defdc | 85 | #include <sys/spinlock2.h> |
90244566 | 86 | #include <vm/vm_page2.h> |
984263bc MD |
87 | |
88 | #include <machine/cputypes.h> | |
89 | #include <machine/md_var.h> | |
90 | #include <machine/specialreg.h> | |
984263bc | 91 | #include <machine/smp.h> |
a9295349 | 92 | #include <machine_base/apic/apicreg.h> |
85100692 | 93 | #include <machine/globaldata.h> |
0f7a3396 MD |
94 | #include <machine/pmap.h> |
95 | #include <machine/pmap_inval.h> | |
984263bc MD |
96 | |
97 | #define PMAP_KEEP_PDIRS | |
98 | #ifndef PMAP_SHPGPERPROC | |
99 | #define PMAP_SHPGPERPROC 200 | |
948209ce | 100 | #define PMAP_PVLIMIT 1400000 /* i386 kvm problems */ |
984263bc MD |
101 | #endif |
102 | ||
103 | #if defined(DIAGNOSTIC) | |
104 | #define PMAP_DIAGNOSTIC | |
105 | #endif | |
106 | ||
107 | #define MINPV 2048 | |
108 | ||
109 | #if !defined(PMAP_DIAGNOSTIC) | |
110 | #define PMAP_INLINE __inline | |
111 | #else | |
112 | #define PMAP_INLINE | |
113 | #endif | |
114 | ||
115 | /* | |
116 | * Get PDEs and PTEs for user/kernel address space | |
117 | */ | |
118 | #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) | |
119 | #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) | |
120 | ||
121 | #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) | |
122 | #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) | |
123 | #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) | |
124 | #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) | |
125 | #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) | |
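/*
 * Illustrative example (not part of the original source): with the usual
 * i386 value PDRSHIFT = 22, a user address such as va = 0xbfbfe000 selects
 * pm_pdir[0xbfbfe000 >> 22] = pm_pdir[766].  A typical caller first checks
 * the directory entry and only then looks up the pte:
 *
 *	if (pmap_pde_v(pmap_pde(pmap, va)))
 *		pte = pmap_pte(pmap, va);
 */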
126 | ||
984263bc MD |
127 | /* |
128 | * Given a map and a machine independent protection code, | |
129 | * convert to an i386 protection code. |
130 | */ | |
639a9b43 MD |
131 | #define pte_prot(m, p) \ |
132 | (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)]) | |
984263bc MD |
133 | static int protection_codes[8]; |
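/*
 * Illustrative sketch (assumption, not original source): with the usual
 * vm_prot values VM_PROT_READ = 0x1, VM_PROT_WRITE = 0x2 and
 * VM_PROT_EXECUTE = 0x4, a read/write request indexes protection_codes[3],
 * which i386_protection_init() below is expected to fill with PG_RW (the
 * i386 pte has no execute bit, so execute contributes nothing):
 *
 *	npte = pa | pte_prot(pmap, VM_PROT_READ | VM_PROT_WRITE) | PG_V;
 */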
134 | ||
fbbaeba3 | 135 | struct pmap kernel_pmap; |
54a764e8 MD |
136 | static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list); |
137 | ||
e880033d | 138 | vm_paddr_t avail_start; /* PA of first available physical page */ |
6ef943a3 | 139 | vm_paddr_t avail_end; /* PA of last available physical page */ |
e880033d | 140 | vm_offset_t virtual_start; /* VA of first avail page (after kernel bss) */ |
984263bc | 141 | vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ |
791c6551 MD |
142 | vm_offset_t virtual2_start; |
143 | vm_offset_t virtual2_end; | |
c439ad8f MD |
144 | vm_offset_t KvaStart; /* VA start of KVA space */ |
145 | vm_offset_t KvaEnd; /* VA end of KVA space (non-inclusive) */ | |
146 | vm_offset_t KvaSize; /* max size of kernel virtual address space */ | |
984263bc MD |
147 | static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ |
148 | static int pgeflag; /* PG_G or-in */ | |
149 | static int pseflag; /* PG_PS or-in */ | |
150 | ||
151 | static vm_object_t kptobj; | |
152 | ||
153 | static int nkpt; | |
154 | vm_offset_t kernel_vm_end; | |
155 | ||
156 | /* | |
157 | * Data for the pv entry allocation mechanism | |
158 | */ | |
159 | static vm_zone_t pvzone; | |
160 | static struct vm_zone pvzone_store; | |
161 | static struct vm_object pvzone_obj; | |
162 | static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; | |
163 | static int pmap_pagedaemon_waken = 0; | |
164 | static struct pv_entry *pvinit; | |
165 | ||
a93980ab MD |
166 | /* |
167 | * Considering all the issues I'm having with pmap caching, if breakage | |
168 | * continues to occur, and for debugging, I've added a sysctl that will | |
169 | * just do an unconditional invltlb. | |
170 | */ | |
171 | static int dreadful_invltlb; | |
172 | ||
173 | SYSCTL_INT(_vm, OID_AUTO, dreadful_invltlb, | |
9733f757 | 174 | CTLFLAG_RW, &dreadful_invltlb, 0, "Debugging sysctl to force invltlb on pmap operations"); |
a93980ab | 175 | |
984263bc MD |
176 | /* |
177 | * All those kernel PT submaps that BSD is so fond of | |
178 | */ | |
4090d6ff | 179 | pt_entry_t *CMAP1 = NULL, *ptmmap; |
4c0cc8bb | 180 | caddr_t CADDR1 = NULL, ptvmmap = NULL; |
984263bc | 181 | static pt_entry_t *msgbufmap; |
4090d6ff | 182 | struct msgbuf *msgbufp=NULL; |
984263bc MD |
183 | |
184 | /* | |
185 | * Crashdump maps. | |
186 | */ | |
187 | static pt_entry_t *pt_crashdumpmap; | |
188 | static caddr_t crashdumpmap; | |
189 | ||
984263bc | 190 | extern pt_entry_t *SMPpt; |
984263bc | 191 | |
3ae0cd58 RG |
192 | static PMAP_INLINE void free_pv_entry (pv_entry_t pv); |
193 | static unsigned * get_ptbase (pmap_t pmap); | |
194 | static pv_entry_t get_pv_entry (void); | |
195 | static void i386_protection_init (void); | |
5e8d0349 | 196 | static __inline void pmap_clearbit (vm_page_t m, int bit); |
3ae0cd58 | 197 | |
554cf9ac MD |
198 | static void pmap_remove_all (vm_page_t m); |
199 | static void pmap_remove_pte (struct pmap *pmap, unsigned *ptq, | |
0f7a3396 MD |
200 | vm_offset_t sva, pmap_inval_info_t info); |
201 | static void pmap_remove_page (struct pmap *pmap, | |
202 | vm_offset_t va, pmap_inval_info_t info); | |
554cf9ac | 203 | static void pmap_remove_entry (struct pmap *pmap, vm_page_t m, |
0f7a3396 | 204 | vm_offset_t va, pmap_inval_info_t info); |
3ae0cd58 | 205 | static boolean_t pmap_testbit (vm_page_t m, int bit); |
2bb9cc6f MD |
206 | static void pmap_insert_entry (pmap_t pmap, pv_entry_t pv, |
207 | vm_offset_t va, vm_page_t mpte, vm_page_t m); | |
3ae0cd58 RG |
208 | |
209 | static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va); | |
210 | ||
211 | static int pmap_release_free_page (pmap_t pmap, vm_page_t p); | |
212 | static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex); | |
213 | static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va); | |
214 | static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); | |
554cf9ac | 215 | static void pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t); |
984263bc MD |
216 | static vm_offset_t pmap_kmem_choose(vm_offset_t addr); |
217 | ||
3321ee05 MD |
218 | static void pmap_hold(pmap_t pmap); |
219 | static void pmap_drop(pmap_t pmap); | |
220 | static void pmap_wait(pmap_t pmap, int count); | |
221 | ||
984263bc MD |
222 | static unsigned pdir4mb; |
223 | ||
840de426 MD |
224 | /* |
225 | * Move the kernel virtual free pointer to the next |
226 | * 4MB boundary. This is used to help improve performance |
227 | * by using a large (4MB) page for much of the kernel |
228 | * (.text, .data, .bss). |
229 | */ | |
4107b0c0 MD |
230 | static |
231 | vm_offset_t | |
840de426 MD |
232 | pmap_kmem_choose(vm_offset_t addr) |
233 | { | |
234 | vm_offset_t newaddr = addr; | |
235 | #ifndef DISABLE_PSE | |
236 | if (cpu_feature & CPUID_PSE) { | |
237 | newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); | |
238 | } | |
239 | #endif | |
240 | return newaddr; | |
241 | } | |
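/*
 * Worked example (illustrative only): with NBPDR = 4MB on i386, an input
 * address of 0xc0123456 becomes (0xc0123456 + 0x3fffff) & ~0x3fffff =
 * 0xc0400000 when the CPU advertises PSE; without PSE the address is
 * returned unchanged.
 */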
242 | ||
984263bc | 243 | /* |
4107b0c0 MD |
244 | * This function returns a pointer to the pte entry in the pmap and has |
245 | * the side effect of potentially retaining a cached mapping of the pmap. | |
e0e69b7d | 246 | * |
4107b0c0 MD |
247 | * The caller must hold vm_token and the returned value is only valid |
248 | * until the caller blocks or releases the token. | |
984263bc | 249 | */ |
4107b0c0 MD |
250 | static |
251 | unsigned * | |
840de426 | 252 | pmap_pte(pmap_t pmap, vm_offset_t va) |
984263bc MD |
253 | { |
254 | unsigned *pdeaddr; | |
255 | ||
4107b0c0 | 256 | ASSERT_LWKT_TOKEN_HELD(&vm_token); |
984263bc MD |
257 | if (pmap) { |
258 | pdeaddr = (unsigned *) pmap_pde(pmap, va); | |
259 | if (*pdeaddr & PG_PS) | |
260 | return pdeaddr; | |
4107b0c0 | 261 | if (*pdeaddr) |
984263bc | 262 | return get_ptbase(pmap) + i386_btop(va); |
984263bc MD |
263 | } |
264 | return (0); | |
265 | } | |
266 | ||
4107b0c0 MD |
267 | /* |
268 | * pmap_pte using the kernel_pmap | |
269 | * | |
270 | * Used for debugging, no requirements. | |
271 | */ | |
272 | unsigned * | |
273 | pmap_kernel_pte(vm_offset_t va) | |
274 | { | |
275 | unsigned *pdeaddr; | |
276 | ||
277 | pdeaddr = (unsigned *) pmap_pde(&kernel_pmap, va); | |
278 | if (*pdeaddr & PG_PS) | |
279 | return pdeaddr; | |
280 | if (*pdeaddr) | |
281 | return (unsigned *)vtopte(va); | |
282 | return(0); | |
283 | } | |
284 | ||
984263bc | 285 | /* |
e0e69b7d MD |
286 | * pmap_pte_quick: |
287 | * | |
c1692ddf MD |
288 | * Super fast pmap_pte routine best used when scanning the pv lists. |
289 | * This eliminates many coarse-grained invltlb calls. Note that many of |
290 | * the pv list scans are across different pmaps and it is very wasteful | |
291 | * to do an entire invltlb when checking a single mapping. | |
e0e69b7d | 292 | * |
c1692ddf MD |
293 | * Should only be called while in a critical section. |
294 | * | |
4107b0c0 MD |
295 | * The caller must hold vm_token and the returned value is only valid |
296 | * until the caller blocks or releases the token. | |
984263bc | 297 | */ |
4107b0c0 MD |
298 | static |
299 | unsigned * | |
840de426 | 300 | pmap_pte_quick(pmap_t pmap, vm_offset_t va) |
984263bc | 301 | { |
840de426 MD |
302 | struct mdglobaldata *gd = mdcpu; |
303 | unsigned pde, newpf; | |
304 | ||
4107b0c0 | 305 | ASSERT_LWKT_TOKEN_HELD(&vm_token); |
840de426 MD |
306 | if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) { |
307 | unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; | |
308 | unsigned index = i386_btop(va); | |
309 | /* are we current address space or kernel? */ | |
fbbaeba3 | 310 | if ((pmap == &kernel_pmap) || |
840de426 MD |
311 | (frame == (((unsigned) PTDpde) & PG_FRAME))) { |
312 | return (unsigned *) PTmap + index; | |
313 | } | |
314 | newpf = pde & PG_FRAME; | |
4107b0c0 MD |
315 | if (((*(unsigned *)gd->gd_PMAP1) & PG_FRAME) != newpf) { |
316 | *(unsigned *)gd->gd_PMAP1 = newpf | PG_RW | PG_V; | |
840de426 MD |
317 | cpu_invlpg(gd->gd_PADDR1); |
318 | } | |
06bb314f | 319 | return gd->gd_PADDR1 + (index & (NPTEPG - 1)); |
984263bc | 320 | } |
840de426 | 321 | return (0); |
984263bc MD |
322 | } |
323 | ||
840de426 | 324 | |
984263bc | 325 | /* |
4107b0c0 | 326 | * Bootstrap the system enough to run with virtual memory. |
984263bc | 327 | * |
4107b0c0 MD |
328 | * On the i386 this is called after mapping has already been enabled |
329 | * and just syncs the pmap module with what has already been done. | |
330 | * [We can't call it easily with mapping off since the kernel is not | |
331 | * mapped with PA == VA, hence we would have to relocate every address | |
332 | * from the linked base (virtual) address "KERNBASE" to the actual | |
333 | * (physical) address starting relative to 0] | |
984263bc MD |
334 | */ |
335 | void | |
f123d5a1 | 336 | pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr) |
984263bc MD |
337 | { |
338 | vm_offset_t va; | |
339 | pt_entry_t *pte; | |
85100692 | 340 | struct mdglobaldata *gd; |
984263bc | 341 | int i; |
81c04d07 | 342 | int pg; |
984263bc | 343 | |
c439ad8f MD |
344 | KvaStart = (vm_offset_t)VADDR(PTDPTDI, 0); |
345 | KvaSize = (vm_offset_t)VADDR(APTDPTDI, 0) - KvaStart; | |
346 | KvaEnd = KvaStart + KvaSize; | |
347 | ||
984263bc MD |
348 | avail_start = firstaddr; |
349 | ||
350 | /* | |
e880033d MD |
351 | * XXX The calculation of virtual_start is wrong. It's NKPT*PAGE_SIZE |
352 | * too large. It should instead be correctly calculated in locore.s and | |
984263bc MD |
353 | * not based on 'first' (which is a physical address, not a virtual |
354 | * address, for the start of unused physical memory). The kernel | |
355 | * page tables are NOT double mapped and thus should not be included | |
356 | * in this calculation. | |
357 | */ | |
e880033d MD |
358 | virtual_start = (vm_offset_t) KERNBASE + firstaddr; |
359 | virtual_start = pmap_kmem_choose(virtual_start); | |
c439ad8f | 360 | virtual_end = VADDR(KPTDI+NKPDE-1, NPTEPG-1); |
984263bc MD |
361 | |
362 | /* | |
363 | * Initialize protection array. | |
364 | */ | |
365 | i386_protection_init(); | |
366 | ||
367 | /* | |
368 | * The kernel's pmap is statically allocated so we don't have to use | |
369 | * pmap_create, which is unlikely to work correctly at this part of | |
370 | * the boot sequence (XXX and which no longer exists). | |
b12defdc MD |
371 | * |
372 | * The kernel_pmap's pm_pteobj is used only for locking and not | |
373 | * for mmu pages. | |
984263bc | 374 | */ |
fbbaeba3 MD |
375 | kernel_pmap.pm_pdir = (pd_entry_t *)(KERNBASE + (u_int)IdlePTD); |
376 | kernel_pmap.pm_count = 1; | |
c2fb025d | 377 | kernel_pmap.pm_active = (cpumask_t)-1 & ~CPUMASK_LOCK; |
b12defdc | 378 | kernel_pmap.pm_pteobj = &kernel_object; |
fbbaeba3 | 379 | TAILQ_INIT(&kernel_pmap.pm_pvlist); |
b12defdc MD |
380 | TAILQ_INIT(&kernel_pmap.pm_pvlist_free); |
381 | spin_init(&kernel_pmap.pm_spin); | |
382 | lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok"); | |
984263bc MD |
383 | nkpt = NKPT; |
384 | ||
385 | /* | |
386 | * Reserve some special page table entries/VA space for temporary | |
387 | * mapping of pages. | |
388 | */ | |
389 | #define SYSMAP(c, p, v, n) \ | |
390 | v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); | |
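/*
 * Expansion sketch (illustrative): the SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 * invocation below expands to
 *
 *	CADDR1 = (caddr_t)va; va += (1 * PAGE_SIZE); CMAP1 = pte; pte += 1;
 *
 * i.e. it reserves one page of KVA and records both the VA (CADDR1) and
 * the pte slot (CMAP1) later used to point that VA at arbitrary physical
 * pages.
 */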
391 | ||
e880033d | 392 | va = virtual_start; |
4107b0c0 | 393 | pte = (pt_entry_t *) pmap_kernel_pte(va); |
984263bc MD |
394 | |
395 | /* | |
396 | * CMAP1/CMAP2 are used for zeroing and copying pages. | |
397 | */ | |
398 | SYSMAP(caddr_t, CMAP1, CADDR1, 1) | |
984263bc MD |
399 | |
400 | /* | |
401 | * Crashdump maps. | |
402 | */ | |
403 | SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); | |
404 | ||
e731d345 MD |
405 | /* |
406 | * ptvmmap is used for reading arbitrary physical pages via | |
407 | * /dev/mem. | |
408 | */ | |
409 | SYSMAP(caddr_t, ptmmap, ptvmmap, 1) | |
410 | ||
984263bc MD |
411 | /* |
412 | * msgbufp is used to map the system message buffer. | |
413 | * XXX msgbufmap is not used. | |
414 | */ | |
415 | SYSMAP(struct msgbuf *, msgbufmap, msgbufp, | |
416 | atop(round_page(MSGBUF_SIZE))) | |
417 | ||
e880033d | 418 | virtual_start = va; |
984263bc | 419 | |
17a9f566 | 420 | *(int *) CMAP1 = 0; |
984263bc MD |
421 | for (i = 0; i < NKPT; i++) |
422 | PTD[i] = 0; | |
423 | ||
a2a5ad0d MD |
424 | /* |
425 | * PG_G is terribly broken on SMP because we IPI invltlb's in some | |
426 | * cases rather than invlpg. Actually, I don't even know why it |
427 | * works under UP because self-referential page table mappings | |
428 | */ | |
429 | #ifdef SMP | |
430 | pgeflag = 0; | |
431 | #else | |
432 | if (cpu_feature & CPUID_PGE) | |
984263bc | 433 | pgeflag = PG_G; |
a2a5ad0d | 434 | #endif |
984263bc MD |
435 | |
436 | /* | |
437 | * Initialize the 4MB page size flag | |
438 | */ | |
439 | pseflag = 0; | |
440 | /* | |
441 | * The 4MB page version of the initial | |
442 | * kernel page mapping. | |
443 | */ | |
444 | pdir4mb = 0; | |
445 | ||
446 | #if !defined(DISABLE_PSE) | |
447 | if (cpu_feature & CPUID_PSE) { | |
448 | unsigned ptditmp; | |
449 | /* | |
450 | * Note that we have enabled PSE mode | |
451 | */ | |
452 | pseflag = PG_PS; | |
453 | ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE)); | |
454 | ptditmp &= ~(NBPDR - 1); | |
455 | ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; | |
456 | pdir4mb = ptditmp; | |
457 | ||
8a8d5d85 MD |
458 | #ifndef SMP |
459 | /* | |
460 | * Enable the PSE mode. If we are SMP we can't do this | |
461 | * now because the APs will not be able to use it when | |
462 | * they boot up. | |
463 | */ | |
464 | load_cr4(rcr4() | CR4_PSE); | |
984263bc | 465 | |
8a8d5d85 MD |
466 | /* |
467 | * We can do the mapping here for the single processor | |
468 | * case. We simply ignore the old page table page from | |
469 | * now on. | |
470 | */ | |
471 | /* | |
472 | * For SMP, we still need 4K pages to bootstrap APs, | |
473 | * PSE will be enabled as soon as all APs are up. | |
474 | */ | |
b5b32410 | 475 | PTD[KPTDI] = (pd_entry_t)ptditmp; |
fbbaeba3 | 476 | kernel_pmap.pm_pdir[KPTDI] = (pd_entry_t)ptditmp; |
0f7a3396 | 477 | cpu_invltlb(); |
8a8d5d85 | 478 | #endif |
984263bc MD |
479 | } |
480 | #endif | |
984263bc | 481 | |
81c04d07 MD |
482 | /* |
483 | * We need to finish setting up the globaldata page for the BSP. | |
484 | * locore has already populated the page table for the mdglobaldata | |
485 | * portion. | |
486 | */ | |
487 | pg = MDGLOBALDATA_BASEALLOC_PAGES; | |
85100692 | 488 | gd = &CPU_prvspace[0].mdglobaldata; |
81c04d07 MD |
489 | gd->gd_CMAP1 = &SMPpt[pg + 0]; |
490 | gd->gd_CMAP2 = &SMPpt[pg + 1]; | |
491 | gd->gd_CMAP3 = &SMPpt[pg + 2]; | |
492 | gd->gd_PMAP1 = &SMPpt[pg + 3]; | |
9388fcaa | 493 | gd->gd_GDMAP1 = &PTD[APTDPTDI]; |
85100692 MD |
494 | gd->gd_CADDR1 = CPU_prvspace[0].CPAGE1; |
495 | gd->gd_CADDR2 = CPU_prvspace[0].CPAGE2; | |
496 | gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3; | |
497 | gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1; | |
9388fcaa | 498 | gd->gd_GDADDR1= (unsigned *)VADDR(APTDPTDI, 0); |
984263bc | 499 | |
0f7a3396 | 500 | cpu_invltlb(); |
984263bc MD |
501 | } |
502 | ||
503 | #ifdef SMP | |
504 | /* | |
505 | * Set 4mb pdir for mp startup | |
506 | */ | |
507 | void | |
508 | pmap_set_opt(void) | |
509 | { | |
510 | if (pseflag && (cpu_feature & CPUID_PSE)) { | |
511 | load_cr4(rcr4() | CR4_PSE); | |
72740893 | 512 | if (pdir4mb && mycpu->gd_cpuid == 0) { /* only on BSP */ |
fbbaeba3 | 513 | kernel_pmap.pm_pdir[KPTDI] = |
984263bc MD |
514 | PTD[KPTDI] = (pd_entry_t)pdir4mb; |
515 | cpu_invltlb(); | |
516 | } | |
517 | } | |
518 | } | |
519 | #endif | |
520 | ||
521 | /* | |
4107b0c0 MD |
522 | * Initialize the pmap module, called by vm_init() |
523 | * | |
524 | * Called from the low level boot code only. | |
984263bc MD |
525 | */ |
526 | void | |
e7252eda | 527 | pmap_init(void) |
984263bc MD |
528 | { |
529 | int i; | |
530 | int initial_pvs; | |
531 | ||
532 | /* | |
533 | * object for kernel page table pages | |
534 | */ | |
535 | kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE); | |
536 | ||
537 | /* | |
538 | * Allocate memory for random pmap data structures. Includes the | |
539 | * pv_head_table. | |
540 | */ | |
541 | ||
542 | for(i = 0; i < vm_page_array_size; i++) { | |
543 | vm_page_t m; | |
544 | ||
545 | m = &vm_page_array[i]; | |
546 | TAILQ_INIT(&m->md.pv_list); | |
547 | m->md.pv_list_count = 0; | |
548 | } | |
549 | ||
550 | /* | |
551 | * init the pv free list | |
552 | */ | |
553 | initial_pvs = vm_page_array_size; | |
554 | if (initial_pvs < MINPV) | |
555 | initial_pvs = MINPV; | |
556 | pvzone = &pvzone_store; | |
948209ce MD |
557 | pvinit = (void *)kmem_alloc(&kernel_map, |
558 | initial_pvs * sizeof (struct pv_entry)); | |
559 | zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), | |
560 | pvinit, initial_pvs); | |
984263bc MD |
561 | |
562 | /* | |
563 | * Now it is safe to enable pv_table recording. | |
564 | */ | |
565 | pmap_initialized = TRUE; | |
566 | } | |
567 | ||
568 | /* | |
569 | * Initialize the address space (zone) for the pv_entries. Set a | |
570 | * high water mark so that the system can recover from excessive | |
571 | * numbers of pv entries. | |
4107b0c0 MD |
572 | * |
573 | * Called from the low level boot code only. | |
984263bc MD |
574 | */ |
575 | void | |
f123d5a1 | 576 | pmap_init2(void) |
984263bc MD |
577 | { |
578 | int shpgperproc = PMAP_SHPGPERPROC; | |
948209ce | 579 | int entry_max; |
984263bc MD |
580 | |
581 | TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); | |
582 | pv_entry_max = shpgperproc * maxproc + vm_page_array_size; | |
948209ce MD |
583 | |
584 | #ifdef PMAP_PVLIMIT | |
585 | /* | |
586 | * Horrible hack for systems with a lot of memory running i386. | |
587 | * the calculated pv_entry_max can wind up eating a ton of KVM | |
588 | * so put a cap on the number of entries if the user did not | |
589 | * change any of the values. This saves about 44MB of KVM on | |
590 | * boxes with 3+GB of ram. | |
591 | * | |
592 | * On the flip side, this makes it more likely that some setups | |
593 | * will run out of pv entries. Those sysads will have to bump | |
594 | * the limit up with vm.pmap.pv_entries or vm.pmap.shpgperproc. |
595 | */ | |
596 | if (shpgperproc == PMAP_SHPGPERPROC) { | |
597 | if (pv_entry_max > PMAP_PVLIMIT) | |
598 | pv_entry_max = PMAP_PVLIMIT; | |
599 | } | |
600 | #endif | |
984263bc MD |
601 | TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); |
602 | pv_entry_high_water = 9 * (pv_entry_max / 10); | |
948209ce MD |
603 | |
604 | /* | |
605 | * Subtract out pages already installed in the zone (hack) | |
606 | */ | |
607 | entry_max = pv_entry_max - vm_page_array_size; | |
608 | if (entry_max <= 0) | |
609 | entry_max = 1; | |
610 | ||
611 | zinitna(pvzone, &pvzone_obj, NULL, 0, entry_max, ZONE_INTERRUPT, 1); | |
984263bc MD |
612 | } |
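/*
 * Worked example (hypothetical numbers, not from the original source):
 * with the default shpgperproc of 200 and, say, maxproc = 1000, the code
 * above starts from pv_entry_max = 200 * 1000 + vm_page_array_size and,
 * ignoring tunable overrides and the i386 PMAP_PVLIMIT cap, sets
 * pv_entry_high_water to 90% of the final value, the threshold used to
 * decide when pv entries are getting scarce.
 */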
613 | ||
614 | ||
615 | /*************************************************** | |
616 | * Low level helper routines..... | |
617 | ***************************************************/ | |
618 | ||
5926987a MD |
619 | #ifdef PMAP_DEBUG |
620 | ||
621 | static void | |
622 | test_m_maps_pv(vm_page_t m, pv_entry_t pv) | |
623 | { | |
624 | pv_entry_t spv; | |
625 | ||
74b9d1ec | 626 | crit_enter(); |
5926987a MD |
627 | #ifdef PMAP_DEBUG |
628 | KKASSERT(pv->pv_m == m); | |
629 | #endif | |
630 | TAILQ_FOREACH(spv, &m->md.pv_list, pv_list) { | |
74b9d1ec MD |
631 | if (pv == spv) { |
632 | crit_exit(); | |
5926987a | 633 | return; |
74b9d1ec | 634 | } |
5926987a | 635 | } |
74b9d1ec | 636 | crit_exit(); |
ed20d0e3 | 637 | panic("test_m_maps_pv: failed m %p pv %p", m, pv); |
5926987a MD |
638 | } |
639 | ||
640 | static void | |
641 | ptbase_assert(struct pmap *pmap) | |
642 | { | |
643 | unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; | |
644 | ||
645 | /* are we current address space or kernel? */ | |
4107b0c0 | 646 | if (pmap == &kernel_pmap || frame == (((unsigned)PTDpde) & PG_FRAME)) |
5926987a | 647 | return; |
3558dcda | 648 | KKASSERT(frame == (*mdcpu->gd_GDMAP1 & PG_FRAME)); |
5926987a MD |
649 | } |
650 | ||
651 | #else | |
652 | ||
653 | #define test_m_maps_pv(m, pv) | |
654 | #define ptbase_assert(pmap) | |
655 | ||
656 | #endif | |
657 | ||
984263bc MD |
658 | #if defined(PMAP_DIAGNOSTIC) |
659 | ||
660 | /* | |
661 | * This code checks for non-writeable/modified pages. | |
662 | * This should be an invalid condition. | |
663 | */ | |
664 | static int | |
665 | pmap_nw_modified(pt_entry_t ptea) | |
666 | { | |
667 | int pte; | |
668 | ||
669 | pte = (int) ptea; | |
670 | ||
671 | if ((pte & (PG_M|PG_RW)) == PG_M) | |
672 | return 1; | |
673 | else | |
674 | return 0; | |
675 | } | |
676 | #endif | |
677 | ||
678 | ||
679 | /* | |
4107b0c0 MD |
680 | * This routine defines the region(s) of memory that should not be tested |
681 | * for the modified bit. | |
682 | * | |
683 | * No requirements. | |
984263bc MD |
684 | */ |
685 | static PMAP_INLINE int | |
686 | pmap_track_modified(vm_offset_t va) | |
687 | { | |
688 | if ((va < clean_sva) || (va >= clean_eva)) | |
689 | return 1; | |
690 | else | |
691 | return 0; | |
692 | } | |
693 | ||
c1692ddf MD |
694 | /* |
695 | * Retrieve the mapped page table base for a particular pmap. Use our self | |
696 | * mapping for the kernel_pmap or our current pmap. | |
697 | * | |
698 | * For foreign pmaps we use the per-cpu page table map. Since this involves | |
699 | * installing a ptd it's actually (per-process x per-cpu). However, we | |
700 | * still cannot depend on our mapping to survive thread switches because | |
701 | * the process might be threaded and switching to another thread for the | |
702 | * same process on the same cpu will allow that other thread to make its | |
703 | * own mapping. | |
704 | * | |
705 | * This could be a bit confusing, but the gist is that for something like |
706 | * the vkernel, which uses foreign pmaps all the time, this represents a |
707 | * pretty good cache that avoids unnecessary invltlb()s. |
4107b0c0 MD |
708 | * |
709 | * The caller must hold vm_token and the returned value is only valid | |
710 | * until the caller blocks or releases the token. | |
c1692ddf | 711 | */ |
984263bc | 712 | static unsigned * |
e0e69b7d | 713 | get_ptbase(pmap_t pmap) |
984263bc MD |
714 | { |
715 | unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; | |
c1692ddf | 716 | struct mdglobaldata *gd = mdcpu; |
984263bc | 717 | |
4107b0c0 MD |
718 | ASSERT_LWKT_TOKEN_HELD(&vm_token); |
719 | ||
5926987a MD |
720 | /* |
721 | * We can use PTmap if the pmap is our current address space or | |
722 | * the kernel address space. | |
723 | */ | |
fbbaeba3 | 724 | if (pmap == &kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { |
984263bc MD |
725 | return (unsigned *) PTmap; |
726 | } | |
e0e69b7d | 727 | |
5926987a | 728 | /* |
c1692ddf MD |
729 | * Otherwise we use the per-cpu alternative page table map. Each |
730 | * cpu gets its own map. Because of this we cannot use this map | |
731 | * from interrupts or threads which can preempt. | |
be3aecf7 MD |
732 | * |
733 | * Even if we already have the map cached we may still have to | |
734 | * invalidate the TLB if another cpu modified a PDE in the map. | |
5926987a | 735 | */ |
c1692ddf MD |
736 | KKASSERT(gd->mi.gd_intr_nesting_level == 0 && |
737 | (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0); | |
e0e69b7d | 738 | |
c1692ddf MD |
739 | if ((*gd->gd_GDMAP1 & PG_FRAME) != frame) { |
740 | *gd->gd_GDMAP1 = frame | PG_RW | PG_V; | |
be3aecf7 MD |
741 | pmap->pm_cached |= gd->mi.gd_cpumask; |
742 | cpu_invltlb(); | |
743 | } else if ((pmap->pm_cached & gd->mi.gd_cpumask) == 0) { | |
744 | pmap->pm_cached |= gd->mi.gd_cpumask; | |
984263bc | 745 | cpu_invltlb(); |
a93980ab MD |
746 | } else if (dreadful_invltlb) { |
747 | cpu_invltlb(); | |
984263bc | 748 | } |
c1692ddf | 749 | return ((unsigned *)gd->gd_GDADDR1); |
984263bc MD |
750 | } |
751 | ||
984263bc | 752 | /* |
e0e69b7d MD |
753 | * pmap_extract: |
754 | * | |
4107b0c0 | 755 | * Extract the physical page address associated with the map/VA pair. |
e0e69b7d | 756 | * |
4107b0c0 | 757 | * The caller may hold vm_token if it desires non-blocking operation. |
984263bc | 758 | */ |
6ef943a3 | 759 | vm_paddr_t |
840de426 | 760 | pmap_extract(pmap_t pmap, vm_offset_t va) |
984263bc MD |
761 | { |
762 | vm_offset_t rtval; | |
763 | vm_offset_t pdirindex; | |
840de426 | 764 | |
4107b0c0 | 765 | lwkt_gettoken(&vm_token); |
984263bc MD |
766 | pdirindex = va >> PDRSHIFT; |
767 | if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) { | |
768 | unsigned *pte; | |
769 | if ((rtval & PG_PS) != 0) { | |
770 | rtval &= ~(NBPDR - 1); | |
771 | rtval |= va & (NBPDR - 1); | |
4107b0c0 MD |
772 | } else { |
773 | pte = get_ptbase(pmap) + i386_btop(va); | |
774 | rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); | |
984263bc | 775 | } |
4107b0c0 MD |
776 | } else { |
777 | rtval = 0; | |
984263bc | 778 | } |
4107b0c0 MD |
779 | lwkt_reltoken(&vm_token); |
780 | return rtval; | |
f6bf3af1 MD |
781 | } |
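/*
 * Usage sketch (illustrative, not part of the original source):
 *
 *	vm_paddr_t pa = pmap_extract(&kernel_pmap, va);	(0 if unmapped)
 *
 * A 4MB (PG_PS) pde is handled above by keeping the low 22 bits of va as
 * the offset within the large page.
 */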
782 | ||
984263bc MD |
783 | /*************************************************** |
784 | * Low level mapping routines..... | |
785 | ***************************************************/ | |
786 | ||
787 | /* | |
4107b0c0 MD |
788 | * Map a wired VM page to a KVA, fully SMP synchronized. |
789 | * | |
790 | * No requirements, non blocking. | |
984263bc | 791 | */ |
24712b90 | 792 | void |
6ef943a3 | 793 | pmap_kenter(vm_offset_t va, vm_paddr_t pa) |
984263bc | 794 | { |
840de426 | 795 | unsigned *pte; |
0f7a3396 MD |
796 | unsigned npte; |
797 | pmap_inval_info info; | |
984263bc | 798 | |
0f7a3396 | 799 | pmap_inval_init(&info); |
984263bc MD |
800 | npte = pa | PG_RW | PG_V | pgeflag; |
801 | pte = (unsigned *)vtopte(va); | |
c2fb025d | 802 | pmap_inval_interlock(&info, &kernel_pmap, va); |
984263bc | 803 | *pte = npte; |
c2fb025d MD |
804 | pmap_inval_deinterlock(&info, &kernel_pmap); |
805 | pmap_inval_done(&info); | |
984263bc MD |
806 | } |
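/*
 * Usage sketch (illustrative, not part of the original source): to window
 * a physical page into kernel space and tear the mapping down again, fully
 * SMP synchronized:
 *
 *	pmap_kenter(va, VM_PAGE_TO_PHYS(m));
 *	... access the page through (void *)va ...
 *	pmap_kremove(va);
 *
 * The _quick variants below do the same work but only invalidate the
 * local cpu's TLB.
 */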
807 | ||
6d1ec6fa | 808 | /* |
4107b0c0 MD |
809 | * Map a wired VM page to a KVA, synchronized on current cpu only. |
810 | * | |
811 | * No requirements, non blocking. | |
6d1ec6fa | 812 | */ |
24712b90 MD |
813 | void |
814 | pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) | |
815 | { | |
816 | unsigned *pte; | |
817 | unsigned npte; | |
818 | ||
819 | npte = pa | PG_RW | PG_V | pgeflag; | |
820 | pte = (unsigned *)vtopte(va); | |
821 | *pte = npte; | |
822 | cpu_invlpg((void *)va); | |
823 | } | |
824 | ||
4107b0c0 MD |
825 | /* |
826 | * Synchronize a previously entered VA on all cpus. | |
827 | * | |
828 | * No requirements, non blocking. | |
829 | */ | |
24712b90 MD |
830 | void |
831 | pmap_kenter_sync(vm_offset_t va) | |
832 | { | |
833 | pmap_inval_info info; | |
834 | ||
835 | pmap_inval_init(&info); | |
c2fb025d MD |
836 | pmap_inval_interlock(&info, &kernel_pmap, va); |
837 | pmap_inval_deinterlock(&info, &kernel_pmap); | |
838 | pmap_inval_done(&info); | |
24712b90 MD |
839 | } |
840 | ||
4107b0c0 MD |
841 | /* |
842 | * Synchronize a previously entered VA on the current cpu only. | |
843 | * | |
844 | * No requirements, non blocking. | |
845 | */ | |
24712b90 MD |
846 | void |
847 | pmap_kenter_sync_quick(vm_offset_t va) | |
848 | { | |
849 | cpu_invlpg((void *)va); | |
850 | } | |
851 | ||
984263bc | 852 | /* |
4107b0c0 MD |
853 | * Remove a page from the kernel pagetables, fully SMP synchronized. |
854 | * | |
855 | * No requirements, non blocking. | |
984263bc | 856 | */ |
24712b90 | 857 | void |
840de426 | 858 | pmap_kremove(vm_offset_t va) |
984263bc | 859 | { |
840de426 | 860 | unsigned *pte; |
0f7a3396 | 861 | pmap_inval_info info; |
984263bc | 862 | |
0f7a3396 | 863 | pmap_inval_init(&info); |
984263bc | 864 | pte = (unsigned *)vtopte(va); |
c2fb025d | 865 | pmap_inval_interlock(&info, &kernel_pmap, va); |
984263bc | 866 | *pte = 0; |
c2fb025d MD |
867 | pmap_inval_deinterlock(&info, &kernel_pmap); |
868 | pmap_inval_done(&info); | |
984263bc MD |
869 | } |
870 | ||
4107b0c0 MD |
871 | /* |
872 | * Remove a page from the kernel pagetables, synchronized on current cpu only. | |
873 | * | |
874 | * No requirements, non blocking. | |
875 | */ | |
24712b90 MD |
876 | void |
877 | pmap_kremove_quick(vm_offset_t va) | |
878 | { | |
879 | unsigned *pte; | |
880 | pte = (unsigned *)vtopte(va); | |
881 | *pte = 0; | |
882 | cpu_invlpg((void *)va); | |
883 | } | |
884 | ||
9ad680a3 | 885 | /* |
4107b0c0 MD |
886 | * Adjust the permissions of a page in the kernel page table, |
887 | * synchronized on the current cpu only. | |
888 | * | |
889 | * No requirements, non blocking. | |
9ad680a3 MD |
890 | */ |
891 | void | |
892 | pmap_kmodify_rw(vm_offset_t va) | |
893 | { | |
4107b0c0 | 894 | atomic_set_int(vtopte(va), PG_RW); |
9ad680a3 MD |
895 | cpu_invlpg((void *)va); |
896 | } | |
897 | ||
4107b0c0 MD |
898 | /* |
899 | * Adjust the permissions of a page in the kernel page table, | |
900 | * synchronized on the current cpu only. | |
901 | * | |
902 | * No requirements, non blocking. | |
903 | */ | |
9ad680a3 MD |
904 | void |
905 | pmap_kmodify_nc(vm_offset_t va) | |
906 | { | |
4107b0c0 | 907 | atomic_set_int(vtopte(va), PG_N); |
9ad680a3 MD |
908 | cpu_invlpg((void *)va); |
909 | } | |
910 | ||
984263bc | 911 | /* |
4107b0c0 | 912 | * Map a range of physical addresses into kernel virtual address space. |
984263bc | 913 | * |
4107b0c0 | 914 | * No requirements, non blocking. |
984263bc MD |
915 | */ |
916 | vm_offset_t | |
8e5e6f1b | 917 | pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) |
984263bc | 918 | { |
8e5e6f1b AH |
919 | vm_offset_t sva, virt; |
920 | ||
921 | sva = virt = *virtp; | |
984263bc MD |
922 | while (start < end) { |
923 | pmap_kenter(virt, start); | |
924 | virt += PAGE_SIZE; | |
925 | start += PAGE_SIZE; | |
926 | } | |
8e5e6f1b AH |
927 | *virtp = virt; |
928 | return (sva); | |
984263bc MD |
929 | } |
930 | ||
984263bc | 931 | /* |
4107b0c0 MD |
932 | * Add a list of wired pages to the kva, fully SMP synchronized. |
933 | * | |
934 | * No requirements, non blocking. | |
984263bc MD |
935 | */ |
936 | void | |
840de426 | 937 | pmap_qenter(vm_offset_t va, vm_page_t *m, int count) |
984263bc MD |
938 | { |
939 | vm_offset_t end_va; | |
940 | ||
941 | end_va = va + count * PAGE_SIZE; | |
942 | ||
943 | while (va < end_va) { | |
944 | unsigned *pte; | |
945 | ||
946 | pte = (unsigned *)vtopte(va); | |
947 | *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; | |
984263bc | 948 | cpu_invlpg((void *)va); |
984263bc MD |
949 | va += PAGE_SIZE; |
950 | m++; | |
951 | } | |
952 | #ifdef SMP | |
0f7a3396 | 953 | smp_invltlb(); /* XXX */ |
984263bc MD |
954 | #endif |
955 | } | |
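/*
 * Usage sketch (illustrative, not part of the original source): given
 * 'count' wired pages in m[], map them into a previously reserved KVA
 * window and remove the mappings when done:
 *
 *	pmap_qenter(kva, m, count);
 *	... access the pages through (char *)kva ...
 *	pmap_qremove(kva, count);
 */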
956 | ||
957 | /* | |
4107b0c0 | 958 | * Remove pages from KVA, fully SMP synchronized. |
7155fc7d | 959 | * |
4107b0c0 | 960 | * No requirements, non blocking. |
984263bc MD |
961 | */ |
962 | void | |
840de426 | 963 | pmap_qremove(vm_offset_t va, int count) |
984263bc MD |
964 | { |
965 | vm_offset_t end_va; | |
966 | ||
967 | end_va = va + count*PAGE_SIZE; | |
968 | ||
969 | while (va < end_va) { | |
970 | unsigned *pte; | |
971 | ||
972 | pte = (unsigned *)vtopte(va); | |
973 | *pte = 0; | |
984263bc | 974 | cpu_invlpg((void *)va); |
984263bc MD |
975 | va += PAGE_SIZE; |
976 | } | |
977 | #ifdef SMP | |
978 | smp_invltlb(); | |
979 | #endif | |
980 | } | |
981 | ||
06ecca5a MD |
982 | /* |
983 | * This routine works like vm_page_lookup() but also blocks as long as the | |
984 | * page is busy. This routine does not busy the page it returns. | |
985 | * | |
b12defdc | 986 | * The caller must hold the object. |
06ecca5a | 987 | */ |
984263bc | 988 | static vm_page_t |
840de426 | 989 | pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) |
984263bc MD |
990 | { |
991 | vm_page_t m; | |
06ecca5a | 992 | |
b12defdc MD |
993 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); |
994 | m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp"); | |
17cde63e | 995 | |
06ecca5a | 996 | return(m); |
984263bc MD |
997 | } |
998 | ||
263e4574 MD |
999 | /* |
1000 | * Create a new thread and optionally associate it with a (new) process. | |
6ef943a3 | 1001 | * NOTE! the new thread's cpu may not equal the current cpu. |
263e4574 | 1002 | */ |
7d0bac62 MD |
1003 | void |
1004 | pmap_init_thread(thread_t td) | |
263e4574 | 1005 | { |
f470d0c8 | 1006 | /* enforce pcb placement */ |
f470d0c8 | 1007 | td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; |
65d6ce10 | 1008 | td->td_savefpu = &td->td_pcb->pcb_save; |
7d0bac62 | 1009 | td->td_sp = (char *)td->td_pcb - 16; |
263e4574 MD |
1010 | } |
1011 | ||
984263bc | 1012 | /* |
984263bc MD |
1013 | * This routine directly affects the fork perf for a process. |
1014 | */ | |
1015 | void | |
13d13d89 | 1016 | pmap_init_proc(struct proc *p) |
984263bc | 1017 | { |
984263bc MD |
1018 | } |
1019 | ||
984263bc MD |
1020 | /*************************************************** |
1021 | * Page table page management routines..... | |
1022 | ***************************************************/ | |
1023 | ||
1024 | /* | |
90244566 MD |
1025 | * This routine unwires page table pages, removing and freeing the page |
1026 | * table page when the wire count drops to 0. |
4107b0c0 MD |
1027 | * |
1028 | * The caller must hold vm_token. | |
1029 | * This function can block. | |
984263bc MD |
1030 | */ |
1031 | static int | |
90244566 | 1032 | _pmap_unwire_pte(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) |
840de426 | 1033 | { |
17cde63e MD |
1034 | /* |
1035 | * Wait until we can busy the page ourselves. We cannot have | |
1036 | * any active flushes if we block. | |
1037 | */ | |
b12defdc | 1038 | vm_page_busy_wait(m, FALSE, "pmuwpt"); |
eec2b734 | 1039 | KASSERT(m->queue == PQ_NONE, |
90244566 | 1040 | ("_pmap_unwire_pte: %p->queue != PQ_NONE", m)); |
984263bc | 1041 | |
90244566 | 1042 | if (m->wire_count == 1) { |
984263bc | 1043 | /* |
be3aecf7 MD |
1044 | * Unmap the page table page. |
1045 | * | |
1046 | * NOTE: We must clear pm_cached for all cpus, including | |
1047 | * the current one, when clearing a page directory | |
1048 | * entry. | |
984263bc | 1049 | */ |
c2fb025d | 1050 | pmap_inval_interlock(info, pmap, -1); |
2247fe02 | 1051 | KKASSERT(pmap->pm_pdir[m->pindex]); |
984263bc | 1052 | pmap->pm_pdir[m->pindex] = 0; |
be3aecf7 | 1053 | pmap->pm_cached = 0; |
c2fb025d | 1054 | pmap_inval_deinterlock(info, pmap); |
eec2b734 MD |
1055 | |
1056 | KKASSERT(pmap->pm_stats.resident_count > 0); | |
984263bc | 1057 | --pmap->pm_stats.resident_count; |
984263bc MD |
1058 | |
1059 | if (pmap->pm_ptphint == m) | |
1060 | pmap->pm_ptphint = NULL; | |
1061 | ||
1062 | /* | |
eec2b734 MD |
1063 | * This was our last hold, the page had better be unwired |
1064 | * after we decrement wire_count. | |
1065 | * | |
1066 | * FUTURE NOTE: shared page directory page could result in | |
1067 | * multiple wire counts. | |
984263bc | 1068 | */ |
90244566 | 1069 | vm_page_unwire(m, 0); |
17cde63e | 1070 | vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); |
eec2b734 MD |
1071 | vm_page_flash(m); |
1072 | vm_page_free_zero(m); | |
984263bc | 1073 | return 1; |
17cde63e | 1074 | } else { |
90244566 MD |
1075 | KKASSERT(m->wire_count > 1); |
1076 | if (vm_page_unwire_quick(m)) | |
1077 | panic("pmap_unwire_pte: Insufficient wire_count"); | |
b12defdc | 1078 | vm_page_wakeup(m); |
17cde63e | 1079 | return 0; |
984263bc | 1080 | } |
984263bc MD |
1081 | } |
1082 | ||
4107b0c0 MD |
1083 | /* |
1084 | * The caller must hold vm_token. | |
92ba8d28 | 1085 | * |
4107b0c0 | 1086 | * This function can block. |
92ba8d28 MD |
1087 | * |
1088 | * This function can race the wire_count 2->1 case because the page | |
1089 | * is not busied during the unwire_quick operation. An eventual | |
1090 | * pmap_release() will catch the case. | |
4107b0c0 | 1091 | */ |
984263bc | 1092 | static PMAP_INLINE int |
90244566 | 1093 | pmap_unwire_pte(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) |
984263bc | 1094 | { |
90244566 MD |
1095 | KKASSERT(m->wire_count > 0); |
1096 | if (m->wire_count > 1) { | |
1097 | if (vm_page_unwire_quick(m)) | |
1098 | panic("pmap_unwire_pte: Insufficient wire_count"); | |
984263bc | 1099 | return 0; |
eec2b734 | 1100 | } else { |
90244566 | 1101 | return _pmap_unwire_pte(pmap, m, info); |
eec2b734 | 1102 | } |
984263bc MD |
1103 | } |
1104 | ||
1105 | /* | |
4107b0c0 | 1106 | * After removing a (user) page table entry, this routine is used to |
984263bc | 1107 | * conditionally free the page, and manage the hold/wire counts. |
5926987a | 1108 | * |
4107b0c0 MD |
1109 | * The caller must hold vm_token. |
1110 | * This function can block regardless. | |
984263bc | 1111 | */ |
554cf9ac | 1112 | static void |
0f7a3396 | 1113 | pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte, |
4107b0c0 | 1114 | pmap_inval_info_t info) |
984263bc MD |
1115 | { |
1116 | unsigned ptepindex; | |
4107b0c0 | 1117 | |
b12defdc MD |
1118 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); |
1119 | ||
984263bc | 1120 | if (va >= UPT_MIN_ADDRESS) |
554cf9ac | 1121 | return; |
984263bc MD |
1122 | |
1123 | if (mpte == NULL) { | |
1124 | ptepindex = (va >> PDRSHIFT); | |
b1482674 MD |
1125 | if ((mpte = pmap->pm_ptphint) != NULL && |
1126 | mpte->pindex == ptepindex && | |
1127 | (mpte->flags & PG_BUSY) == 0) { | |
1128 | ; /* use mpte */ | |
984263bc | 1129 | } else { |
b12defdc | 1130 | mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); |
984263bc | 1131 | pmap->pm_ptphint = mpte; |
b12defdc | 1132 | vm_page_wakeup(mpte); |
984263bc MD |
1133 | } |
1134 | } | |
554cf9ac | 1135 | pmap_unwire_pte(pmap, mpte, info); |
984263bc MD |
1136 | } |
1137 | ||
54a764e8 | 1138 | /* |
fbbaeba3 MD |
1139 | * Initialize pmap0/vmspace0. This pmap is not added to pmap_list because |
1140 | * it, and IdlePTD, represents the template used to update all other pmaps. | |
1141 | * | |
1142 | * On architectures where the kernel pmap is not integrated into the user | |
1143 | * process pmap, this pmap represents the process pmap, not the kernel pmap. | |
1144 | * kernel_pmap should be used to directly access the kernel_pmap. | |
4107b0c0 MD |
1145 | * |
1146 | * No requirements. | |
54a764e8 | 1147 | */ |
984263bc | 1148 | void |
840de426 | 1149 | pmap_pinit0(struct pmap *pmap) |
984263bc MD |
1150 | { |
1151 | pmap->pm_pdir = | |
e4846942 | 1152 | (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE); |
24712b90 | 1153 | pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t) IdlePTD); |
984263bc MD |
1154 | pmap->pm_count = 1; |
1155 | pmap->pm_active = 0; | |
be3aecf7 | 1156 | pmap->pm_cached = 0; |
984263bc MD |
1157 | pmap->pm_ptphint = NULL; |
1158 | TAILQ_INIT(&pmap->pm_pvlist); | |
b12defdc MD |
1159 | TAILQ_INIT(&pmap->pm_pvlist_free); |
1160 | spin_init(&pmap->pm_spin); | |
1161 | lwkt_token_init(&pmap->pm_token, "pmap_tok"); | |
984263bc MD |
1162 | bzero(&pmap->pm_stats, sizeof pmap->pm_stats); |
1163 | } | |
1164 | ||
1165 | /* | |
1166 | * Initialize a preallocated and zeroed pmap structure, | |
1167 | * such as one in a vmspace structure. | |
4107b0c0 MD |
1168 | * |
1169 | * No requirements. | |
984263bc MD |
1170 | */ |
1171 | void | |
840de426 | 1172 | pmap_pinit(struct pmap *pmap) |
984263bc MD |
1173 | { |
1174 | vm_page_t ptdpg; | |
1175 | ||
1176 | /* | |
1177 | * No need to allocate page table space yet but we do need a valid | |
1178 | * page directory table. | |
1179 | */ | |
b5b32410 | 1180 | if (pmap->pm_pdir == NULL) { |
984263bc | 1181 | pmap->pm_pdir = |
e4846942 | 1182 | (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE); |
b5b32410 | 1183 | } |
984263bc MD |
1184 | |
1185 | /* | |
c3834cb2 | 1186 | * Allocate an object for the ptes |
984263bc MD |
1187 | */ |
1188 | if (pmap->pm_pteobj == NULL) | |
c3834cb2 | 1189 | pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1); |
984263bc MD |
1190 | |
1191 | /* | |
c3834cb2 MD |
1192 | * Allocate the page directory page, unless we already have |
1193 | * one cached. If we used the cached page the wire_count will | |
1194 | * already be set appropriately. | |
984263bc | 1195 | */ |
c3834cb2 MD |
1196 | if ((ptdpg = pmap->pm_pdirm) == NULL) { |
1197 | ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI, | |
d2d8515b MD |
1198 | VM_ALLOC_NORMAL | VM_ALLOC_RETRY | |
1199 | VM_ALLOC_ZERO); | |
c3834cb2 | 1200 | pmap->pm_pdirm = ptdpg; |
b12defdc MD |
1201 | vm_page_flag_clear(ptdpg, PG_MAPPED); |
1202 | vm_page_wire(ptdpg); | |
d2d8515b | 1203 | KKASSERT(ptdpg->valid == VM_PAGE_BITS_ALL); |
c3834cb2 | 1204 | pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); |
b12defdc | 1205 | vm_page_wakeup(ptdpg); |
c3834cb2 | 1206 | } |
984263bc | 1207 | pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; |
984263bc MD |
1208 | |
1209 | /* install self-referential address mapping entry */ | |
1210 | *(unsigned *) (pmap->pm_pdir + PTDPTDI) = | |
1211 | VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M; | |
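	/*
	 * Illustrative note (not part of the original source): once this
	 * pmap is the active address space, the self-referential entry
	 * makes every one of its page tables visible as a flat pte array
	 * at VADDR(PTDPTDI, 0) (PTmap), with the page directory itself
	 * showing up within that same window; get_ptbase() and
	 * pmap_pte_quick() above depend on exactly this layout.
	 */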
1212 | ||
1213 | pmap->pm_count = 1; | |
1214 | pmap->pm_active = 0; | |
be3aecf7 | 1215 | pmap->pm_cached = 0; |
984263bc MD |
1216 | pmap->pm_ptphint = NULL; |
1217 | TAILQ_INIT(&pmap->pm_pvlist); | |
b12defdc MD |
1218 | TAILQ_INIT(&pmap->pm_pvlist_free); |
1219 | spin_init(&pmap->pm_spin); | |
1220 | lwkt_token_init(&pmap->pm_token, "pmap_tok"); | |
984263bc | 1221 | bzero(&pmap->pm_stats, sizeof pmap->pm_stats); |
eec2b734 | 1222 | pmap->pm_stats.resident_count = 1; |
984263bc MD |
1223 | } |
1224 | ||
e3161323 | 1225 | /* |
c3834cb2 MD |
1226 | * Clean up a pmap structure so it can be physically freed. This routine |
1227 | * is called by the vmspace dtor function. A great deal of pmap data is | |
1228 | * left passively mapped to improve vmspace management so we have a bit | |
1229 | * of cleanup work to do here. | |
4107b0c0 MD |
1230 | * |
1231 | * No requirements. | |
e3161323 MD |
1232 | */ |
1233 | void | |
1234 | pmap_puninit(pmap_t pmap) | |
1235 | { | |
c3834cb2 MD |
1236 | vm_page_t p; |
1237 | ||
3321ee05 | 1238 | pmap_wait(pmap, -1); |
e3161323 | 1239 | KKASSERT(pmap->pm_active == 0); |
c3834cb2 MD |
1240 | if ((p = pmap->pm_pdirm) != NULL) { |
1241 | KKASSERT(pmap->pm_pdir != NULL); | |
1242 | pmap_kremove((vm_offset_t)pmap->pm_pdir); | |
b12defdc | 1243 | vm_page_busy_wait(p, FALSE, "pgpun"); |
90244566 | 1244 | vm_page_unwire(p, 0); |
c3834cb2 MD |
1245 | vm_page_free_zero(p); |
1246 | pmap->pm_pdirm = NULL; | |
1247 | } | |
e3161323 MD |
1248 | if (pmap->pm_pdir) { |
1249 | kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pdir, PAGE_SIZE); | |
1250 | pmap->pm_pdir = NULL; | |
1251 | } | |
1252 | if (pmap->pm_pteobj) { | |
1253 | vm_object_deallocate(pmap->pm_pteobj); | |
1254 | pmap->pm_pteobj = NULL; | |
1255 | } | |
1256 | } | |
1257 | ||
984263bc MD |
1258 | /* |
1259 | * Wire in kernel global address entries. To avoid a race condition | |
1260 | * between pmap initialization and pmap_growkernel, this procedure | |
54a764e8 MD |
1261 | * adds the pmap to the master list (which growkernel scans to update), |
1262 | * then copies the template. | |
4107b0c0 MD |
1263 | * |
1264 | * No requirements. | |
984263bc MD |
1265 | */ |
1266 | void | |
840de426 | 1267 | pmap_pinit2(struct pmap *pmap) |
984263bc | 1268 | { |
b12defdc MD |
1269 | /* |
1270 | * XXX copies current process, does not fill in MPPTDI | |
1271 | */ | |
1272 | spin_lock(&pmap_spin); | |
54a764e8 | 1273 | TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode); |
984263bc | 1274 | bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); |
b12defdc | 1275 | spin_unlock(&pmap_spin); |
984263bc MD |
1276 | } |
1277 | ||
344ad853 | 1278 | /* |
eec2b734 | 1279 | * Attempt to release and free a vm_page in a pmap. Returns 1 on success, |
344ad853 | 1280 | * 0 on failure (if the procedure had to sleep). |
c3834cb2 MD |
1281 | * |
1282 | * When asked to remove the page directory page itself, we actually just | |
1283 | * leave it cached so we do not have to incur the SMP inval overhead of | |
1284 | * removing the kernel mapping. pmap_puninit() will take care of it. | |
4107b0c0 MD |
1285 | * |
1286 | * The caller must hold vm_token. | |
1287 | * This function can block regardless. | |
344ad853 | 1288 | */ |
984263bc | 1289 | static int |
840de426 | 1290 | pmap_release_free_page(struct pmap *pmap, vm_page_t p) |
984263bc MD |
1291 | { |
1292 | unsigned *pde = (unsigned *) pmap->pm_pdir; | |
4107b0c0 | 1293 | |
984263bc MD |
1294 | /* |
1295 | * This code optimizes the case of freeing non-busy | |
1296 | * page-table pages. Those pages are zero now, and | |
1297 | * might as well be placed directly into the zero queue. | |
1298 | */ | |
b12defdc MD |
1299 | if (vm_page_busy_try(p, FALSE)) { |
1300 | vm_page_sleep_busy(p, FALSE, "pmaprl"); | |
984263bc | 1301 | return 0; |
b12defdc | 1302 | } |
984263bc | 1303 | |
eec2b734 | 1304 | KKASSERT(pmap->pm_stats.resident_count > 0); |
2247fe02 | 1305 | KKASSERT(pde[p->pindex]); |
984263bc | 1306 | |
b1482674 MD |
1307 | /* |
1308 | * page table page's wire_count must be 1. Caller is the pmap | |
1309 | * termination code which holds the pm_pteobj, there is a race | |
1310 | * if someone else is trying to hold the VM object in order to | |
1311 | * clean up a wire_count. | |
1312 | */ | |
90244566 | 1313 | if (p->wire_count != 1) { |
b1482674 MD |
1314 | if (pmap->pm_pteobj->hold_count <= 1) |
1315 | panic("pmap_release: freeing wired page table page"); | |
1316 | kprintf("pmap_release_free_page: unwire race detected\n"); | |
1317 | vm_page_wakeup(p); | |
1318 | tsleep(p, 0, "pmapx", 1); | |
1319 | return 0; | |
984263bc | 1320 | } |
b1482674 MD |
1321 | |
1322 | /* | |
1323 | * Remove the page table page from the processes address space. | |
1324 | */ | |
1325 | pmap->pm_cached = 0; | |
1326 | pde[p->pindex] = 0; | |
1327 | --pmap->pm_stats.resident_count; | |
c3834cb2 MD |
1328 | if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) |
1329 | pmap->pm_ptphint = NULL; | |
1330 | ||
984263bc | 1331 | /* |
c3834cb2 MD |
1332 | * We leave the page directory page cached, wired, and mapped in |
1333 | * the pmap until the dtor function (pmap_puninit()) gets called. | |
1334 | * However, still clean it up so we can set PG_ZERO. | |
c1692ddf MD |
1335 | * |
1336 | * The pmap has already been removed from the pmap_list in the | |
1337 | * PTDPTDI case. | |
984263bc MD |
1338 | */ |
1339 | if (p->pindex == PTDPTDI) { | |
1340 | bzero(pde + KPTDI, nkpt * PTESIZE); | |
9388fcaa | 1341 | bzero(pde + MPPTDI, (NPDEPG - MPPTDI) * PTESIZE); |
c3834cb2 MD |
1342 | vm_page_flag_set(p, PG_ZERO); |
1343 | vm_page_wakeup(p); | |
1344 | } else { | |
92ba8d28 MD |
1345 | /* |
1346 | * This case can occur if a pmap_unwire_pte() loses a race | |
1347 | * while the page is unbusied. | |
1348 | */ | |
1349 | /*panic("pmap_release: page should already be gone %p", p);*/ | |
1350 | vm_page_flag_clear(p, PG_MAPPED); | |
90244566 | 1351 | vm_page_unwire(p, 0); |
c3834cb2 | 1352 | vm_page_free_zero(p); |
984263bc | 1353 | } |
984263bc MD |
1354 | return 1; |
1355 | } | |
1356 | ||
1357 | /* | |
4107b0c0 MD |
1358 | * This routine is called if the page table page is not mapped correctly. |
1359 | * | |
1360 | * The caller must hold vm_token. | |
984263bc MD |
1361 | */ |
1362 | static vm_page_t | |
840de426 | 1363 | _pmap_allocpte(pmap_t pmap, unsigned ptepindex) |
984263bc | 1364 | { |
480c83b6 | 1365 | vm_offset_t ptepa; |
984263bc MD |
1366 | vm_page_t m; |
1367 | ||
1368 | /* | |
d2d8515b MD |
1369 | * Find or fabricate a new pagetable page. Setting VM_ALLOC_ZERO |
1370 | * will zero any new page and mark it valid. | |
984263bc MD |
1371 | */ |
1372 | m = vm_page_grab(pmap->pm_pteobj, ptepindex, | |
d2d8515b | 1373 | VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY); |
984263bc MD |
1374 | |
1375 | KASSERT(m->queue == PQ_NONE, | |
1376 | ("_pmap_allocpte: %p->queue != PQ_NONE", m)); | |
1377 | ||
eec2b734 | 1378 | /* |
90244566 | 1379 | * Increment the wire count for the page we will be returning to |
eec2b734 MD |
1380 | * the caller. |
1381 | */ | |
90244566 | 1382 | vm_page_wire(m); |
eec2b734 MD |
1383 | |
1384 | /* | |
1385 | * It is possible that someone else got in and mapped the page |
1386 | * directory page while we were blocked, if so just unbusy and | |
90244566 | 1387 | * return the wired page. |
eec2b734 MD |
1388 | */ |
1389 | if ((ptepa = pmap->pm_pdir[ptepindex]) != 0) { | |
1390 | KKASSERT((ptepa & PG_FRAME) == VM_PAGE_TO_PHYS(m)); | |
1391 | vm_page_wakeup(m); | |
1392 | return(m); | |
1393 | } | |
1394 | ||
984263bc MD |
1395 | /* |
1396 | * Map the pagetable page into the process address space, if | |
1397 | * it isn't already there. | |
be3aecf7 MD |
1398 | * |
1399 | * NOTE: For safety clear pm_cached for all cpus including the | |
1400 | * current one when adding a PDE to the map. | |
984263bc | 1401 | */ |
eec2b734 | 1402 | ++pmap->pm_stats.resident_count; |
984263bc MD |
1403 | |
1404 | ptepa = VM_PAGE_TO_PHYS(m); | |
1405 | pmap->pm_pdir[ptepindex] = | |
1406 | (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); | |
be3aecf7 | 1407 | pmap->pm_cached = 0; |
984263bc MD |
1408 | |
1409 | /* | |
1410 | * Set the page table hint | |
1411 | */ | |
1412 | pmap->pm_ptphint = m; | |
984263bc MD |
1413 | vm_page_flag_set(m, PG_MAPPED); |
1414 | vm_page_wakeup(m); | |
1415 | ||
1416 | return m; | |
1417 | } | |
1418 | ||
4107b0c0 MD |
1419 | /* |
1420 | * Allocate a page table entry for a va. | |
1421 | * | |
1422 | * The caller must hold vm_token. | |
1423 | */ | |
984263bc | 1424 | static vm_page_t |
840de426 | 1425 | pmap_allocpte(pmap_t pmap, vm_offset_t va) |
984263bc MD |
1426 | { |
1427 | unsigned ptepindex; | |
1428 | vm_offset_t ptepa; | |
b1482674 | 1429 | vm_page_t mpte; |
984263bc | 1430 | |
b12defdc MD |
1431 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); |
1432 | ||
984263bc MD |
1433 | /* |
1434 | * Calculate pagetable page index | |
1435 | */ | |
1436 | ptepindex = va >> PDRSHIFT; | |
1437 | ||
1438 | /* | |
1439 | * Get the page directory entry | |
1440 | */ | |
1441 | ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; | |
1442 | ||
1443 | /* | |
1444 | * This supports switching from a 4MB page to a | |
1445 | * normal 4K page. | |
1446 | */ | |
1447 | if (ptepa & PG_PS) { | |
1448 | pmap->pm_pdir[ptepindex] = 0; | |
1449 | ptepa = 0; | |
0f7a3396 | 1450 | smp_invltlb(); |
54341a3b | 1451 | cpu_invltlb(); |
984263bc MD |
1452 | } |
1453 | ||
1454 | /* | |
1455 | * If the page table page is mapped, we just increment the | |
90244566 | 1456 | * wire count, and activate it. |
984263bc MD |
1457 | */ |
1458 | if (ptepa) { | |
1459 | /* | |
1460 | * In order to get the page table page, try the | |
1461 | * hint first. | |
1462 | */ | |
b1482674 MD |
1463 | if ((mpte = pmap->pm_ptphint) != NULL && |
1464 | (mpte->pindex == ptepindex) && | |
1465 | (mpte->flags & PG_BUSY) == 0) { | |
1466 | vm_page_wire_quick(mpte); | |
984263bc | 1467 | } else { |
b1482674 MD |
1468 | mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); |
1469 | pmap->pm_ptphint = mpte; | |
1470 | vm_page_wire_quick(mpte); | |
1471 | vm_page_wakeup(mpte); | |
984263bc | 1472 | } |
b1482674 | 1473 | return mpte; |
984263bc MD |
1474 | } |
1475 | /* | |
1476 | * Here if the pte page isn't mapped, or if it has been deallocated. | |
1477 | */ | |
1478 | return _pmap_allocpte(pmap, ptepindex); | |
1479 | } | |
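/*
 * Worked example of the index math above, assuming the usual non-PAE
 * i386 constants (PDRSHIFT = 22, PAGE_SHIFT = 12, NPTEPG = 1024):
 *
 *	va        = 0x0804f000
 *	ptepindex = va >> PDRSHIFT                      = 32   (4MB region)
 *	pte slot  = (va >> PAGE_SHIFT) & (NPTEPG - 1)   = 79
 *
 * So pmap_allocpte(pmap, 0x0804f000) wires the page table page backing
 * pm_pdir[32], calling _pmap_allocpte() to create it if it is not resident.
 */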
1480 | ||
1481 | ||
1482 | /*************************************************** | |
1f804340 | 1483 | * Pmap allocation/deallocation routines. |
984263bc MD |
1484 | ***************************************************/ |
1485 | ||
1486 | /* | |
1487 | * Release any resources held by the given physical map. | |
1488 | * Called when a pmap initialized by pmap_pinit is being released. | |
1489 | * Should only be called if the map contains no valid mappings. | |
4107b0c0 | 1490 | * |
b12defdc | 1491 | * Caller must hold pmap->pm_token |
984263bc | 1492 | */ |
1f804340 MD |
1493 | static int pmap_release_callback(struct vm_page *p, void *data); |
1494 | ||
984263bc | 1495 | void |
840de426 | 1496 | pmap_release(struct pmap *pmap) |
984263bc | 1497 | { |
984263bc | 1498 | vm_object_t object = pmap->pm_pteobj; |
1f804340 | 1499 | struct rb_vm_page_scan_info info; |
984263bc | 1500 | |
4107b0c0 MD |
1501 | KASSERT(pmap->pm_active == 0, |
1502 | ("pmap still active! %08x", pmap->pm_active)); | |
984263bc MD |
1503 | #if defined(DIAGNOSTIC) |
1504 | if (object->ref_count != 1) | |
1505 | panic("pmap_release: pteobj reference count != 1"); | |
1506 | #endif | |
1507 | ||
1f804340 MD |
1508 | info.pmap = pmap; |
1509 | info.object = object; | |
b12defdc MD |
1510 | |
1511 | spin_lock(&pmap_spin); | |
54a764e8 | 1512 | TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode); |
b12defdc | 1513 | spin_unlock(&pmap_spin); |
1f804340 | 1514 | |
b12defdc | 1515 | vm_object_hold(object); |
1f804340 | 1516 | do { |
1f804340 MD |
1517 | info.error = 0; |
1518 | info.mpte = NULL; | |
1519 | info.limit = object->generation; | |
1520 | ||
1521 | vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, | |
1522 | pmap_release_callback, &info); | |
1523 | if (info.error == 0 && info.mpte) { | |
1524 | if (!pmap_release_free_page(pmap, info.mpte)) | |
1525 | info.error = 1; | |
984263bc | 1526 | } |
1f804340 | 1527 | } while (info.error); |
2f2d9e58 | 1528 | vm_object_drop(object); |
b12defdc MD |
1529 | |
1530 | pmap->pm_cached = 0; | |
1f804340 MD |
1531 | } |
1532 | ||
4107b0c0 MD |
1533 | /* |
1534 | * The caller must hold vm_token. | |
1535 | */ | |
1f804340 MD |
1536 | static int |
1537 | pmap_release_callback(struct vm_page *p, void *data) | |
1538 | { | |
1539 | struct rb_vm_page_scan_info *info = data; | |
1540 | ||
1541 | if (p->pindex == PTDPTDI) { | |
1542 | info->mpte = p; | |
1543 | return(0); | |
344ad853 | 1544 | } |
1f804340 MD |
1545 | if (!pmap_release_free_page(info->pmap, p)) { |
1546 | info->error = 1; | |
1547 | return(-1); | |
1548 | } | |
1549 | if (info->object->generation != info->limit) { | |
1550 | info->error = 1; | |
1551 | return(-1); | |
1552 | } | |
1553 | return(0); | |
984263bc | 1554 | } |
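/*
 * Sketch of the retry protocol used by pmap_release() above: each pass
 * records object->generation in info.limit before the RB-tree scan.  If
 * pmap_release_free_page() fails or the generation changes (meaning the
 * scan blocked and the page list mutated underneath us), info.error is
 * set and the whole scan is retried.  The page directory page itself
 * (pindex PTDPTDI) is only remembered in info.mpte and freed after the
 * loop, once every other page table page has been released.
 */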
984263bc MD |
1555 | |
1556 | /* | |
0e5797fe | 1557 | * Grow the number of kernel page table entries, if needed. |
4107b0c0 MD |
1558 | * |
1559 | * No requirements. | |
984263bc MD |
1560 | */ |
1561 | void | |
a8cf2878 | 1562 | pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) |
984263bc | 1563 | { |
a8cf2878 | 1564 | vm_offset_t addr = kend; |
54a764e8 | 1565 | struct pmap *pmap; |
984263bc MD |
1566 | vm_offset_t ptppaddr; |
1567 | vm_page_t nkpg; | |
1568 | pd_entry_t newpdir; | |
1569 | ||
b12defdc | 1570 | vm_object_hold(kptobj); |
984263bc MD |
1571 | if (kernel_vm_end == 0) { |
1572 | kernel_vm_end = KERNBASE; | |
1573 | nkpt = 0; | |
1574 | while (pdir_pde(PTD, kernel_vm_end)) { | |
4107b0c0 MD |
1575 | kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & |
1576 | ~(PAGE_SIZE * NPTEPG - 1); | |
984263bc MD |
1577 | nkpt++; |
1578 | } | |
1579 | } | |
1580 | addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); | |
1581 | while (kernel_vm_end < addr) { | |
1582 | if (pdir_pde(PTD, kernel_vm_end)) { | |
4107b0c0 MD |
1583 | kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & |
1584 | ~(PAGE_SIZE * NPTEPG - 1); | |
984263bc MD |
1585 | continue; |
1586 | } | |
1587 | ||
1588 | /* | |
1589 | * This index is bogus, but out of the way | |
1590 | */ | |
4107b0c0 MD |
1591 | nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_NORMAL | |
1592 | VM_ALLOC_SYSTEM | | |
1593 | VM_ALLOC_INTERRUPT); | |
dc1fd4b3 | 1594 | if (nkpg == NULL) |
984263bc MD |
1595 | panic("pmap_growkernel: no memory to grow kernel"); |
1596 | ||
984263bc MD |
1597 | vm_page_wire(nkpg); |
1598 | ptppaddr = VM_PAGE_TO_PHYS(nkpg); | |
1599 | pmap_zero_page(ptppaddr); | |
1600 | newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); | |
1601 | pdir_pde(PTD, kernel_vm_end) = newpdir; | |
fbbaeba3 | 1602 | *pmap_pde(&kernel_pmap, kernel_vm_end) = newpdir; |
0e5797fe MD |
1603 | nkpt++; |
1604 | ||
1605 | /* | |
54a764e8 | 1606 | * This update must be interlocked with pmap_pinit2. |
0e5797fe | 1607 | */ |
b12defdc | 1608 | spin_lock(&pmap_spin); |
54a764e8 MD |
1609 | TAILQ_FOREACH(pmap, &pmap_list, pm_pmnode) { |
1610 | *pmap_pde(pmap, kernel_vm_end) = newpdir; | |
1611 | } | |
b12defdc | 1612 | spin_unlock(&pmap_spin); |
54a764e8 MD |
1613 | kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & |
1614 | ~(PAGE_SIZE * NPTEPG - 1); | |
984263bc | 1615 | } |
b12defdc | 1616 | vm_object_drop(kptobj); |
984263bc MD |
1617 | } |
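/*
 * The rounding expression above advances to the next page-directory
 * boundary.  With the usual i386 values (PAGE_SIZE = 4096, NPTEPG = 1024)
 * the step is 4MB (0x400000), so for example:
 *
 *	kend = 0xc1234567
 *	addr = (kend + 0x400000) & ~(0x400000 - 1) = 0xc1400000
 *
 * i.e. kernel page table pages are allocated out to the 4MB boundary at
 * or above the requested end address.
 */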
1618 | ||
984263bc | 1619 | /* |
4107b0c0 MD |
1620 | * Add a reference to the specified pmap. |
1621 | * | |
1622 | * No requirements. | |
984263bc MD |
1623 | */ |
1624 | void | |
840de426 | 1625 | pmap_reference(pmap_t pmap) |
984263bc | 1626 | { |
4107b0c0 MD |
1627 | if (pmap) { |
1628 | lwkt_gettoken(&vm_token); | |
1629 | ++pmap->pm_count; | |
1630 | lwkt_reltoken(&vm_token); | |
984263bc MD |
1631 | } |
1632 | } | |
1633 | ||
3321ee05 MD |
1634 | /* |
1635 | * vm_token must be held | |
1636 | */ | |
1637 | static | |
1638 | void | |
1639 | pmap_hold(pmap_t pmap) | |
1640 | { | |
1641 | ++pmap->pm_count; | |
1642 | } | |
1643 | ||
1644 | /* | |
1645 | * vm_token must be held | |
1646 | */ | |
1647 | static | |
1648 | void | |
1649 | pmap_drop(pmap_t pmap) | |
1650 | { | |
1651 | --pmap->pm_count; | |
22c4e116 | 1652 | if (pmap->pm_count == (int)0x80000000) |
3321ee05 MD |
1653 | wakeup(pmap); |
1654 | } | |
1655 | ||
1656 | static | |
1657 | void | |
1658 | pmap_wait(pmap_t pmap, int count) | |
1659 | { | |
1660 | lwkt_gettoken(&vm_token); | |
1661 | pmap->pm_count += count; | |
1662 | if (pmap->pm_count & 0x7FFFFFFF) { | |
1663 | while (pmap->pm_count & 0x7FFFFFFF) { | |
1664 | pmap->pm_count |= 0x80000000; | |
1665 | tsleep(pmap, 0, "pmapd", 0); | |
1666 | pmap->pm_count &= ~0x80000000; | |
1667 | kprintf("pmap_wait: race averted\n"); | |
1668 | } | |
1669 | } | |
1670 | lwkt_reltoken(&vm_token); | |
1671 | } | |
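/*
 * pm_count encoding used by pmap_hold()/pmap_drop()/pmap_wait(): the low
 * 31 bits are the hold count and the high bit (0x80000000) marks that a
 * waiter is sleeping on the pmap.  pmap_drop() issues a wakeup() when the
 * count reaches zero with the wait bit set (pm_count == 0x80000000), and
 * pmap_wait() keeps setting the bit and tsleep()ing until all transient
 * holds have drained.
 */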
1672 | ||
984263bc | 1673 | /*************************************************** |
4107b0c0 | 1674 | * page management routines. |
984263bc MD |
1675 | ***************************************************/ |
1676 | ||
1677 | /* | |
8a8d5d85 MD |
1678 | * free the pv_entry back to the free list. This function may be |
1679 | * called from an interrupt. | |
4107b0c0 MD |
1680 | * |
1681 | * The caller must hold vm_token. | |
984263bc MD |
1682 | */ |
1683 | static PMAP_INLINE void | |
840de426 | 1684 | free_pv_entry(pv_entry_t pv) |
984263bc | 1685 | { |
2bb9cc6f MD |
1686 | struct mdglobaldata *gd; |
1687 | ||
5926987a MD |
1688 | #ifdef PMAP_DEBUG |
1689 | KKASSERT(pv->pv_m != NULL); | |
1690 | pv->pv_m = NULL; | |
1691 | #endif | |
2bb9cc6f | 1692 | gd = mdcpu; |
984263bc | 1693 | pv_entry_count--; |
2bb9cc6f MD |
1694 | if (gd->gd_freepv == NULL) |
1695 | gd->gd_freepv = pv; | |
1696 | else | |
1697 | zfree(pvzone, pv); | |
984263bc MD |
1698 | } |
1699 | ||
1700 | /* | |
1701 | * get a new pv_entry, allocating a block from the system | |
2bb9cc6f MD |
1702 | * when needed. This function may be called from an interrupt thread. |
1703 | * | |
1704 | * THIS FUNCTION CAN BLOCK ON THE ZALLOC TOKEN, causing serialization | 
1705 | * of other tokens (aka vm_token) to be temporarily lost. | 
4107b0c0 MD |
1706 | * |
1707 | * The caller must hold vm_token. | |
984263bc MD |
1708 | */ |
1709 | static pv_entry_t | |
1710 | get_pv_entry(void) | |
1711 | { | |
2bb9cc6f MD |
1712 | struct mdglobaldata *gd; |
1713 | pv_entry_t pv; | |
1714 | ||
984263bc MD |
1715 | pv_entry_count++; |
1716 | if (pv_entry_high_water && | |
20479584 MD |
1717 | (pv_entry_count > pv_entry_high_water) && |
1718 | (pmap_pagedaemon_waken == 0)) { | |
984263bc MD |
1719 | pmap_pagedaemon_waken = 1; |
1720 | wakeup (&vm_pages_needed); | |
1721 | } | |
2bb9cc6f MD |
1722 | gd = mdcpu; |
1723 | if ((pv = gd->gd_freepv) != NULL) | |
1724 | gd->gd_freepv = NULL; | |
1725 | else | |
1726 | pv = zalloc(pvzone); | |
1727 | return pv; | |
984263bc MD |
1728 | } |
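/*
 * Illustrative summary of the pv_entry allocation strategy above: each
 * cpu keeps at most one spare entry in gd->gd_freepv.  get_pv_entry()
 * consumes the spare if present, otherwise it falls through to
 * zalloc(pvzone), which may block; free_pv_entry() refills the spare
 * before returning entries to the zone.  Callers such as pmap_enter()
 * therefore grab their pv_entry up front, before entering any section
 * that must not block.
 */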
1729 | ||
1730 | /* | |
1731 | * This routine is very drastic, but can save the system | |
1732 | * in a pinch. | |
4107b0c0 MD |
1733 | * |
1734 | * No requirements. | |
984263bc MD |
1735 | */ |
1736 | void | |
840de426 | 1737 | pmap_collect(void) |
984263bc MD |
1738 | { |
1739 | int i; | |
1740 | vm_page_t m; | |
1741 | static int warningdone=0; | |
1742 | ||
1743 | if (pmap_pagedaemon_waken == 0) | |
1744 | return; | |
4107b0c0 | 1745 | lwkt_gettoken(&vm_token); |
20479584 | 1746 | pmap_pagedaemon_waken = 0; |
984263bc MD |
1747 | |
1748 | if (warningdone < 5) { | |
948209ce MD |
1749 | kprintf("pmap_collect: collecting pv entries -- " |
1750 | "suggest increasing PMAP_SHPGPERPROC\n"); | |
984263bc MD |
1751 | warningdone++; |
1752 | } | |
1753 | ||
b12defdc | 1754 | for (i = 0; i < vm_page_array_size; i++) { |
984263bc | 1755 | m = &vm_page_array[i]; |
b12defdc | 1756 | if (m->wire_count || m->hold_count) |
984263bc | 1757 | continue; |
b12defdc MD |
1758 | if (vm_page_busy_try(m, TRUE) == 0) { |
1759 | if (m->wire_count == 0 && m->hold_count == 0) { | |
1760 | pmap_remove_all(m); | |
1761 | } | |
1762 | vm_page_wakeup(m); | |
4107b0c0 | 1763 | } |
984263bc | 1764 | } |
4107b0c0 | 1765 | lwkt_reltoken(&vm_token); |
984263bc MD |
1766 | } |
1767 | ||
1768 | ||
1769 | /* | |
b1482674 MD |
1770 | * Remove the pv entry and unwire the page table page related to the |
1771 | * pte the caller has cleared from the page table. | |
4107b0c0 MD |
1772 | * |
1773 | * The caller must hold vm_token. | |
984263bc | 1774 | */ |
554cf9ac | 1775 | static void |
0f7a3396 | 1776 | pmap_remove_entry(struct pmap *pmap, vm_page_t m, |
4107b0c0 | 1777 | vm_offset_t va, pmap_inval_info_t info) |
984263bc MD |
1778 | { |
1779 | pv_entry_t pv; | |
984263bc | 1780 | |
b1482674 MD |
1781 | /* |
1782 | * Cannot block | |
1783 | */ | |
4107b0c0 | 1784 | ASSERT_LWKT_TOKEN_HELD(&vm_token); |
984263bc MD |
1785 | if (m->md.pv_list_count < pmap->pm_stats.resident_count) { |
1786 | TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { | |
1787 | if (pmap == pv->pv_pmap && va == pv->pv_va) | |
1788 | break; | |
1789 | } | |
1790 | } else { | |
1791 | TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { | |
5926987a MD |
1792 | #ifdef PMAP_DEBUG |
1793 | KKASSERT(pv->pv_pmap == pmap); | |
1794 | #endif | |
1795 | if (va == pv->pv_va) | |
984263bc MD |
1796 | break; |
1797 | } | |
1798 | } | |
5926987a | 1799 | KKASSERT(pv); |
984263bc | 1800 | |
b1482674 MD |
1801 | /* |
1802 | * Cannot block | |
1803 | */ | |
5926987a MD |
1804 | test_m_maps_pv(m, pv); |
1805 | TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); | |
1806 | m->md.pv_list_count--; | |
cef01e15 MD |
1807 | if (m->object) |
1808 | atomic_add_int(&m->object->agg_pv_list_count, -1); | |
5926987a MD |
1809 | if (TAILQ_EMPTY(&m->md.pv_list)) |
1810 | vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); | |
1811 | TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); | |
1812 | ++pmap->pm_generation; | |
b1482674 MD |
1813 | |
1814 | /* | |
1815 | * This can block. | |
1816 | */ | |
b12defdc | 1817 | vm_object_hold(pmap->pm_pteobj); |
554cf9ac | 1818 | pmap_unuse_pt(pmap, va, pv->pv_ptem, info); |
b12defdc | 1819 | vm_object_drop(pmap->pm_pteobj); |
5926987a | 1820 | free_pv_entry(pv); |
984263bc MD |
1821 | } |
1822 | ||
1823 | /* | |
4107b0c0 MD |
1824 | * Create a pv entry for page at pa for (pmap, va). |
1825 | * | |
1826 | * The caller must hold vm_token. | |
984263bc MD |
1827 | */ |
1828 | static void | |
2bb9cc6f MD |
1829 | pmap_insert_entry(pmap_t pmap, pv_entry_t pv, vm_offset_t va, |
1830 | vm_page_t mpte, vm_page_t m) | |
984263bc | 1831 | { |
5926987a MD |
1832 | #ifdef PMAP_DEBUG |
1833 | KKASSERT(pv->pv_m == NULL); | |
1834 | pv->pv_m = m; | |
1835 | #endif | |
984263bc MD |
1836 | pv->pv_va = va; |
1837 | pv->pv_pmap = pmap; | |
1838 | pv->pv_ptem = mpte; | |
1839 | ||
1840 | TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); | |
1841 | TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); | |
5926987a | 1842 | ++pmap->pm_generation; |
984263bc | 1843 | m->md.pv_list_count++; |
cef01e15 MD |
1844 | if (m->object) |
1845 | atomic_add_int(&m->object->agg_pv_list_count, 1); | |
984263bc MD |
1846 | } |
1847 | ||
1848 | /* | |
5926987a MD |
1849 | * pmap_remove_pte: do the work needed to unmap a page in a process. | 
1850 | * | |
4107b0c0 MD |
1851 | * The caller must hold vm_token. |
1852 | * | |
1853 | * WARNING! As with most other pmap functions this one can block, so | |
1854 | * callers using temporary page table mappings must reload | |
1855 | * them. | |
984263bc | 1856 | */ |
554cf9ac | 1857 | static void |
0f7a3396 | 1858 | pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va, |
5926987a | 1859 | pmap_inval_info_t info) |
984263bc MD |
1860 | { |
1861 | unsigned oldpte; | |
1862 | vm_page_t m; | |
1863 | ||
5926987a | 1864 | ptbase_assert(pmap); |
c2fb025d | 1865 | pmap_inval_interlock(info, pmap, va); |
5926987a | 1866 | ptbase_assert(pmap); |
984263bc MD |
1867 | oldpte = loadandclear(ptq); |
1868 | if (oldpte & PG_W) | |
1869 | pmap->pm_stats.wired_count -= 1; | |
c2fb025d | 1870 | pmap_inval_deinterlock(info, pmap); |
90244566 | 1871 | KKASSERT(oldpte & PG_V); |
984263bc MD |
1872 | /* |
1873 | * Machines that don't support invlpg also don't support | 
0f7a3396 MD |
1874 | * PG_G. XXX PG_G is disabled for SMP so don't worry about |
1875 | * the SMP case. | |
984263bc MD |
1876 | */ |
1877 | if (oldpte & PG_G) | |
41a01a4d | 1878 | cpu_invlpg((void *)va); |
eec2b734 MD |
1879 | KKASSERT(pmap->pm_stats.resident_count > 0); |
1880 | --pmap->pm_stats.resident_count; | |
984263bc MD |
1881 | if (oldpte & PG_MANAGED) { |
1882 | m = PHYS_TO_VM_PAGE(oldpte); | |
1883 | if (oldpte & PG_M) { | |
1884 | #if defined(PMAP_DIAGNOSTIC) | |
1885 | if (pmap_nw_modified((pt_entry_t) oldpte)) { | |
d557216f MD |
1886 | kprintf("pmap_remove: modified page not " |
1887 | "writable: va: %p, pte: 0x%lx\n", | |
1888 | (void *)va, (long)oldpte); | |
984263bc MD |
1889 | } |
1890 | #endif | |
1891 | if (pmap_track_modified(va)) | |
1892 | vm_page_dirty(m); | |
1893 | } | |
1894 | if (oldpte & PG_A) | |
1895 | vm_page_flag_set(m, PG_REFERENCED); | |
554cf9ac | 1896 | pmap_remove_entry(pmap, m, va, info); |
984263bc | 1897 | } else { |
554cf9ac | 1898 | pmap_unuse_pt(pmap, va, NULL, info); |
984263bc | 1899 | } |
984263bc MD |
1900 | } |
1901 | ||
1902 | /* | |
5926987a | 1903 | * Remove a single page from a process address space. |
e0e69b7d | 1904 | * |
4107b0c0 | 1905 | * The caller must hold vm_token. |
984263bc MD |
1906 | */ |
1907 | static void | |
0f7a3396 | 1908 | pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info) |
984263bc | 1909 | { |
840de426 | 1910 | unsigned *ptq; |
984263bc MD |
1911 | |
1912 | /* | |
90244566 | 1913 | * If there is no pte for this address, just skip it!!! Otherwise |
e0e69b7d | 1914 | * get a local va for mappings for this pmap and remove the entry. |
984263bc | 1915 | */ |
e0e69b7d MD |
1916 | if (*pmap_pde(pmap, va) != 0) { |
1917 | ptq = get_ptbase(pmap) + i386_btop(va); | |
1918 | if (*ptq) { | |
0f7a3396 | 1919 | pmap_remove_pte(pmap, ptq, va, info); |
5926987a | 1920 | /* ptq invalid */ |
e0e69b7d | 1921 | } |
984263bc | 1922 | } |
984263bc MD |
1923 | } |
1924 | ||
1925 | /* | |
4107b0c0 | 1926 | * Remove the given range of addresses from the specified map. |
984263bc | 1927 | * |
4107b0c0 MD |
1928 | * It is assumed that the start and end are properly rounded to the page |
1929 | * size. | |
e0e69b7d | 1930 | * |
4107b0c0 | 1931 | * No requirements. |
984263bc MD |
1932 | */ |
1933 | void | |
840de426 | 1934 | pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) |
984263bc | 1935 | { |
840de426 | 1936 | unsigned *ptbase; |
984263bc MD |
1937 | vm_offset_t pdnxt; |
1938 | vm_offset_t ptpaddr; | |
1939 | vm_offset_t sindex, eindex; | |
0f7a3396 | 1940 | struct pmap_inval_info info; |
984263bc MD |
1941 | |
1942 | if (pmap == NULL) | |
1943 | return; | |
1944 | ||
b12defdc | 1945 | vm_object_hold(pmap->pm_pteobj); |
4107b0c0 MD |
1946 | lwkt_gettoken(&vm_token); |
1947 | if (pmap->pm_stats.resident_count == 0) { | |
1948 | lwkt_reltoken(&vm_token); | |
b12defdc | 1949 | vm_object_drop(pmap->pm_pteobj); |
984263bc | 1950 | return; |
4107b0c0 | 1951 | } |
984263bc | 1952 | |
0f7a3396 MD |
1953 | pmap_inval_init(&info); |
1954 | ||
984263bc MD |
1955 | /* |
1956 | * Special handling for removing one page: a very | 
1957 | * common operation for which we can short-circuit | 
1958 | * some code. | 
1959 | */ | |
1960 | if (((sva + PAGE_SIZE) == eva) && | |
1961 | (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { | |
0f7a3396 | 1962 | pmap_remove_page(pmap, sva, &info); |
c2fb025d | 1963 | pmap_inval_done(&info); |
4107b0c0 | 1964 | lwkt_reltoken(&vm_token); |
b12defdc | 1965 | vm_object_drop(pmap->pm_pteobj); |
984263bc MD |
1966 | return; |
1967 | } | |
1968 | ||
984263bc MD |
1969 | /* |
1970 | * Get a local virtual address for the mappings that are being | |
1971 | * worked with. | |
1972 | */ | |
984263bc MD |
1973 | sindex = i386_btop(sva); |
1974 | eindex = i386_btop(eva); | |
1975 | ||
554cf9ac | 1976 | while (sindex < eindex) { |
984263bc MD |
1977 | unsigned pdirindex; |
1978 | ||
1979 | /* | |
554cf9ac | 1980 | * Stop scanning if no pages are left |
984263bc | 1981 | */ |
984263bc MD |
1982 | if (pmap->pm_stats.resident_count == 0) |
1983 | break; | |
1984 | ||
554cf9ac MD |
1985 | /* |
1986 | * Calculate index for next page table, limited by eindex. | |
1987 | */ | |
1988 | pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); | |
1989 | if (pdnxt > eindex) | |
1990 | pdnxt = eindex; | |
1991 | ||
984263bc | 1992 | pdirindex = sindex / NPDEPG; |
554cf9ac MD |
1993 | ptpaddr = (unsigned)pmap->pm_pdir[pdirindex]; |
1994 | if (ptpaddr & PG_PS) { | |
c2fb025d | 1995 | pmap_inval_interlock(&info, pmap, -1); |
984263bc MD |
1996 | pmap->pm_pdir[pdirindex] = 0; |
1997 | pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; | |
be3aecf7 | 1998 | pmap->pm_cached = 0; |
c2fb025d | 1999 | pmap_inval_deinterlock(&info, pmap); |
554cf9ac | 2000 | sindex = pdnxt; |
984263bc MD |
2001 | continue; |
2002 | } | |
2003 | ||
2004 | /* | |
2005 | * Weed out invalid mappings. Note: we assume that the page | |
2006 | * directory table is always allocated, and in kernel virtual. | |
2007 | */ | |
554cf9ac MD |
2008 | if (ptpaddr == 0) { |
2009 | sindex = pdnxt; | |
984263bc | 2010 | continue; |
984263bc MD |
2011 | } |
2012 | ||
8790d7d8 | 2013 | /* |
554cf9ac MD |
2014 | * Sub-scan the page table page. pmap_remove_pte() can |
2015 | * block on us, invalidating ptbase, so we must reload | |
2016 | * ptbase and we must also check whether the page directory | |
2017 | * page is still present. | |
8790d7d8 | 2018 | */ |
554cf9ac | 2019 | while (sindex < pdnxt) { |
984263bc | 2020 | vm_offset_t va; |
8790d7d8 MD |
2021 | |
2022 | ptbase = get_ptbase(pmap); | |
554cf9ac MD |
2023 | if (ptbase[sindex]) { |
2024 | va = i386_ptob(sindex); | |
2025 | pmap_remove_pte(pmap, ptbase + sindex, | |
2026 | va, &info); | |
2027 | } | |
2028 | if (pmap->pm_pdir[pdirindex] == 0 || | |
2029 | (pmap->pm_pdir[pdirindex] & PG_PS)) { | |
984263bc | 2030 | break; |
554cf9ac MD |
2031 | } |
2032 | ++sindex; | |
984263bc MD |
2033 | } |
2034 | } | |
c2fb025d | 2035 | pmap_inval_done(&info); |
4107b0c0 | 2036 | lwkt_reltoken(&vm_token); |
b12defdc | 2037 | vm_object_drop(pmap->pm_pteobj); |
984263bc MD |
2038 | } |
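/*
 * Typical use, as seen from the fast path above: removing a single
 * mapping is just
 *
 *	pmap_remove(pmap, va, va + PAGE_SIZE);
 *
 * which takes the short-circuit pmap_remove_page() branch as long as the
 * covering page directory entry is not a 4MB (PG_PS) mapping.  Larger
 * ranges fall into the per-page-table sub-scan, which must re-fetch
 * ptbase after every pmap_remove_pte() call because that call can block.
 */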
2039 | ||
2040 | /* | |
4107b0c0 MD |
2041 | * Removes this physical page from all physical maps in which it resides. |
2042 | * Reflects back modify bits to the pager. | |
984263bc | 2043 | * |
3321ee05 | 2044 | * vm_token must be held by caller. |
984263bc | 2045 | */ |
984263bc | 2046 | static void |
840de426 | 2047 | pmap_remove_all(vm_page_t m) |
984263bc | 2048 | { |
0f7a3396 | 2049 | struct pmap_inval_info info; |
840de426 | 2050 | unsigned *pte, tpte; |
0f7a3396 | 2051 | pv_entry_t pv; |
3321ee05 | 2052 | pmap_t pmap; |
984263bc | 2053 | |
bee81bdd SS |
2054 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) |
2055 | return; | |
bcc6a8ac MD |
2056 | if (TAILQ_EMPTY(&m->md.pv_list)) |
2057 | return; | |
984263bc | 2058 | |
3c3589cf | 2059 | lwkt_gettoken(&vm_token); |
0f7a3396 | 2060 | pmap_inval_init(&info); |
984263bc | 2061 | while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { |
3321ee05 MD |
2062 | pmap = pv->pv_pmap; |
2063 | KKASSERT(pmap->pm_stats.resident_count > 0); | |
2064 | --pmap->pm_stats.resident_count; | |
2065 | pmap_hold(pmap); | |
984263bc | 2066 | |
3321ee05 MD |
2067 | pte = pmap_pte_quick(pmap, pv->pv_va); |
2068 | pmap_inval_interlock(&info, pmap, pv->pv_va); | |
984263bc MD |
2069 | tpte = loadandclear(pte); |
2070 | if (tpte & PG_W) | |
3321ee05 MD |
2071 | pmap->pm_stats.wired_count--; |
2072 | pmap_inval_deinterlock(&info, pmap); | |
984263bc MD |
2073 | if (tpte & PG_A) |
2074 | vm_page_flag_set(m, PG_REFERENCED); | |
c2fb025d | 2075 | KKASSERT(PHYS_TO_VM_PAGE(tpte) == m); |
984263bc MD |
2076 | |
2077 | /* | |
2078 | * Update the vm_page_t clean and reference bits. | |
2079 | */ | |
2080 | if (tpte & PG_M) { | |
2081 | #if defined(PMAP_DIAGNOSTIC) | |
2082 | if (pmap_nw_modified((pt_entry_t) tpte)) { | |
d557216f MD |
2083 | kprintf("pmap_remove_all: modified page " |
2084 | "not writable: va: %p, pte: 0x%lx\n", | |
2085 | (void *)pv->pv_va, (long)tpte); | |
984263bc MD |
2086 | } |
2087 | #endif | |
2088 | if (pmap_track_modified(pv->pv_va)) | |
2089 | vm_page_dirty(m); | |
2090 | } | |
5926987a MD |
2091 | #ifdef PMAP_DEBUG |
2092 | KKASSERT(pv->pv_m == m); | |
2093 | #endif | |
2bb9cc6f | 2094 | KKASSERT(pv == TAILQ_FIRST(&m->md.pv_list)); |
984263bc | 2095 | TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); |
3321ee05 MD |
2096 | TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); |
2097 | ++pmap->pm_generation; | |
984263bc | 2098 | m->md.pv_list_count--; |
cef01e15 MD |
2099 | if (m->object) |
2100 | atomic_add_int(&m->object->agg_pv_list_count, -1); | |
17cde63e MD |
2101 | if (TAILQ_EMPTY(&m->md.pv_list)) |
2102 | vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); | |
3321ee05 MD |
2103 | vm_object_hold(pmap->pm_pteobj); |
2104 | pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem, &info); | |
2105 | vm_object_drop(pmap->pm_pteobj); | |
984263bc | 2106 | free_pv_entry(pv); |
3321ee05 | 2107 | pmap_drop(pmap); |
984263bc | 2108 | } |
17cde63e | 2109 | KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); |
c2fb025d | 2110 | pmap_inval_done(&info); |
3c3589cf | 2111 | lwkt_reltoken(&vm_token); |
984263bc MD |
2112 | } |
2113 | ||
2114 | /* | |
4107b0c0 MD |
2115 | * Set the physical protection on the specified range of this map |
2116 | * as requested. | |
e0e69b7d | 2117 | * |
4107b0c0 | 2118 | * No requirements. |
984263bc MD |
2119 | */ |
2120 | void | |
2121 | pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) | |
2122 | { | |
840de426 | 2123 | unsigned *ptbase; |
984263bc MD |
2124 | vm_offset_t pdnxt, ptpaddr; |
2125 | vm_pindex_t sindex, eindex; | |
0f7a3396 | 2126 | pmap_inval_info info; |
984263bc MD |
2127 | |
2128 | if (pmap == NULL) | |
2129 | return; | |
2130 | ||
2131 | if ((prot & VM_PROT_READ) == VM_PROT_NONE) { | |
2132 | pmap_remove(pmap, sva, eva); | |
2133 | return; | |
2134 | } | |
2135 | ||
2136 | if (prot & VM_PROT_WRITE) | |
2137 | return; | |
2138 | ||
4107b0c0 | 2139 | lwkt_gettoken(&vm_token); |
0f7a3396 | 2140 | pmap_inval_init(&info); |
984263bc MD |
2141 | |
2142 | ptbase = get_ptbase(pmap); | |
2143 | ||
2144 | sindex = i386_btop(sva); | |
2145 | eindex = i386_btop(eva); | |
2146 | ||
2147 | for (; sindex < eindex; sindex = pdnxt) { | |
984263bc MD |
2148 | unsigned pdirindex; |
2149 | ||
2150 | pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); | |
2151 | ||
2152 | pdirindex = sindex / NPDEPG; | |
2153 | if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { | |
c2fb025d | 2154 | pmap_inval_interlock(&info, pmap, -1); |
55f2596a | 2155 | pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); |
984263bc | 2156 | pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; |
c2fb025d | 2157 | pmap_inval_deinterlock(&info, pmap); |
984263bc MD |
2158 | continue; |
2159 | } | |
2160 | ||
2161 | /* | |
2162 | * Weed out invalid mappings. Note: we assume that the page | |
2163 | * directory table is always allocated, and in kernel virtual. | |
2164 | */ | |
2165 | if (ptpaddr == 0) | |
2166 | continue; | |
2167 | ||
2168 | if (pdnxt > eindex) { | |
2169 | pdnxt = eindex; | |
2170 | } | |
2171 | ||
2172 | for (; sindex != pdnxt; sindex++) { | |
984263bc | 2173 | unsigned pbits; |
c2fb025d | 2174 | unsigned cbits; |
984263bc MD |
2175 | vm_page_t m; |
2176 | ||
17cde63e | 2177 | /* |
d5b2d319 | 2178 | * XXX non-optimal. |
17cde63e | 2179 | */ |
c2fb025d MD |
2180 | pmap_inval_interlock(&info, pmap, i386_ptob(sindex)); |
2181 | again: | |
984263bc | 2182 | pbits = ptbase[sindex]; |
c2fb025d | 2183 | cbits = pbits; |
984263bc MD |
2184 | |
2185 | if (pbits & PG_MANAGED) { | |
2186 | m = NULL; | |
2187 | if (pbits & PG_A) { | |
2188 | m = PHYS_TO_VM_PAGE(pbits); | |
2189 | vm_page_flag_set(m, PG_REFERENCED); | |
c2fb025d | 2190 | cbits &= ~PG_A; |
984263bc MD |
2191 | } |
2192 | if (pbits & PG_M) { | |
2193 | if (pmap_track_modified(i386_ptob(sindex))) { | |
2194 | if (m == NULL) | |
2195 | m = PHYS_TO_VM_PAGE(pbits); | |
2196 | vm_page_dirty(m); | |
c2fb025d | 2197 | cbits &= ~PG_M; |
984263bc MD |
2198 | } |
2199 | } | |
2200 | } | |
c2fb025d MD |
2201 | cbits &= ~PG_RW; |
2202 | if (pbits != cbits && | |
2203 | !atomic_cmpset_int(ptbase + sindex, pbits, cbits)) { | |
2204 | goto again; | |
984263bc | 2205 | } |
c2fb025d | 2206 | pmap_inval_deinterlock(&info, pmap); |
984263bc MD |
2207 | } |
2208 | } | |
c2fb025d | 2209 | pmap_inval_done(&info); |
4107b0c0 | 2210 | lwkt_reltoken(&vm_token); |
984263bc MD |
2211 | } |
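/*
 * The inner loop above is a compare-and-swap retry: pbits is the pte
 * value we sampled, cbits is that value with PG_RW (and any PG_A/PG_M
 * bits already reflected back to the vm_page_t) cleared, and
 * atomic_cmpset_int() only installs cbits if the pte still equals pbits.
 * If the hardware set PG_A or PG_M in the meantime the cmpset fails and
 * we resample at the "again" label.
 */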
2212 | ||
2213 | /* | |
4107b0c0 MD |
2214 | * Insert the given physical page (p) at the specified virtual address (v) |
2215 | * in the target physical map with the protection requested. | |
984263bc | 2216 | * |
4107b0c0 MD |
2217 | * If specified, the page will be wired down, meaning that the related pte |
2218 | * cannot be reclaimed. | |
984263bc | 2219 | * |
4107b0c0 | 2220 | * No requirements. |
984263bc MD |
2221 | */ |
2222 | void | |
2223 | pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, | |
921c891e | 2224 | boolean_t wired, vm_map_entry_t entry __unused) |
984263bc | 2225 | { |
6ef943a3 | 2226 | vm_paddr_t pa; |
840de426 | 2227 | unsigned *pte; |
6ef943a3 | 2228 | vm_paddr_t opa; |
984263bc MD |
2229 | vm_offset_t origpte, newpte; |
2230 | vm_page_t mpte; | |
0f7a3396 | 2231 | pmap_inval_info info; |
2bb9cc6f | 2232 | pv_entry_t pv; |
984263bc MD |
2233 | |
2234 | if (pmap == NULL) | |
2235 | return; | |
2236 | ||
2237 | va &= PG_FRAME; | |
2238 | #ifdef PMAP_DIAGNOSTIC | |
c439ad8f | 2239 | if (va >= KvaEnd) |
984263bc | 2240 | panic("pmap_enter: toobig"); |
d557216f MD |
2241 | if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) { |
2242 | panic("pmap_enter: invalid to pmap_enter page " | |
2243 | "table pages (va: %p)", (void *)va); | |
2244 | } | |
984263bc | 2245 | #endif |
fbbaeba3 MD |
2246 | if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) { |
2247 | kprintf("Warning: pmap_enter called on UVA with kernel_pmap\n"); | |
7ce2998e | 2248 | print_backtrace(-1); |
fbbaeba3 MD |
2249 | } |
2250 | if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) { | |
2251 | kprintf("Warning: pmap_enter called on KVA without kernel_pmap\n"); | |
7ce2998e | 2252 | print_backtrace(-1); |
fbbaeba3 | 2253 | } |
984263bc | 2254 | |
b12defdc | 2255 | vm_object_hold(pmap->pm_pteobj); |
4107b0c0 MD |
2256 | lwkt_gettoken(&vm_token); |
2257 | ||
2bb9cc6f MD |
2258 | /* |
2259 | * This can block, get it before we do anything important. | |
2260 | */ | |
2261 | if (pmap_initialized && | |
2262 | (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { | |
2263 | pv = get_pv_entry(); | |
2264 | } else { | |
2265 | pv = NULL; | |
2266 | } | |
2267 | ||
984263bc MD |
2268 | /* |
2269 | * In the case that a page table page is not | |
2270 | * resident, we are creating it here. | |
2271 | */ | |
17cde63e | 2272 | if (va < UPT_MIN_ADDRESS) |
984263bc | 2273 | mpte = pmap_allocpte(pmap, va); |
17cde63e MD |
2274 | else |
2275 | mpte = NULL; | |
984263bc | 2276 | |
b12defdc MD |
2277 | if ((prot & VM_PROT_NOSYNC) == 0) |
2278 | pmap_inval_init(&info); | |
984263bc MD |
2279 | pte = pmap_pte(pmap, va); |
2280 | ||
2281 | /* | |
2282 | * Page Directory table entry not valid, we need a new PT page | |
2283 | */ | |
2284 | if (pte == NULL) { | |
ed20d0e3 | 2285 | panic("pmap_enter: invalid page directory pdir=0x%lx, va=%p", |
d557216f | 2286 | (long)pmap->pm_pdir[PTDPTDI], (void *)va); |
984263bc MD |
2287 | } |
2288 | ||
2289 | pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; | |
2290 | origpte = *(vm_offset_t *)pte; | |
2291 | opa = origpte & PG_FRAME; | |
2292 | ||
2293 | if (origpte & PG_PS) | |
2294 | panic("pmap_enter: attempted pmap_enter on 4MB page"); | |
2295 | ||
2296 | /* | |
2297 | * Mapping has not changed, must be protection or wiring change. | |
2298 | */ | |
2299 | if (origpte && (opa == pa)) { | |
2300 | /* | |
2301 | * Wiring change, just update stats. We don't worry about | |
2302 | * wiring PT pages as they remain resident as long as there | |
2303 | * are valid mappings in them. Hence, if a user page is wired, | |
2304 | * the PT page will be also. | |
2305 | */ | |
2306 | if (wired && ((origpte & PG_W) == 0)) | |
2307 | pmap->pm_stats.wired_count++; | |
2308 | else if (!wired && (origpte & PG_W)) | |
2309 | pmap->pm_stats.wired_count--; | |
2310 | ||
2311 | #if defined(PMAP_DIAGNOSTIC) | |
2312 | if (pmap_nw_modified((pt_entry_t) origpte)) { | |
d557216f MD |
2313 | kprintf("pmap_enter: modified page not " |
2314 | "writable: va: %p, pte: 0x%lx\n", | |
2315 | (void *)va, (long )origpte); | |
984263bc MD |
2316 | } |
2317 | #endif | |
2318 | ||
984263bc MD |
2319 | /* |
2320 | * We might be turning off write access to the page, | |
2321 | * so we go ahead and sense modify status. | |
2322 | */ | |
2323 | if (origpte & PG_MANAGED) { | |
2324 | if ((origpte & PG_M) && pmap_track_modified(va)) { | |
2325 | vm_page_t om; | |
2326 | om = PHYS_TO_VM_PAGE(opa); | |
2327 | vm_page_dirty(om); | |
2328 | } | |
2329 | pa |= PG_MANAGED; | |
17cde63e | 2330 | KKASSERT(m->flags & PG_MAPPED); |
984263bc MD |
2331 | } |
2332 | goto validate; | |
2333 | } | |
2334 | /* | |
2335 | * Mapping has changed, invalidate old range and fall through to | |
2336 | * handle validating new mapping. | |
5926987a MD |
2337 | * |
2338 | * Since we have a ref on the page directory page pmap_pte() | |
2339 | * will always return non-NULL. | |
2340 | * | |
2341 | * NOTE: pmap_remove_pte() can block and cause the temporary ptbase | |
2342 | * to get wiped. reload the ptbase. I'm not sure if it is | |
2343 | * also possible to race another pmap_enter() but check for | |
2344 | * that case too. | |
984263bc | 2345 | */ |
5926987a | 2346 | while (opa) { |
5926987a MD |
2347 | KKASSERT((origpte & PG_FRAME) == |
2348 | (*(vm_offset_t *)pte & PG_FRAME)); | |
554cf9ac | 2349 | pmap_remove_pte(pmap, pte, va, &info); |
5926987a MD |
2350 | pte = pmap_pte(pmap, va); |
2351 | origpte = *(vm_offset_t *)pte; | |
2352 | opa = origpte & PG_FRAME; | |
2353 | if (opa) { | |
2354 | kprintf("pmap_enter: Warning, raced pmap %p va %p\n", | |
2355 | pmap, (void *)va); | |
2356 | } | |
984263bc MD |
2357 | } |
2358 | ||
2359 | /* | |
2360 | * Enter on the PV list if part of our managed memory. Note that we | |
2361 | * raise IPL while manipulating pv_table since pmap_enter can be | |
2362 | * called at interrupt time. | |
2363 | */ | |
2364 | if (pmap_initialized && | |
2365 | (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { | |
2bb9cc6f MD |
2366 | pmap_insert_entry(pmap, pv, va, mpte, m); |
2367 | pv = NULL; | |
5926987a | 2368 | ptbase_assert(pmap); |
984263bc | 2369 | pa |= PG_MANAGED; |
17cde63e | 2370 | vm_page_flag_set(m, PG_MAPPED); |
984263bc MD |
2371 | } |
2372 | ||
2373 | /* | |
2374 | * Increment counters | |
2375 | */ | |
eec2b734 | 2376 | ++pmap->pm_stats.resident_count; |
984263bc MD |
2377 | if (wired) |
2378 | pmap->pm_stats.wired_count++; | |
5926987a | 2379 | KKASSERT(*pte == 0); |
984263bc MD |
2380 | |
2381 | validate: | |
2382 | /* | |
2383 | * Now validate mapping with desired protection/wiring. | |
2384 | */ | |
5926987a | 2385 | ptbase_assert(pmap); |
984263bc MD |
2386 | newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); |
2387 | ||
2388 | if (wired) | |
2389 | newpte |= PG_W; | |
2390 | if (va < UPT_MIN_ADDRESS) | |
2391 | newpte |= PG_U; | |
fbbaeba3 | 2392 | if (pmap == &kernel_pmap) |
984263bc MD |
2393 | newpte |= pgeflag; |
2394 | ||
2395 | /* | |
2bb9cc6f MD |
2396 | * If the mapping or permission bits are different, we need |
2397 | * to update the pte. If the pte is already present we have | |
2398 | * to get rid of the extra wire-count on mpte we had obtained | |
2399 | * above. | |
b1482674 MD |
2400 | * |
2401 | * mpte has a new wire_count, which also serves to prevent the | |
2402 | * page table page from getting ripped out while we work. If we | |
2403 | * are modifying an existing pte instead of installing a new one | |
2404 | * we have to drop it. | |
984263bc MD |
2405 | */ |
2406 | if ((origpte & ~(PG_M|PG_A)) != newpte) { | |
b12defdc MD |
2407 | if (prot & VM_PROT_NOSYNC) |
2408 | cpu_invlpg((void *)va); | |
2409 | else | |
2410 | pmap_inval_interlock(&info, pmap, va); | |
5926987a | 2411 | ptbase_assert(pmap); |
2bb9cc6f MD |
2412 | |
2413 | if (*pte) { | |
2414 | KKASSERT((*pte & PG_FRAME) == (newpte & PG_FRAME)); | |
2415 | if (vm_page_unwire_quick(mpte)) | |
2416 | panic("pmap_enter: Insufficient wire_count"); | |
2417 | } | |
2418 | ||
984263bc | 2419 | *pte = newpte | PG_A; |
b12defdc MD |
2420 | if ((prot & VM_PROT_NOSYNC) == 0) |
2421 | pmap_inval_deinterlock(&info, pmap); | |
17cde63e MD |
2422 | if (newpte & PG_RW) |
2423 | vm_page_flag_set(m, PG_WRITEABLE); | |
b1482674 MD |
2424 | } else { |
2425 | if (*pte) { | |
2426 | KKASSERT((*pte & PG_FRAME) == (newpte & PG_FRAME)); | |
2427 | if (vm_page_unwire_quick(mpte)) | |
2428 | panic("pmap_enter: Insufficient wire_count"); | |
2429 | } | |
984263bc | 2430 | } |
b1482674 MD |
2431 | |
2432 | /* | |
2433 | * NOTE: mpte invalid after this point if we block. | |
2434 | */ | |
c695044a | 2435 | KKASSERT((newpte & PG_MANAGED) == 0 || (m->flags & PG_MAPPED)); |
b12defdc MD |
2436 | if ((prot & VM_PROT_NOSYNC) == 0) |
2437 | pmap_inval_done(&info); | |
2bb9cc6f MD |
2438 | if (pv) |
2439 | free_pv_entry(pv); | |
4107b0c0 | 2440 | lwkt_reltoken(&vm_token); |
b12defdc | 2441 | vm_object_drop(pmap->pm_pteobj); |
984263bc MD |
2442 | } |
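/*
 * Composition of the pte installed by the validate: section above, for a
 * managed, wired, userland page on a pmap other than kernel_pmap:
 *
 *	newpte = pa | pte_prot(pmap, prot) | PG_V | PG_W | PG_U | PG_MANAGED
 *
 * PG_A is or'd in at store time, PG_MANAGED is carried in 'pa' when the
 * page sits on a pv_list, and pgeflag (global mapping) is only added for
 * kernel_pmap entries.
 */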
2443 | ||
2444 | /* | |
17cde63e MD |
2445 | * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. |
2446 | * This code also assumes that the pmap has no pre-existing entry for this | |
2447 | * VA. | |
2448 | * | |
2449 | * This code currently may only be used on user pmaps, not kernel_pmap. | |
4107b0c0 MD |
2450 | * |
2451 | * No requirements. | |
984263bc | 2452 | */ |
1b9d3514 | 2453 | void |
17cde63e | 2454 | pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) |
984263bc MD |
2455 | { |
2456 | unsigned *pte; | |
6ef943a3 | 2457 | vm_paddr_t pa; |
17cde63e MD |
2458 | vm_page_t mpte; |
2459 | unsigned ptepindex; | |
2460 | vm_offset_t ptepa; | |
0f7a3396 | 2461 | pmap_inval_info info; |
2bb9cc6f | 2462 | pv_entry_t pv; |
0f7a3396 | 2463 | |
b12defdc | 2464 | vm_object_hold(pmap->pm_pteobj); |
4107b0c0 | 2465 | lwkt_gettoken(&vm_token); |
2bb9cc6f MD |
2466 | |
2467 | /* | |
2468 | * This can block, get it before we do anything important. | |
2469 | */ | |
2470 | if (pmap_initialized && | |
2471 | (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { | |
2472 | pv = get_pv_entry(); | |
2473 | } else { | |
2474 | pv = NULL; | |
2475 | } | |
2476 | ||
0f7a3396 | 2477 | pmap_inval_init(&info); |
984263bc | 2478 | |
fbbaeba3 MD |
2479 | if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) { |
2480 | kprintf("Warning: pmap_enter_quick called on UVA with kernel_pmap\n"); | |
7ce2998e | 2481 | print_backtrace(-1); |
fbbaeba3 MD |
2482 | } |
2483 | if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) { | |
2484 | kprintf("Warning: pmap_enter_quick called on KVA without kernel_pmap\n"); | |
7ce2998e | 2485 | print_backtrace(-1); |
fbbaeba3 MD |
2486 | } |
2487 | ||
17cde63e MD |
2488 | KKASSERT(va < UPT_MIN_ADDRESS); /* assert used on user pmaps only */ |
2489 | ||
984263bc | 2490 | /* |
17cde63e MD |
2491 | * Calculate the page table page (mpte), allocating it if necessary. |
2492 | * | |
2493 | * A held page table page (mpte), or NULL, is passed onto the | |
2494 | * section following. | |
984263bc MD |
2495 | */ |
2496 | if (va < UPT_MIN_ADDRESS) { | |
984263bc MD |
2497 | /* |
2498 | * Calculate pagetable page index | |
2499 | */ | |
2500 | ptepindex = va >> PDRSHIFT; | |
17cde63e MD |
2501 | |
2502 | do { | |
984263bc MD |
2503 | /* |
2504 | * Get the page directory entry | |
2505 | */ | |
2506 | ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; | |
2507 | ||
2508 | /* | |
2509 | * If the page table page is mapped, we just increment | |
90244566 | 2510 | * the wire count, and activate it. |
984263bc MD |
2511 | */ |
2512 | if (ptepa) { | |
2513 | if (ptepa & PG_PS) | |
2514 | panic("pmap_enter_quick: unexpected mapping into 4MB page"); | |
b1482674 MD |
2515 | if ((mpte = pmap->pm_ptphint) != NULL && |
2516 | (mpte->pindex == ptepindex) && | |
2517 | (mpte->flags & PG_BUSY) == 0) { | |
2bb9cc6f | 2518 | vm_page_wire_quick(mpte); |
984263bc | 2519 | } else { |
2bb9cc6f MD |
2520 | mpte = pmap_page_lookup(pmap->pm_pteobj, |
2521 | ptepindex); | |
984263bc | 2522 | pmap->pm_ptphint = mpte; |
2bb9cc6f | 2523 | vm_page_wire_quick(mpte); |
b12defdc | 2524 | vm_page_wakeup(mpte); |
984263bc | 2525 | } |
984263bc MD |
2526 | } else { |
2527 | mpte = _pmap_allocpte(pmap, ptepindex); | |
2528 | } | |
17cde63e | 2529 | } while (mpte == NULL); |
984263bc MD |
2530 | } else { |
2531 | mpte = NULL; | |
17cde63e | 2532 | /* this code path is not yet used */ |
984263bc MD |
2533 | } |
2534 | ||
2535 | /* | |
17cde63e MD |
2536 | * With a valid (and held) page directory page, we can just use |
2537 | * vtopte() to get to the pte. If the pte is already present | |
2538 | * we do not disturb it. | |
984263bc MD |
2539 | */ |
2540 | pte = (unsigned *)vtopte(va); | |
554cf9ac MD |
2541 | if (*pte) { |
2542 | KKASSERT(*pte & PG_V); | |
17cde63e MD |
2543 | pa = VM_PAGE_TO_PHYS(m); |
2544 | KKASSERT(((*pte ^ pa) & PG_FRAME) == 0); | |
c2fb025d | 2545 | pmap_inval_done(&info); |
b1482674 MD |
2546 | if (mpte) |
2547 | pmap_unwire_pte(pmap, mpte, &info); | |
b1482674 | 2548 | if (pv) { |
2bb9cc6f | 2549 | free_pv_entry(pv); |
b1482674 MD |
2550 | /* pv = NULL; */ |
2551 | } | |
554cf9ac MD |
2552 | lwkt_reltoken(&vm_token); |
2553 | vm_object_drop(pmap->pm_pteobj); | |
17cde63e | 2554 | return; |
984263bc MD |
2555 | } |
2556 | ||
2557 | /* | |
17cde63e | 2558 | * Enter on the PV list if part of our managed memory |
984263bc | 2559 | */ |
2bb9cc6f MD |
2560 | if (pmap_initialized && |
2561 | (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { | |
2562 | pmap_insert_entry(pmap, pv, va, mpte, m); | |
2563 | pv = NULL; | |
17cde63e MD |
2564 | vm_page_flag_set(m, PG_MAPPED); |
2565 | } | |
984263bc MD |
2566 | |
2567 | /* | |
2568 | * Increment counters | |
2569 | */ | |
eec2b734 | 2570 | ++pmap->pm_stats.resident_count; |
984263bc MD |
2571 | |
2572 | pa = VM_PAGE_TO_PHYS(m); | |
2573 | ||
2574 | /* | |
2575 | * Now validate mapping with RO protection | |
2576 | */ | |
2577 | if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) | |
2578 | *pte = pa | PG_V | PG_U; | |
2579 | else | |
2580 | *pte = pa | PG_V | PG_U | PG_MANAGED; | |
17cde63e | 2581 | /* pmap_inval_add(&info, pmap, va); shouldn't be needed inval->valid */ |
c2fb025d | 2582 | pmap_inval_done(&info); |
b1482674 | 2583 | if (pv) { |
2bb9cc6f | 2584 | free_pv_entry(pv); |
b1482674 MD |
2585 | /* pv = NULL; */ |
2586 | } | |
4107b0c0 | 2587 | lwkt_reltoken(&vm_token); |
b12defdc | 2588 | vm_object_drop(pmap->pm_pteobj); |
984263bc MD |
2589 | } |
2590 | ||
2591 | /* | |
2592 | * Make a temporary mapping for a physical address. This is only intended | |
2593 | * to be used for panic dumps. | |
4107b0c0 | 2594 | * |
fb8345e6 MD |
2595 | * The caller is responsible for calling smp_invltlb(). |
2596 | * | |
4107b0c0 | 2597 | * No requirements. |
984263bc MD |
2598 | */ |
2599 | void * | |
8e5ea5f7 | 2600 | pmap_kenter_temporary(vm_paddr_t pa, long i) |
984263bc | 2601 | { |
fb8345e6 | 2602 | pmap_kenter_quick((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); |
984263bc MD |
2603 | return ((void *)crashdumpmap); |
2604 | } | |
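/*
 * Usage sketch: a dump routine mapping a run of physical pages through
 * the crashdumpmap window (the 'npages' count here is hypothetical):
 *
 *	for (i = 0; i < npages; ++i)
 *		pmap_kenter_temporary(pa + i * PAGE_SIZE, i);
 *	smp_invltlb();
 *
 * smp_invltlb() is the caller's responsibility, per the note above, and
 * page i is then visible at crashdumpmap + i * PAGE_SIZE.
 */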
2605 | ||
2606 | #define MAX_INIT_PT (96) | |
06ecca5a | 2607 | |
984263bc | 2608 | /* |
06ecca5a MD |
2609 | * This routine preloads the ptes for a given object into the specified pmap. |
2610 | * This eliminates the blast of soft faults on process startup and | |
2611 | * immediately after an mmap. | |
4107b0c0 MD |
2612 | * |
2613 | * No requirements. | |
984263bc | 2614 | */ |
1f804340 MD |
2615 | static int pmap_object_init_pt_callback(vm_page_t p, void *data); |
2616 | ||
984263bc | 2617 | void |
083a7402 MD |
2618 | pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, |
2619 | vm_object_t object, vm_pindex_t pindex, | |
2620 | vm_size_t size, int limit) | |
984263bc | 2621 | { |
1f804340 | 2622 | struct rb_vm_page_scan_info info; |
287ebb09 | 2623 | struct lwp *lp; |
984263bc | 2624 | int psize; |
984263bc | 2625 | |
54a764e8 MD |
2626 | /* |
2627 | * We can't preinit if read access isn't set or there is no pmap | |
2628 | * or object. | |
2629 | */ | |
083a7402 | 2630 | if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) |
984263bc MD |
2631 | return; |
2632 | ||
54a764e8 MD |
2633 | /* |
2634 | * We can't preinit if the pmap is not the current pmap | |
2635 | */ | |
287ebb09 MD |
2636 | lp = curthread->td_lwp; |
2637 | if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) | |
54a764e8 MD |
2638 | return; |
2639 | ||
984263bc MD |
2640 | psize = i386_btop(size); |
2641 | ||
2642 | if ((object->type != OBJT_VNODE) || | |
2643 | ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && | |
2644 | (object->resident_page_count > MAX_INIT_PT))) { | |
2645 | return; | |
2646 | } | |
2647 | ||
2648 | if (psize + pindex > object->size) { | |
2649 | if (object->size < pindex) | |
2650 | return; | |
2651 | psize = object->size - pindex; | |
2652 | } | |
2653 | ||
1f804340 MD |
2654 | if (psize == 0) |
2655 | return; | |
06ecca5a | 2656 | |
984263bc | 2657 | /* |
1f804340 MD |
2658 | * Use a red-black scan to traverse the requested range and load |
2659 | * any valid pages found into the pmap. | |
06ecca5a | 2660 | * |
9acd5bbb MD |
2661 | * We cannot safely scan the object's memq unless we are in a |
2662 | * critical section since interrupts can remove pages from objects. | |
984263bc | 2663 | */ |
1f804340 MD |
2664 | info.start_pindex = pindex; |
2665 | info.end_pindex = pindex + psize - 1; | |
2666 | info.limit = limit; | |
2667 | info.mpte = NULL; | |
2668 | info.addr = addr; | |
2669 | info.pmap = pmap; | |
2670 | ||
2f2d9e58 | 2671 | vm_object_hold(object); |
1f804340 MD |
2672 | vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, |
2673 | pmap_object_init_pt_callback, &info); | |
2f2d9e58 | 2674 | vm_object_drop(object); |
1f804340 | 2675 | } |
06ecca5a | 2676 | |
4107b0c0 MD |
2677 | /* |
2678 | * The caller must hold vm_token. | |
2679 | */ | |
1f804340 MD |
2680 | static |
2681 | int | |
2682 | pmap_object_init_pt_callback(vm_page_t p, void *data) | |
2683 | { | |
2684 | struct rb_vm_page_scan_info *info = data; | |
2685 | vm_pindex_t rel_index; | |
2686 | /* | |
2687 | * Don't allow an madvise to blow away our really | 
2688 | * free pages by allocating pv entries. | 
2689 | */ | |
2690 | if ((info->limit & MAP_PREFAULT_MADVISE) && | |
2691 | vmstats.v_free_count < vmstats.v_free_reserved) { | |
2692 | return(-1); | |
984263bc | 2693 | } |
0d987a03 MD |
2694 | |
2695 | /* | |
2696 | * Ignore list markers and ignore pages we cannot instantly | |
2697 | * busy (while holding the object token). | |
2698 | */ | |
2699 | if (p->flags & PG_MARKER) | |
2700 | return 0; | |
b12defdc MD |
2701 | if (vm_page_busy_try(p, TRUE)) |
2702 | return 0; | |
1f804340 | 2703 | if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && |
b12defdc | 2704 | (p->flags & PG_FICTITIOUS) == 0) { |
1f804340 MD |
2705 | if ((p->queue - p->pc) == PQ_CACHE) |
2706 | vm_page_deactivate(p); | |
1f804340 | 2707 | rel_index = p->pindex - info->start_pindex; |
17cde63e MD |
2708 | pmap_enter_quick(info->pmap, |
2709 | info->addr + i386_ptob(rel_index), p); | |
1f804340 | 2710 | } |
b12defdc | 2711 | vm_page_wakeup(p); |
1f804340 | 2712 | return(0); |
984263bc MD |
2713 | } |
2714 | ||
2715 | /* | |
1b9d3514 MD |
2716 | * Return TRUE if the pmap is in shape to trivially |
2717 | * pre-fault the specified address. | |
2718 | * | |
2719 | * Returns FALSE if it would be non-trivial or if a | |
2720 | * pte is already loaded into the slot. | |
4107b0c0 MD |
2721 | * |
2722 | * No requirements. | |
984263bc | 2723 | */ |
1b9d3514 MD |
2724 | int |
2725 | pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) | |
984263bc | 2726 | { |
1b9d3514 | 2727 | unsigned *pte; |
4107b0c0 | 2728 | int ret; |
984263bc | 2729 | |
4107b0c0 MD |
2730 | lwkt_gettoken(&vm_token); |
2731 | if ((*pmap_pde(pmap, addr)) == 0) { | |
2732 | ret = 0; | |
2733 | } else { | |
2734 | pte = (unsigned *) vtopte(addr); | |
2735 | ret = (*pte) ? 0 : 1; | |
2736 | } | |
2737 | lwkt_reltoken(&vm_token); | |
2738 | return(ret); | |
984263bc MD |
2739 | } |
2740 | ||
2741 | /* | |
4107b0c0 MD |
2742 | * Change the wiring attribute for a map/virtual-address pair. The mapping | 
2743 | * must already exist. | |
2744 | * | |
2745 | * No requirements. | |
984263bc MD |
2746 | */ |
2747 | void | |
921c891e MD |
2748 | pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired, |
2749 | vm_map_entry_t entry __unused) | |
984263bc | 2750 | { |
840de426 | 2751 | unsigned *pte; |
984263bc MD |
2752 | |
2753 | if (pmap == NULL) | |
2754 | return; | |
2755 | ||
4107b0c0 | 2756 | lwkt_gettoken(&vm_token); |
984263bc MD |
2757 | pte = pmap_pte(pmap, va); |
2758 | ||
2759 | if (wired && !pmap_pte_w(pte)) | |
2760 | pmap->pm_stats.wired_count++; | |
2761 | else if (!wired && pmap_pte_w(pte)) | |
2762 | pmap->pm_stats.wired_count--; | |
2763 | ||
2764 | /* | |
2765 | * Wiring is not a hardware characteristic so there is no need to | |
0f7a3396 MD |
2766 | * invalidate TLB. However, in an SMP environment we must use |
2767 | * a locked bus cycle to update the pte (if we are not using | |
2768 | * the pmap_inval_*() API that is)... it's ok to do this for simple | |
2769 | * wiring changes. | |
984263bc | 2770 | */ |
0f7a3396 MD |
2771 | #ifdef SMP |
2772 | if (wired) | |
2773 | atomic_set_int(pte, PG_W); | |
2774 | else | |
2775 | atomic_clear_int(pte, PG_W); | |
2776 | #else | |
2777 | if (wired) | |
2778 | atomic_set_int_nonlocked(pte, PG_W); | |
2779 | else | |
2780 | atomic_clear_int_nonlocked(pte, PG_W); | |
2781 | #endif | |
4107b0c0 | 2782 | lwkt_reltoken(&vm_token); |
984263bc MD |
2783 | } |
2784 | ||
984263bc | 2785 | /* |
4107b0c0 MD |
2786 | * Copy the range specified by src_addr/len from the source map to the |
2787 | * range dst_addr/len in the destination map. | |
2788 | * | |
2789 | * This routine is only advisory and need not do anything. | |
984263bc | 2790 | * |
4107b0c0 | 2791 | * No requirements. |
984263bc | 2792 | */ |
984263bc | 2793 | void |
840de426 | 2794 | pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, |
4107b0c0 | 2795 | vm_size_t len, vm_offset_t src_addr) |
984263bc | 2796 | { |
4107b0c0 | 2797 | /* does nothing */ |
984263bc MD |
2798 | } |
2799 | ||
984263bc | 2800 | /* |
4107b0c0 MD |
2801 | * Zero the specified PA by mapping the page into KVM and clearing its |
2802 | * contents. | |
e0e69b7d | 2803 | * |
4107b0c0 | 2804 | * No requirements. |
984263bc MD |
2805 | */ |
2806 | void | |
6ef943a3 | 2807 | pmap_zero_page(vm_paddr_t phys) |
984263bc | 2808 | { |
85100692 | 2809 | struct mdglobaldata *gd = mdcpu; |
17a9f566 | 2810 | |
e0e69b7d | 2811 | crit_enter(); |
85100692 MD |
2812 | if (*(int *)gd->gd_CMAP3) |
2813 | panic("pmap_zero_page: CMAP3 busy"); | |
85100692 | 2814 | *(int *)gd->gd_CMAP3 = |
17a9f566 | 2815 | PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; |
85100692 | 2816 | cpu_invlpg(gd->gd_CADDR3); |
1fa15583 | 2817 | bzero(gd->gd_CADDR3, PAGE_SIZE); |
85100692 | 2818 | *(int *) gd->gd_CMAP3 = 0; |
e0e69b7d | 2819 | crit_exit(); |
8100156a MD |
2820 | } |
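/*
 * The CMAP/CADDR pairs used here and in the assert/copy routines below
 * are small per-cpu KVA windows: gd_CMAP3 points at the pte reserved for
 * this cpu and gd_CADDR3 is the virtual address it maps.  Installing
 * PG_V | PG_RW | (phys & PG_FRAME) into the pte and invalidating the TLB
 * entry with cpu_invlpg() gives a temporary mapping of an arbitrary
 * physical page, which is torn down again by zeroing the pte.
 * crit_enter() keeps the thread on this cpu while its window is in use.
 */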
2821 | ||
2822 | /* | |
4107b0c0 | 2823 | * Assert that a page is empty, panic if it isn't. |
8100156a | 2824 | * |
4107b0c0 | 2825 | * No requirements. |
8100156a MD |
2826 | */ |
2827 | void | |
2828 | pmap_page_assertzero(vm_paddr_t phys) | |
2829 | { | |
2830 | struct mdglobaldata *gd = mdcpu; | |
2831 | int i; | |
2832 | ||
2833 | crit_enter(); | |
2834 | if (*(int *)gd->gd_CMAP3) | |
2835 | panic("pmap_page_assertzero: CMAP3 busy"); | 
2836 | *(int *)gd->gd_CMAP3 = | |
2837 | PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; | |
2838 | cpu_invlpg(gd->gd_CADDR3); | |
2839 | for (i = 0; i < PAGE_SIZE; i += 4) { | |
2840 | if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) { | |
ed20d0e3 | 2841 | panic("pmap_page_assertzero() @ %p not zero!", |
8100156a MD |
2842 | (void *)gd->gd_CADDR3); |
2843 | } | |
2844 | } | |
2845 | *(int *) gd->gd_CMAP3 = 0; | |
2846 | crit_exit(); | |
984263bc MD |
2847 | } |
2848 | ||
2849 | /* | |
4107b0c0 MD |
2850 | * Zero part of a physical page by mapping it into memory and clearing |
2851 | * its contents with bzero. | |
e0e69b7d | 2852 | * |
4107b0c0 | 2853 | * off and size may not cover an area beyond a single hardware page. |
984263bc | 2854 | * |
4107b0c0 | 2855 | * No requirements. |
984263bc MD |
2856 | */ |
2857 | void | |
6ef943a3 | 2858 | pmap_zero_page_area(vm_paddr_t phys, int off, int size) |
984263bc | 2859 | { |
85100692 | 2860 | struct mdglobaldata *gd = mdcpu; |
17a9f566 | 2861 | |
e0e69b7d | 2862 | crit_enter(); |
85100692 MD |
2863 | if (*(int *) gd->gd_CMAP3) |
2864 | panic("pmap_zero_page: CMAP3 busy"); | |
85100692 MD |
2865 | *(int *) gd->gd_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; |
2866 | cpu_invlpg(gd->gd_CADDR3); | |
1fa15583 | 2867 | bzero((char *)gd->gd_CADDR3 + off, size); |
85100692 | 2868 | *(int *) gd->gd_CMAP3 = 0; |
e0e69b7d | 2869 | crit_exit(); |
984263bc MD |
2870 | } |
2871 | ||
2872 | /* | |
4107b0c0 MD |
2873 | * Copy the physical page from the source PA to the target PA. |
2874 | * This function may be called from an interrupt. No locking | |
2875 | * is required. | |
e0e69b7d | 2876 | * |
4107b0c0 | 2877 | * No requirements. |
984263bc MD |
2878 | */ |
2879 | void | |
6ef943a3 | 2880 | pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) |
984263bc | 2881 | { |
85100692 | 2882 | struct mdglobaldata *gd = mdcpu; |
17a9f566 | 2883 | |
e0e69b7d | 2884 | crit_enter(); |
85100692 MD |
2885 | if (*(int *) gd->gd_CMAP1) |
2886 | panic("pmap_copy_page: CMAP1 busy"); | |
2887 | if (*(int *) gd->gd_CMAP2) | |
2888 | panic("pmap_copy_page: CMAP2 busy"); | |
984263bc | 2889 | |
85100692 MD |
2890 | *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; |
2891 | *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; | |
984263bc | 2892 | |
85100692 MD |
2893 | cpu_invlpg(gd->gd_CADDR1); |
2894 | cpu_invlpg(gd->gd_CADDR2); | |
984263bc | 2895 | |
85100692 | 2896 | bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE); |
984263bc | 2897 | |
85100692 MD |
2898 | *(int *) gd->gd_CMAP1 = 0; |
2899 | *(int *) gd->gd_CMAP2 = 0; | |
e0e69b7d | 2900 | crit_exit(); |
984263bc MD |
2901 | } |
2902 | ||
f6bf3af1 | 2903 | /* |
4107b0c0 MD |
2904 | * Copy the physical page from the source PA to the target PA. |
2905 | * This function may be called from an interrupt. No locking | |
2906 | * is required. | |
f6bf3af1 | 2907 | * |
4107b0c0 | 2908 | * No requirements. |
f6bf3af1 MD |
2909 | */ |
2910 | void | |
2911 | pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) | |
2912 | { | |
2913 | struct mdglobaldata *gd = mdcpu; | |
2914 | ||
2915 | crit_enter(); | |
2916 | if (*(int *) gd->gd_CMAP1) | |
2917 | panic("pmap_copy_page: CMAP1 busy"); | |
2918 | if (*(int *) gd->gd_CMAP2) | |
2919 | panic("pmap_copy_page: CMAP2 busy"); | |
2920 | ||
2921 | *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; | |
2922 | *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; | |
2923 | ||
2924 | cpu_invlpg(gd->gd_CADDR1); | |
2925 | cpu_invlpg(gd->gd_CADDR2); | |
2926 | ||
2927 | bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK), | |
2928 | (char *)gd->gd_CADDR2 + (dst & PAGE_MASK), | |
2929 | bytes); | |
2930 | ||
2931 | *(int *) gd->gd_CMAP1 = 0; | |
2932 | *(int *) gd->gd_CMAP2 = 0; | |
2933 | crit_exit(); | |
2934 | } | |
2935 | ||
984263bc MD |
2936 | /* |
2937 | * Returns true if the pmap's pv is one of the first | |
2938 | * 16 pvs linked to from this page. This count may | |
2939 | * be changed upwards or downwards in the future; it | |
2940 | * is only necessary that true be returned for a small | |
2941 | * subset of pmaps for proper page aging. | |
4107b0c0 MD |
2942 | * |
2943 | * No requirements. | |
984263bc MD |
2944 | */ |
2945 | boolean_t | |
840de426 | 2946 | pmap_page_exists_quick(pmap_t pmap, vm_page_t m) |
984263bc MD |
2947 | { |
2948 | pv_entry_t pv; | |
2949 | int loops = 0; | |
984263bc MD |
2950 | |
2951 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) | |
2952 | return FALSE; | |
2953 | ||
4107b0c0 | 2954 | lwkt_gettoken(&vm_token); |
984263bc MD |
2955 | TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { |
2956 | if (pv->pv_pmap == pmap) { | |
11502947 | 2957 | lwkt_reltoken(&vm_token); |
984263bc MD |
2958 | return TRUE; |
2959 | } | |
2960 | loops++; | |
2961 | if (loops >= 16) | |
2962 | break; | |
2963 | } | |
4107b0c0 | 2964 | lwkt_reltoken(&vm_token); |
984263bc MD |
2965 | return (FALSE); |
2966 | } | |
2967 | ||
984263bc MD |
2968 | /* |
2969 | * Remove all pages from the specified address space; | 
2970 | * this aids process exit speeds. Also, this code | 
2971 | * is special cased for the current process only, but | 
2972 | * can have the more generic (and slightly slower) | |
2973 | * mode enabled. This is much faster than pmap_remove | |
2974 | * in the case of running down an entire address space. | |
4107b0c0 MD |
2975 | * |
2976 | * No requirements. | |
984263bc MD |
2977 | */ |
2978 | void | |
840de426 | 2979 | pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) |
984263bc | 2980 | { |
287ebb09 | 2981 | struct lwp *lp; |
984263bc MD |
2982 | unsigned *pte, tpte; |
2983 | pv_entry_t pv, npv; | |
984263bc | 2984 | vm_page_t m; |
0f7a3396 | 2985 | pmap_inval_info info; |
4a22e893 | 2986 | int iscurrentpmap; |
8790d7d8 | 2987 | int32_t save_generation; |
984263bc | 2988 | |
287ebb09 MD |
2989 | lp = curthread->td_lwp; |
2990 | if (lp && pmap == vmspace_pmap(lp->lwp_vmspace)) | |
4a22e893 MD |
2991 | iscurrentpmap = 1; |
2992 | else | |
2993 | iscurrentpmap = 0; | |
984263bc | 2994 | |
b12defdc MD |
2995 | if (pmap->pm_pteobj) |
2996 | vm_object_hold(pmap->pm_pteobj); | |
4107b0c0 | 2997 | lwkt_gettoken(&vm_token); |
0f7a3396 | 2998 | pmap_inval_init(&info); |
b12defdc | 2999 | |
4a22e893 | 3000 | for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { |
984263bc MD |
3001 | if (pv->pv_va >= eva || pv->pv_va < sva) { |
3002 | npv = TAILQ_NEXT(pv, pv_plist); | |
3003 | continue; | |
3004 | } | |
3005 | ||
8790d7d8 MD |
3006 | KKASSERT(pmap == pv->pv_pmap); |
3007 | ||
4a22e893 MD |
3008 | if (iscurrentpmap) |
3009 | pte = (unsigned *)vtopte(pv->pv_va); | |
3010 | else | |
8790d7d8 | 3011 | pte = pmap_pte_quick(pmap, pv->pv_va); |
5926987a | 3012 | KKASSERT(*pte); |
c2fb025d | 3013 | pmap_inval_interlock(&info, pmap, pv->pv_va); |
984263bc | 3014 | |
4a22e893 MD |
3015 | /* |
3016 | * We cannot remove wired pages from a process' mapping | |
3017 | * at this time | |
3018 | */ | |
17cde63e | 3019 | if (*pte & PG_W) { |
c2fb025d | 3020 | pmap_inval_deinterlock(&info, pmap); |
984263bc MD |
3021 | npv = TAILQ_NEXT(pv, pv_plist); |
3022 | continue; | |
3023 | } | |
2247fe02 | 3024 | KKASSERT(*pte); |
17cde63e | 3025 | tpte = loadandclear(pte); |
c2fb025d | 3026 | pmap_inval_deinterlock(&info, pmap); |
984263bc MD |
3027 | |
3028 | m = PHYS_TO_VM_PAGE(tpte); | |
5926987a | 3029 | test_m_maps_pv(m, pv); |
984263bc MD |
3030 | |
3031 | KASSERT(m < &vm_page_array[vm_page_array_size], | |
3032 | ("pmap_remove_pages: bad tpte %x", tpte)); | |
3033 | ||
eec2b734 MD |
3034 | KKASSERT(pmap->pm_stats.resident_count > 0); |
3035 | --pmap->pm_stats.resident_count; | |
984263bc MD |
3036 | |
3037 | /* | |
3038 | * Update the vm_page_t clean and reference bits. | |
3039 | */ | |
3040 | if (tpte & PG_M) { | |
3041 | vm_page_dirty(m); | |
3042 | } | |
3043 | ||
984263bc | 3044 | npv = TAILQ_NEXT(pv, pv_plist); |
5926987a MD |
3045 | #ifdef PMAP_DEBUG |
3046 | KKASSERT(pv->pv_m == m); | |
3047 | KKASSERT(pv->pv_pmap == pmap); | |
3048 | #endif | |
8790d7d8 MD |
3049 | TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); |
3050 | save_generation = ++pmap->pm_generation; | |
984263bc MD |
3051 | |
3052 | m->md.pv_list_count--; | |
cef01e15 MD |
3053 | if (m->object) |
3054 | atomic_add_int(&m->object->agg_pv_list_count, -1); | |
984263bc | 3055 | TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); |
17cde63e | 3056 | if (TAILQ_EMPTY(&m->md.pv_list)) |
984263bc | 3057 | vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); |
984263bc | 3058 | |
8790d7d8 | 3059 | pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem, &info); |
984263bc | 3060 | free_pv_entry(pv); |
8790d7d8 MD |
3061 | |
3062 | /* | |
3063 | * Restart the scan if we blocked during the unuse or free | |
3064 | * calls and other removals were made. | |
3065 | */ | |
3066 | if (save_generation != pmap->pm_generation) { | |
3067 | kprintf("Warning: pmap_remove_pages race-A avoided\n"); | |
5926987a | 3068 | npv = TAILQ_FIRST(&pmap->pm_pvlist); |
8790d7d8 | 3069 | } |
984263bc | 3070 | } |
c2fb025d | 3071 | pmap_inval_done(&info); |
4107b0c0 | 3072 | lwkt_reltoken(&vm_token); |
b12defdc MD |
3073 | if (pmap->pm_pteobj) |
3074 | vm_object_drop(pmap->pm_pteobj); | |
984263bc MD |
3075 | } |
3076 | ||
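The restart logic in pmap_remove_pages() hinges on pm_generation: the counter is bumped before each pv entry is torn down, and if pmap_unuse_pt() or free_pv_entry() blocked while some other removal bumped it again, the cached npv pointer can no longer be trusted and the scan restarts from the list head. A minimal stand-alone sketch of that pattern (illustrative only; struct list, dispose_item, and remove_all are invented names, not part of this pmap):

	#include <stddef.h>

	struct item {
		struct item *next;
	};

	struct list {
		struct item *head;
		int          generation;	/* bumped on every removal */
	};

	/* May block; detaches 'ip' from the list and frees it. */
	extern void dispose_item(struct list *lp, struct item *ip);

	static void
	remove_all(struct list *lp)
	{
		struct item *ip, *np;
		int save_generation;

		for (ip = lp->head; ip != NULL; ip = np) {
			np = ip->next;
			save_generation = ++lp->generation;
			dispose_item(lp, ip);
			/* Another removal slipped in while we blocked. */
			if (save_generation != lp->generation)
				np = lp->head;
		}
	}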
3077 | /* | |
3078 | * pmap_testbit tests bits in PTEs. |
5e8d0349 | 3079 | * Note that the testbit/clearbit routines are inline, |
984263bc | 3080 | * and a lot of things compile-time evaluate. |
4107b0c0 MD |
3081 | * |
3082 | * The caller must hold vm_token. | |
984263bc MD |
3083 | */ |
3084 | static boolean_t | |
840de426 | 3085 | pmap_testbit(vm_page_t m, int bit) |
984263bc MD |
3086 | { |
3087 | pv_entry_t pv; | |
3088 | unsigned *pte; | |
984263bc MD |
3089 | |
3090 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) | |
3091 | return FALSE; | |
3092 | ||
3093 | if (TAILQ_FIRST(&m->md.pv_list) == NULL) | |
3094 | return FALSE; | |
3095 | ||
984263bc MD |
3096 | TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { |
3097 | /* | |
3098 | * If the bit being tested is the modified or accessed |
3099 | * bit, skip mappings whose modified state is not |
3100 | * tracked. |
3101 | */ | |
3102 | if (bit & (PG_A|PG_M)) { | |
3103 | if (!pmap_track_modified(pv->pv_va)) | |
3104 | continue; | |
3105 | } | |
3106 | ||
3107 | #if defined(PMAP_DIAGNOSTIC) | |
3108 | if (!pv->pv_pmap) { | |
d557216f MD |
3109 | kprintf("Null pmap (tb) at va: %p\n", |
3110 | (void *)pv->pv_va); | |
984263bc MD |
3111 | continue; |
3112 | } | |
3113 | #endif | |
3114 | pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); | |
74b9d1ec | 3115 | if (*pte & bit) { |
984263bc | 3116 | return TRUE; |
74b9d1ec | 3117 | } |
984263bc | 3118 | } |
984263bc MD |
3119 | return (FALSE); |
3120 | } | |
3121 | ||
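Since pmap_testbit() already walks every mapping of the page and honors the PG_A/PG_M tracking rule, higher-level queries reduce to a single call made with vm_token held, as the comment above requires. A hedged sketch of such a wrapper (an assumption for illustration; the real callers are outside this excerpt):

	/* Has any mapping of this page been written to? */
	static boolean_t
	page_was_modified(vm_page_t m)
	{
		boolean_t rv;

		lwkt_gettoken(&vm_token);	/* pmap_testbit requires vm_token */
		rv = pmap_testbit(m, PG_M);
		lwkt_reltoken(&vm_token);
		return (rv);
	}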
3122 | /* | |
4107b0c0 MD |
3123 | * This routine is used to modify bits in PTEs. |
3124 | * | |
3125 | * The caller must hold vm_token. | |
984263bc MD |
3126 | */ |
3127 | static __inline void | |
5e8d0349 | 3128 | pmap_clearbit(vm_page_t m, int bit) |
984263bc | 3129 | { |
0f7a3396 | 3130 | struct pmap_inval_info info; |
840de426 MD |
3131 | pv_entry_t pv; |
3132 | unsigned *pte; | |
5e8d0349 | 3133 | unsigned pbits; |
984263bc MD |
3134 | |
3135 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) | |
3136 | return; | |
3137 | ||
0f7a3396 | 3138 | pmap_inval_init(&info); |
984263bc MD |
3139 | |
3140 | /* | |
3141 | * Loop over all current mappings, setting/clearing as appropriate. If |
3142 | * setting RO do we need to clear the VAC? | |
3143 | */ | |
3144 | TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { | |
3145 | /* | |
3146 | * don't write protect pager mappings | |
3147 | */ | |
5e8d0349 | 3148 | if (bit == PG_RW) { |
984263bc MD |
3149 | if (!pmap_track_modified(pv->pv_va)) |
3150 | continue; | |
3151 | } | |
3152 | ||
3153 | #if defined(PMAP_DIAGNOSTIC) | |
3154 | if (!pv->pv_pmap) { | |
d557216f MD |
3155 | kprintf("Null pmap (cb) at va: %p\n", |
3156 | (void *)pv->pv_va); | |
984263bc MD |
3157 | continue; |
3158 | } | |
3159 | #endif | |
3160 | ||
0f7a3396 MD |
3161 | /* |
3162 | * Careful here. We can use a locked bus instruction to | |
3163 | * clear PG_A or PG_M safely but we need to synchronize | |
3164 | * with the target cpus when we mess with PG_RW. | |
70fc5283 MD |
3165 | * |
3166 | * We do not have to force synchronization when clearing | |
3167 | * PG_M even for PTEs generated via virtual memory maps, | |
3168 | * because the virtual kernel will invalidate the pmap | |
3169 | * entry when/if it needs to resynchronize the Modify bit. | |
0f7a3396 | 3170 | */ |
70fc5283 | 3171 | if (bit & PG_RW) |
c2fb025d | 3172 | pmap_inval_interlock(&info, pv->pv_pmap, pv->pv_va); |
17cde63e MD |
3173 | pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); |
3174 | again: | |
5e8d0349 MD |
3175 | pbits = *pte; |
3176 | if (pbits & bit) { | |
3177 | if (bit == PG_RW) { | |
17cde63e | 3178 | if (pbits & PG_M) { |
5e8d0349 | 3179 | vm_page_dirty(m); |
17cde63e MD |
3180 | atomic_clear_int(pte, PG_M|PG_RW); |
3181 | } else { | |
3182 | /* |