/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:   @(#)pmap.c	7.7 (Berkeley)	5/12/91
 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
 * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.55 2006/05/25 04:17:07 dillon Exp $
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidation or reduced-protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include "opt_disable_pse.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_zone.h>

#include <sys/user.h>
#include <sys/thread2.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <arch/apic/apicreg.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define	pdir_pde(m, v)	(m[(vm_offset_t)(v) >> PDRSHIFT])

#define	pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define	pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define	pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define	pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define	pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)


/*
 * Given a map and a machine independent protection code,
 * convert to a vax protection code.
 */
#define pte_prot(m, p)	\
	(protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
static int protection_codes[8];

static struct pmap kernel_pmap_store;
pmap_t kernel_pmap;

vm_paddr_t avail_start;		/* PA of first available physical page */
vm_paddr_t avail_end;		/* PA of last available physical page */
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
static int pgeflag;		/* PG_G or-in */
static int pseflag;		/* PG_PS or-in */

static vm_object_t kptobj;

static int nkpt;
vm_offset_t kernel_vm_end;

/*
 * Data for the pv entry allocation mechanism
 */
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = 0, *ptmmap;
caddr_t CADDR1 = 0, ptvmmap = 0;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp = 0;

/*
 * Crashdump maps.
 */
static pt_entry_t *pt_crashdumpmap;
static caddr_t crashdumpmap;

extern pt_entry_t *SMPpt;

static PMAP_INLINE void	free_pv_entry (pv_entry_t pv);
static unsigned *get_ptbase (pmap_t pmap);
static pv_entry_t get_pv_entry (void);
static void	i386_protection_init (void);
static __inline void	pmap_changebit (vm_page_t m, int bit, boolean_t setem);

static void	pmap_remove_all (vm_page_t m);
static vm_page_t pmap_enter_quick (pmap_t pmap, vm_offset_t va,
				vm_page_t m, vm_page_t mpte);
static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq,
				vm_offset_t sva, pmap_inval_info_t info);
static void pmap_remove_page (struct pmap *pmap,
				vm_offset_t va, pmap_inval_info_t info);
static int pmap_remove_entry (struct pmap *pmap, vm_page_t m,
				vm_offset_t va, pmap_inval_info_t info);
static boolean_t pmap_testbit (vm_page_t m, int bit);
static void pmap_insert_entry (pmap_t pmap, vm_offset_t va,
				vm_page_t mpte, vm_page_t m);

static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);

static int pmap_release_free_page (pmap_t pmap, vm_page_t p);
static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex);
static unsigned *pmap_pte_quick (pmap_t pmap, vm_offset_t va);
static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t);
static vm_offset_t pmap_kmem_choose (vm_offset_t addr);

static unsigned pdir4mb;

/*
 * Move the kernel virtual free pointer to the next
 * 4MB.  This is used to help improve performance
 * by using a large (4MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
	vm_offset_t newaddr = addr;
#ifndef DISABLE_PSE
	if (cpu_feature & CPUID_PSE) {
		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
	}
#endif
	return newaddr;
}

/*
 * pmap_pte:
 *
 *	Extract the page table entry associated with the given map/virtual
 *	pair.
 *
 *	This function may NOT be called from an interrupt.
 */
PMAP_INLINE unsigned *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	unsigned *pdeaddr;

	if (pmap) {
		pdeaddr = (unsigned *) pmap_pde(pmap, va);
		if (*pdeaddr & PG_PS)
			return pdeaddr;
		if (*pdeaddr) {
			return get_ptbase(pmap) + i386_btop(va);
		}
	}
	return (0);
}

/*
 * pmap_pte_quick:
 *
 *	Super fast pmap_pte routine best used when scanning the pv lists.
 *	This eliminates many coarse-grained invltlb calls.  Note that many
 *	of the pv list scans are across different pmaps and it is very
 *	wasteful to do an entire invltlb when checking a single mapping.
 *
 *	Should only be called while in a critical section.
 */
static unsigned *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	struct mdglobaldata *gd = mdcpu;
	unsigned pde, newpf;

	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
		unsigned index = i386_btop(va);
		/* are we current address space or kernel? */
		if ((pmap == kernel_pmap) ||
			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
			return (unsigned *) PTmap + index;
		}
		/*
		 * Map the page table page containing the pte into this
		 * cpu's private PMAP1/PADDR1 slot, if it isn't already.
		 */
		newpf = pde & PG_FRAME;
		if (((*(unsigned *) gd->gd_PMAP1) & PG_FRAME) != newpf) {
			*(unsigned *) gd->gd_PMAP1 = newpf | PG_RW | PG_V;
			cpu_invlpg(gd->gd_PADDR1);
		}
		return gd->gd_PADDR1 + ((unsigned) index & (NPTEPG - 1));
	}
	return (0);
}


/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr)
{
	vm_offset_t va;
	pt_entry_t *pte;
	struct mdglobaldata *gd;
	int i;
	int pg;

	avail_start = firstaddr;

	/*
	 * XXX The calculation of virtual_avail is wrong.  It's NKPT*PAGE_SIZE
	 * too large.  It should instead be correctly calculated in locore.s
	 * and not based on 'first' (which is a physical address, not a
	 * virtual address, for the start of unused physical memory).  The
	 * kernel page tables are NOT double mapped and thus should not be
	 * included in this calculation.
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
	virtual_avail = pmap_kmem_choose(virtual_avail);

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 */
	kernel_pmap = &kernel_pmap_store;

	kernel_pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (u_int)IdlePTD);
	kernel_pmap->pm_count = 1;
	kernel_pmap->pm_active = (cpumask_t)-1;	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

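	/*
	 * SYSMAP(c, p, v, n) carves 'n' pages out of the KVA cursor 'va',
	 * assigning the starting address (cast to type 'c') to 'v' and the
	 * first of the corresponding ptes to 'p', advancing both cursors.
	 */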
	va = virtual_avail;
	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);

	/*
	 * ptvmmap is used for reading arbitrary physical pages via
	 * /dev/mem.
	 */
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 * XXX msgbufmap is not used.
	 */
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
	       atop(round_page(MSGBUF_SIZE)))

	virtual_avail = va;

	*(int *) CMAP1 = 0;
	for (i = 0; i < NKPT; i++)
		PTD[i] = 0;

	/*
	 * PG_G is terribly broken on SMP because we IPI invltlb's in some
	 * cases rather than invlpg.  Actually, I don't even know why it
	 * works under UP, given the self-referential page table mappings.
	 */
#ifdef SMP
	pgeflag = 0;
#else
	if (cpu_feature & CPUID_PGE)
		pgeflag = PG_G;
#endif

	/*
	 * Initialize the 4MB page size flag
	 */
	pseflag = 0;
	/*
	 * The 4MB page version of the initial
	 * kernel page mapping.
	 */
	pdir4mb = 0;

#if !defined(DISABLE_PSE)
	if (cpu_feature & CPUID_PSE) {
		unsigned ptditmp;
		/*
		 * Note that we have enabled PSE mode
		 */
		pseflag = PG_PS;
		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
		ptditmp &= ~(NBPDR - 1);
		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
		pdir4mb = ptditmp;

#ifndef SMP
		/*
		 * Enable the PSE mode.  If we are SMP we can't do this
		 * now because the APs will not be able to use it when
		 * they boot up.
		 */
		load_cr4(rcr4() | CR4_PSE);

		/*
		 * We can do the mapping here for the single processor
		 * case.  We simply ignore the old page table page from
		 * now on.
		 *
		 * For SMP, we still need 4K pages to bootstrap APs;
		 * PSE will be enabled as soon as all APs are up.
		 */
		PTD[KPTDI] = (pd_entry_t)ptditmp;
		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t)ptditmp;
		cpu_invltlb();
#endif
	}
#endif
#ifdef SMP
	if (cpu_apic_address == 0)
		panic("pmap_bootstrap: no local apic!");

	/* local apic is mapped on last page */
	SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
	    (cpu_apic_address & PG_FRAME));
#endif

	/*
	 * We need to finish setting up the globaldata page for the BSP.
	 * locore has already populated the page table for the mdglobaldata
	 * portion.
	 */
	pg = MDGLOBALDATA_BASEALLOC_PAGES;
	gd = &CPU_prvspace[0].mdglobaldata;
	gd->gd_CMAP1 = &SMPpt[pg + 0];
	gd->gd_CMAP2 = &SMPpt[pg + 1];
	gd->gd_CMAP3 = &SMPpt[pg + 2];
	gd->gd_PMAP1 = &SMPpt[pg + 3];
	gd->gd_CADDR1 = CPU_prvspace[0].CPAGE1;
	gd->gd_CADDR2 = CPU_prvspace[0].CPAGE2;
	gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3;
	gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1;

	cpu_invltlb();
}

#ifdef SMP
/*
 * Set 4mb pdir for mp startup
 */
void
pmap_set_opt(void)
{
	if (pseflag && (cpu_feature & CPUID_PSE)) {
		load_cr4(rcr4() | CR4_PSE);
		if (pdir4mb && mycpu->gd_cpuid == 0) {	/* only on BSP */
			kernel_pmap->pm_pdir[KPTDI] =
			    PTD[KPTDI] = (pd_entry_t)pdir4mb;
			cpu_invltlb();
		}
	}
}
#endif

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 * pmap_init has been enhanced to support discontiguous physical
 * memory in a fairly consistent way.
 */
void
pmap_init(void)
{
	int i;
	int initial_pvs;

	/*
	 * object for kernel page table pages
	 */
	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */
	for (i = 0; i < vm_page_array_size; i++) {
		vm_page_t m;

		m = &vm_page_array[i];
		TAILQ_INIT(&m->md.pv_list);
		m->md.pv_list_count = 0;
	}

	/*
	 * init the pv free list
	 */
	initial_pvs = vm_page_array_size;
	if (initial_pvs < MINPV)
		initial_pvs = MINPV;
	pvzone = &pvzone_store;
	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
		initial_pvs * sizeof (struct pv_entry));
	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
	    vm_page_array_size);

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2(void)
{
	int shpgperproc = PMAP_SHPGPERPROC;

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
}


/***************************************************
 * Low level helper routines.....
 ***************************************************/

#if defined(PMAP_DIAGNOSTIC)

/*
 * This code checks for non-writeable/modified pages.
 * This should be an invalid condition.
 */
static int
pmap_nw_modified(pt_entry_t ptea)
{
	int pte;

	pte = (int) ptea;

	if ((pte & (PG_M|PG_RW)) == PG_M)
		return 1;
	else
		return 0;
}
#endif


/*
 * this routine defines the region(s) of memory that should
 * not be tested for the modified bit.
 */
static PMAP_INLINE int
pmap_track_modified(vm_offset_t va)
{
	if ((va < clean_sva) || (va >= clean_eva))
		return 1;
	else
		return 0;
}

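/*
 * Return the base of the page table mapping for the given pmap: the
 * recursively mapped PTmap when the pmap is the kernel pmap or the
 * current address space, otherwise the alternate APTmap (reloading
 * APTDpde if it currently points at a different page directory).
 */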
static unsigned *
get_ptbase(pmap_t pmap)
{
	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
	struct globaldata *gd = mycpu;

	/* are we current address space or kernel? */
	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
		return (unsigned *) PTmap;
	}

	/* otherwise, we are alternate address space */
	KKASSERT(gd->gd_intr_nesting_level == 0 &&
		 (gd->gd_curthread->td_flags & TDF_INTTHREAD) == 0);

	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
		APTDpde = (pd_entry_t)(frame | PG_RW | PG_V);
		/* The page directory is not shared between CPUs */
		cpu_invltlb();
	}
	return (unsigned *) APTmap;
}

/*
 * pmap_extract:
 *
 *	Extract the physical page address associated with the map/VA pair.
 *
 *	This function may not be called from an interrupt if the pmap is
 *	not kernel_pmap.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_offset_t rtval;
	vm_offset_t pdirindex;

	pdirindex = va >> PDRSHIFT;
	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
		unsigned *pte;
		if ((rtval & PG_PS) != 0) {
			rtval &= ~(NBPDR - 1);
			rtval |= va & (NBPDR - 1);
			return rtval;
		}
		pte = get_ptbase(pmap) + i386_btop(va);
		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
		return rtval;
	}
	return 0;
}

/*
 * Extract user accessible page only, return NULL if the page is not
 * present or if its current state is not sufficient.  Caller will
 * generally call vm_fault() on failure and try again.
 */
vm_page_t
pmap_extract_vmpage(pmap_t pmap, vm_offset_t va, int prot)
{
	vm_offset_t rtval;
	vm_offset_t pdirindex;

	pdirindex = va >> PDRSHIFT;
	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
		unsigned *pte;
		vm_page_t m;

		if ((rtval & PG_PS) != 0) {
			if ((rtval & (PG_V|PG_U)) != (PG_V|PG_U))
				return (NULL);
			if ((prot & VM_PROT_WRITE) && (rtval & PG_RW) == 0)
				return (NULL);
			rtval &= ~(NBPDR - 1);
			rtval |= va & (NBPDR - 1);
			m = PHYS_TO_VM_PAGE(rtval);
		} else {
			pte = get_ptbase(pmap) + i386_btop(va);
			if ((*pte & (PG_V|PG_U)) != (PG_V|PG_U))
				return (NULL);
			if ((prot & VM_PROT_WRITE) && (*pte & PG_RW) == 0)
				return (NULL);
			rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
			m = PHYS_TO_VM_PAGE(rtval);
		}
		return(m);
	}
	return (NULL);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Routine:	pmap_kenter
 * Function:
 *	Add a wired page to the KVA.
 *	NOTE! In order for the mapping to take effect, you should do
 *	an invltlb after doing the pmap_kenter().
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	unsigned *pte;
	unsigned npte;
	pmap_inval_info info;

	pmap_inval_init(&info);
	pmap_inval_add(&info, kernel_pmap, va);
	npte = pa | PG_RW | PG_V | pgeflag;
	pte = (unsigned *)vtopte(va);
	*pte = npte;
	pmap_inval_flush(&info);
}

/*
 * Routine:	pmap_kenter_quick
 * Function:
 *	Similar to pmap_kenter(), except we only invalidate the
 *	mapping on the current CPU.
 */
void
pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
{
	unsigned *pte;
	unsigned npte;

	npte = pa | PG_RW | PG_V | pgeflag;
	pte = (unsigned *)vtopte(va);
	*pte = npte;
	cpu_invlpg((void *)va);
}

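/*
 * Synchronize a kernel mapping that was entered without invalidation:
 * pmap_kenter_sync() invalidates the va on all cpus via the pmap
 * invalidation mechanism, pmap_kenter_sync_quick() on the current
 * cpu only.
 */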
void
pmap_kenter_sync(vm_offset_t va)
{
	pmap_inval_info info;

	pmap_inval_init(&info);
	pmap_inval_add(&info, kernel_pmap, va);
	pmap_inval_flush(&info);
}

void
pmap_kenter_sync_quick(vm_offset_t va)
{
	cpu_invlpg((void *)va);
}

/*
 * remove a page from the kernel pagetables
 */
void
pmap_kremove(vm_offset_t va)
{
	unsigned *pte;
	pmap_inval_info info;

	pmap_inval_init(&info);
	pmap_inval_add(&info, kernel_pmap, va);
	pte = (unsigned *)vtopte(va);
	*pte = 0;
	pmap_inval_flush(&info);
}

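/*
 * Same as pmap_kremove(), but the mapping is invalidated on the
 * current cpu only.
 */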
void
pmap_kremove_quick(vm_offset_t va)
{
	unsigned *pte;
	pte = (unsigned *)vtopte(va);
	*pte = 0;
	cpu_invlpg((void *)va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * For now, VM is already on, we only need to map the
 * specified memory.
 */
vm_offset_t
pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	while (start < end) {
		pmap_kenter(virt, start);
		virt += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	return (virt);
}


/*
 * Add a list of wired pages to the kva.  This routine is only used
 * for temporary kernel mappings that do not need to have page
 * modification or references recorded.  Note that old mappings are
 * simply written over.  The page *must* be wired.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	vm_offset_t end_va;

	end_va = va + count * PAGE_SIZE;

	while (va < end_va) {
		unsigned *pte;

		pte = (unsigned *)vtopte(va);
		*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
		cpu_invlpg((void *)va);
		va += PAGE_SIZE;
		m++;
	}
#ifdef SMP
	smp_invltlb();	/* XXX */
#endif
}

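/*
 * Same as pmap_qenter() but additionally maintains a caller-supplied
 * cpumask of cpus on which the mappings are already synchronized, so
 * redundant invalidations can be skipped.
 */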
void
pmap_qenter2(vm_offset_t va, vm_page_t *m, int count, cpumask_t *mask)
{
	vm_offset_t end_va;
	cpumask_t cmask = mycpu->gd_cpumask;

	end_va = va + count * PAGE_SIZE;

	while (va < end_va) {
		unsigned *pte;
		unsigned pteval;

		/*
		 * Install the new PTE.  If the pte changed from the prior
		 * mapping we must reset the cpu mask and invalidate the page.
		 * If the pte is the same but we have not seen it on the
		 * current cpu, invlpg the existing mapping.  Otherwise the
		 * entry is optimal and no invalidation is required.
		 */
		pte = (unsigned *)vtopte(va);
		pteval = VM_PAGE_TO_PHYS(*m) | PG_A | PG_RW | PG_V | pgeflag;
		if (*pte != pteval) {
			*mask = 0;
			*pte = pteval;
			cpu_invlpg((void *)va);
		} else if ((*mask & cmask) == 0) {
			cpu_invlpg((void *)va);
		}
		va += PAGE_SIZE;
		m++;
	}
	*mask |= cmask;
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	vm_offset_t end_va;

	end_va = va + count*PAGE_SIZE;

	while (va < end_va) {
		unsigned *pte;

		pte = (unsigned *)vtopte(va);
		*pte = 0;
		cpu_invlpg((void *)va);
		va += PAGE_SIZE;
	}
#ifdef SMP
	smp_invltlb();
#endif
}

/*
 * This routine works like vm_page_lookup() but also blocks as long as the
 * page is busy.  This routine does not busy the page it returns.
 *
 * Unless the caller is managing objects whose pages are in a known state,
 * the call should be made with a critical section held so the page's object
 * association remains valid on return.
 */
static vm_page_t
pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

retry:
	m = vm_page_lookup(object, pindex);
	if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
		goto retry;
	return(m);
}

/*
 * Create a new thread and optionally associate it with a (new) process.
 * NOTE! the new thread's cpu may not equal the current cpu.
 */
void
pmap_init_thread(thread_t td)
{
	/* enforce pcb placement */
	td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1;
	td->td_savefpu = &td->td_pcb->pcb_save;
	td->td_sp = (char *)td->td_pcb - 16;
}

/*
 * Create the UPAGES for a new process.
 * This routine directly affects the fork perf for a process.
 */
void
pmap_init_proc(struct proc *p, struct thread *td)
{
	p->p_addr = (void *)td->td_kstack;
	p->p_thread = td;
	td->td_proc = p;
	td->td_lwp = &p->p_lwp;
	td->td_switch = cpu_heavy_switch;
#ifdef SMP
	KKASSERT(td->td_mpcount == 1);
#endif
	bzero(p->p_addr, sizeof(*p->p_addr));
}

/*
 * Dispose the UPAGES for a process that has exited.
 * This routine directly impacts the exit perf of a process.
 */
struct thread *
pmap_dispose_proc(struct proc *p)
{
	struct thread *td;

	KASSERT(p->p_lock == 0, ("attempt to dispose referenced proc! %p", p));

	if ((td = p->p_thread) != NULL) {
		p->p_thread = NULL;
		td->td_proc = NULL;
	}
	p->p_addr = NULL;
	return(td);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 */
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
{
	pmap_inval_flush(info);
	while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
		;

	if (m->hold_count == 0) {
		/*
		 * unmap the page table page
		 */
		pmap_inval_add(info, pmap, -1);
		pmap->pm_pdir[m->pindex] = 0;
		--pmap->pm_stats.resident_count;

		if (pmap->pm_ptphint == m)
			pmap->pm_ptphint = NULL;

		/*
		 * If the page is finally unwired, simply free it.
		 */
		--m->wire_count;
		if (m->wire_count == 0) {
			vm_page_flash(m);
			vm_page_busy(m);
			vm_page_free_zero(m);
			--vmstats.v_wire_count;
		}
		return 1;
	}
	return 0;
}

static PMAP_INLINE int
pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
{
	vm_page_unhold(m);
	if (m->hold_count == 0)
		return _pmap_unwire_pte_hold(pmap, m, info);
	else
		return 0;
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
	pmap_inval_info_t info)
{
	unsigned ptepindex;
	if (va >= UPT_MIN_ADDRESS)
		return 0;

	if (mpte == NULL) {
		ptepindex = (va >> PDRSHIFT);
		if (pmap->pm_ptphint &&
			(pmap->pm_ptphint->pindex == ptepindex)) {
			mpte = pmap->pm_ptphint;
		} else {
			pmap_inval_flush(info);
			mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
			pmap->pm_ptphint = mpte;
		}
	}

	return pmap_unwire_pte_hold(pmap, mpte, info);
}

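/*
 * Initialize a preallocated pmap structure to use the page directory
 * already set up by locore (IdlePTD), rather than allocating and
 * zeroing a new page directory page.
 */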
void
pmap_pinit0(struct pmap *pmap)
{
	pmap->pm_pdir =
		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
	pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t) IdlePTD);
	pmap->pm_count = 1;
	pmap->pm_active = 0;
	pmap->pm_ptphint = NULL;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(struct pmap *pmap)
{
	vm_page_t ptdpg;

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pdir == NULL) {
		pmap->pm_pdir =
			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
	}

	/*
	 * allocate object for the ptes
	 */
	if (pmap->pm_pteobj == NULL)
		pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1);

	/*
	 * allocate the page directory page
	 */
	ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI,
			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);

	ptdpg->wire_count = 1;
	++vmstats.v_wire_count;

	vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
	ptdpg->valid = VM_PAGE_BITS_ALL;

	pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
	if ((ptdpg->flags & PG_ZERO) == 0)
		bzero(pmap->pm_pdir, PAGE_SIZE);

	pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];

	/* install self-referential address mapping entry */
	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;

	pmap->pm_count = 1;
	pmap->pm_active = 0;
	pmap->pm_ptphint = NULL;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Wire in kernel global address entries.  To avoid a race condition
 * between pmap initialization and pmap_growkernel, this procedure
 * should be called after the vmspace is attached to the process
 * but before this pmap is activated.
 */
void
pmap_pinit2(struct pmap *pmap)
{
	/* XXX copies current process, does not fill in MPPTDI */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
}

/*
 * Attempt to release and free a vm_page in a pmap.  Returns 1 on success,
 * 0 on failure (if the procedure had to sleep).
 */
static int
pmap_release_free_page(struct pmap *pmap, vm_page_t p)
{
	unsigned *pde = (unsigned *) pmap->pm_pdir;
	/*
	 * This code optimizes the case of freeing non-busy
	 * page-table pages.  Those pages are zero now, and
	 * might as well be placed directly into the zero queue.
	 */
	if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
		return 0;

	vm_page_busy(p);

	/*
	 * Remove the page table page from the processes address space.
	 */
	pde[p->pindex] = 0;
	pmap->pm_stats.resident_count--;

	if (p->hold_count) {
		panic("pmap_release: freeing held page table page");
	}
	/*
	 * Page directory pages need to have the kernel
	 * stuff cleared, so they can go into the zero queue also.
	 */
	if (p->pindex == PTDPTDI) {
		bzero(pde + KPTDI, nkpt * PTESIZE);
		pde[MPPTDI] = 0;
		pde[APTDPTDI] = 0;
		pmap_kremove((vm_offset_t)pmap->pm_pdir);
	}

	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
		pmap->pm_ptphint = NULL;

	p->wire_count--;
	vmstats.v_wire_count--;
	vm_page_free_zero(p);
	return 1;
}

/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, unsigned ptepindex)
{
	vm_offset_t pteva, ptepa;
	vm_page_t m;

	/*
	 * Find or fabricate a new pagetable page
	 */
	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
			VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);

	KASSERT(m->queue == PQ_NONE,
		("_pmap_allocpte: %p->queue != PQ_NONE", m));

	if (m->wire_count == 0)
		vmstats.v_wire_count++;
	m->wire_count++;

	/*
	 * Increment the hold count for the page table page
	 * (denoting a new mapping.)
	 */
	m->hold_count++;

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */
	pmap->pm_stats.resident_count++;

	ptepa = VM_PAGE_TO_PHYS(m);
	pmap->pm_pdir[ptepindex] =
		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);

	/*
	 * Set the page table hint
	 */
	pmap->pm_ptphint = m;

	/*
	 * Try to use the new mapping, but if we cannot, then
	 * do it with the routine that maps the page explicitly.
	 */
	if ((m->flags & PG_ZERO) == 0) {
		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
			(((unsigned) PTDpde) & PG_FRAME)) {
			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
			bzero((caddr_t) pteva, PAGE_SIZE);
		} else {
			pmap_zero_page(ptepa);
		}
	}

	m->valid = VM_PAGE_BITS_ALL;
	vm_page_flag_clear(m, PG_ZERO);
	vm_page_flag_set(m, PG_MAPPED);
	vm_page_wakeup(m);

	return m;
}

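/*
 * Return the page table page backing the given user va, consulting
 * the ptphint first, and allocating the page via _pmap_allocpte() if
 * it isn't resident.  A hold reference is added for the new mapping.
 */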
static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va)
{
	unsigned ptepindex;
	vm_offset_t ptepa;
	vm_page_t m;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = va >> PDRSHIFT;

	/*
	 * Get the page directory entry
	 */
	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];

	/*
	 * This supports switching from a 4MB page to a
	 * normal 4K page.
	 */
	if (ptepa & PG_PS) {
		pmap->pm_pdir[ptepindex] = 0;
		ptepa = 0;
		cpu_invltlb();
		smp_invltlb();
	}

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (ptepa) {
		/*
		 * In order to get the page table page, try the
		 * hint first.
		 */
		if (pmap->pm_ptphint &&
			(pmap->pm_ptphint->pindex == ptepindex)) {
			m = pmap->pm_ptphint;
		} else {
			m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
			pmap->pm_ptphint = m;
		}
		m->hold_count++;
		return m;
	}
	/*
	 * Here if the pte page isn't mapped, or if it has been deallocated.
	 */
	return _pmap_allocpte(pmap, ptepindex);
}


/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(struct pmap *pmap)
{
	vm_page_t p, n, ptdpg;
	vm_object_t object = pmap->pm_pteobj;
	int curgeneration;

#if defined(DIAGNOSTIC)
	if (object->ref_count != 1)
		panic("pmap_release: pteobj reference count != 1");
#endif

	ptdpg = NULL;
retry:
	crit_enter();
	curgeneration = object->generation;
	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
		n = TAILQ_NEXT(p, listq);
		if (p->pindex == PTDPTDI) {
			ptdpg = p;
			continue;
		}
		if (!pmap_release_free_page(pmap, p)) {
			crit_exit();
			goto retry;
		}
		if (object->generation != curgeneration) {
			crit_exit();
			goto retry;
		}
	}
	if (ptdpg && !pmap_release_free_page(pmap, ptdpg)) {
		crit_exit();
		goto retry;
	}
	crit_exit();
}

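/*
 * sysctl handlers reporting the total size of KVM and the amount of
 * KVM remaining free above kernel_vm_end.
 */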
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");

/*
 * Grow the number of kernel page table entries, if needed.
 */
struct pmap_growkernel_info {
	pd_entry_t newpdir;
};

static int pmap_growkernel_callback(struct proc *p, void *data);

void
pmap_growkernel(vm_offset_t addr)
{
	struct pmap_growkernel_info info;
	vm_offset_t ptppaddr;
	vm_page_t nkpg;
	pd_entry_t newpdir;

	crit_enter();
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;
		while (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
		}
	}
	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	while (kernel_vm_end < addr) {
		if (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			continue;
		}

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = vm_page_alloc(kptobj, nkpt,
		    VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");

		vm_page_wire(nkpg);
		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
		pmap_zero_page(ptppaddr);
		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
		pdir_pde(PTD, kernel_vm_end) = newpdir;
		*pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
		nkpt++;

		/*
		 * vm_fork and friends copy nkpt page table pages to the high
		 * side of a new process's pmap.  This occurs after the
		 * process has been added to allproc, so scanning the proc
		 * list afterwards should be sufficient to fix up existing
		 * processes.
		 */
		info.newpdir = newpdir;
		allproc_scan(pmap_growkernel_callback, &info);
		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	}
	crit_exit();
}

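/*
 * Called by allproc_scan() for each process: enter the new kernel
 * page directory entry into the process's page directory.
 */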
static int
pmap_growkernel_callback(struct proc *p, void *data)
{
	struct pmap_growkernel_info *info = data;
	struct pmap *pmap;

	if (p->p_vmspace) {
		pmap = vmspace_pmap(p->p_vmspace);
		*pmap_pde(pmap, kernel_vm_end) = info->newpdir;
	}
	return(0);
}

/*
 * Retire the given physical map from service.
 * Should only be called if the map contains
 * no valid mappings.
 */
void
pmap_destroy(pmap_t pmap)
{
	int count;

	if (pmap == NULL)
		return;

	count = --pmap->pm_count;
	if (count == 0) {
		pmap_release(pmap);
		panic("destroying a pmap is not yet implemented");
	}
}

/*
 * Add a reference to the specified pmap.
 */
void
pmap_reference(pmap_t pmap)
{
	if (pmap != NULL) {
		pmap->pm_count++;
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list.  This function may be
 * called from an interrupt.
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{
	pv_entry_count--;
	zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.  This function may be called from an interrupt.
 */
static pv_entry_t
get_pv_entry(void)
{
	pv_entry_count++;
	if (pv_entry_high_water &&
		(pv_entry_count > pv_entry_high_water) &&
		(pmap_pagedaemon_waken == 0)) {
		pmap_pagedaemon_waken = 1;
		wakeup(&vm_pages_needed);
	}
	return zalloc(pvzone);
}

/*
 * This routine is very drastic, but can save the system
 * in a pinch.
 */
void
pmap_collect(void)
{
	int i;
	vm_page_t m;
	static int warningdone = 0;

	if (pmap_pagedaemon_waken == 0)
		return;

	if (warningdone < 5) {
		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
		warningdone++;
	}

	for (i = 0; i < vm_page_array_size; i++) {
		m = &vm_page_array[i];
		if (m->wire_count || m->hold_count || m->busy ||
		    (m->flags & PG_BUSY))
			continue;
		pmap_remove_all(m);
	}
	pmap_pagedaemon_waken = 0;
}


/*
 * If it is the first entry on the list, it is actually
 * in the header and we must copy the following entry up
 * to the header.  Otherwise we must search the list for
 * the entry.  In either case we free the now unused entry.
 */
static int
pmap_remove_entry(struct pmap *pmap, vm_page_t m,
	vm_offset_t va, pmap_inval_info_t info)
{
	pv_entry_t pv;
	int rtval;

	crit_enter();
	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
			if (pmap == pv->pv_pmap && va == pv->pv_va)
				break;
		}
	} else {
		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
			if (va == pv->pv_va)
				break;
		}
	}

	rtval = 0;
	if (pv) {
		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
		free_pv_entry(pv);
	}
	crit_exit();
	return rtval;
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
{
	pv_entry_t pv;

	crit_enter();
	pv = get_pv_entry();
	pv->pv_va = va;
	pv->pv_pmap = pmap;
	pv->pv_ptem = mpte;

	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;

	crit_exit();
}

1571 | /* | |
1572 | * pmap_remove_pte: do the things to unmap a page in a process | |
1573 | */ | |
1574 | static int | |
0f7a3396 MD |
1575 | pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va, |
1576 | pmap_inval_info_t info) | |
984263bc MD |
1577 | { |
1578 | unsigned oldpte; | |
1579 | vm_page_t m; | |
1580 | ||
0f7a3396 | 1581 | pmap_inval_add(info, pmap, va); |
984263bc MD |
1582 | oldpte = loadandclear(ptq); |
1583 | if (oldpte & PG_W) | |
1584 | pmap->pm_stats.wired_count -= 1; | |
1585 | /* | |
1586 | * Machines that don't support invlpg, also don't support | |
0f7a3396 MD |
1587 | * PG_G. XXX PG_G is disabled for SMP so don't worry about |
1588 | * the SMP case. | |
984263bc MD |
1589 | */ |
1590 | if (oldpte & PG_G) | |
41a01a4d | 1591 | cpu_invlpg((void *)va); |
984263bc MD |
1592 | pmap->pm_stats.resident_count -= 1; |
1593 | if (oldpte & PG_MANAGED) { | |
1594 | m = PHYS_TO_VM_PAGE(oldpte); | |
1595 | if (oldpte & PG_M) { | |
1596 | #if defined(PMAP_DIAGNOSTIC) | |
1597 | if (pmap_nw_modified((pt_entry_t) oldpte)) { | |
1598 | printf( | |
1599 | "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", | |
1600 | va, oldpte); | |
1601 | } | |
1602 | #endif | |
1603 | if (pmap_track_modified(va)) | |
1604 | vm_page_dirty(m); | |
1605 | } | |
1606 | if (oldpte & PG_A) | |
1607 | vm_page_flag_set(m, PG_REFERENCED); | |
0f7a3396 | 1608 | return pmap_remove_entry(pmap, m, va, info); |
984263bc | 1609 | } else { |
0f7a3396 | 1610 | return pmap_unuse_pt(pmap, va, NULL, info); |
984263bc MD |
1611 | } |
1612 | /* NOTREACHED */ | |
1613 | return 0; | |
1614 | } | |
1615 | ||
1616 | /* | |
e0e69b7d MD |
1617 | * pmap_remove_page: |
1618 | * | |
1619 | * Remove a single page from a process address space. | |
1620 | * | |
1621 | * This function may not be called from an interrupt if the pmap is | |
1622 | * not kernel_pmap. | |
984263bc MD |
1623 | */ |
1624 | static void | |
0f7a3396 | 1625 | pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info) |
984263bc | 1626 | { |
840de426 | 1627 | unsigned *ptq; |
984263bc MD |
1628 | |
1629 | /* | |
e0e69b7d MD |
1630 | * if there is no pte for this address, just skip it!!! Otherwise |
1631 | * get a local va for mappings for this pmap and remove the entry. | |
984263bc | 1632 | */ |
e0e69b7d MD |
1633 | if (*pmap_pde(pmap, va) != 0) { |
1634 | ptq = get_ptbase(pmap) + i386_btop(va); | |
1635 | if (*ptq) { | |
0f7a3396 | 1636 | pmap_remove_pte(pmap, ptq, va, info); |
e0e69b7d | 1637 | } |
984263bc | 1638 | } |
984263bc MD |
1639 | } |
1640 | ||
1641 | /* | |
0f7a3396 | 1642 | * pmap_remove: |
e0e69b7d | 1643 | * |
984263bc MD |
1644 | * Remove the given range of addresses from the specified map. |
1645 | * | |
1646 | * It is assumed that the start and end are properly | |
1647 | * rounded to the page size. | |
e0e69b7d MD |
1648 | * |
1649 | * This function may not be called from an interrupt if the pmap is | |
1650 | * not kernel_pmap. | |
984263bc MD |
1651 | */ |
1652 | void | |
840de426 | 1653 | pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) |
984263bc | 1654 | { |
840de426 | 1655 | unsigned *ptbase; |
984263bc MD |
1656 | vm_offset_t pdnxt; |
1657 | vm_offset_t ptpaddr; | |
1658 | vm_offset_t sindex, eindex; | |
0f7a3396 | 1659 | struct pmap_inval_info info; |
984263bc MD |
1660 | |
1661 | if (pmap == NULL) | |
1662 | return; | |
1663 | ||
1664 | if (pmap->pm_stats.resident_count == 0) | |
1665 | return; | |
1666 | ||
0f7a3396 MD |
1667 | pmap_inval_init(&info); |
1668 | ||
984263bc MD |
1669 | /* |
1670 | * Special handling for removing a single page: it is a very | |
1671 | * common operation and the general code below can be | |
1672 | * short-circuited. | |
1673 | */ | |
1674 | if (((sva + PAGE_SIZE) == eva) && | |
1675 | (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { | |
0f7a3396 MD |
1676 | pmap_remove_page(pmap, sva, &info); |
1677 | pmap_inval_flush(&info); | |
984263bc MD |
1678 | return; |
1679 | } | |
1680 | ||
984263bc MD |
1681 | /* |
1682 | * Get a local virtual address for the mappings that are being | |
1683 | * worked with. | |
1684 | */ | |
1685 | ptbase = get_ptbase(pmap); | |
1686 | ||
1687 | sindex = i386_btop(sva); | |
1688 | eindex = i386_btop(eva); | |
1689 | ||
1690 | for (; sindex < eindex; sindex = pdnxt) { | |
1691 | unsigned pdirindex; | |
1692 | ||
1693 | /* | |
1694 | * Calculate index for next page table. | |
1695 | */ | |
1696 | pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); | |
1697 | if (pmap->pm_stats.resident_count == 0) | |
1698 | break; | |
1699 | ||
1700 | pdirindex = sindex / NPDEPG; | |
1701 | if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { | |
0f7a3396 | 1702 | pmap_inval_add(&info, pmap, -1); |
984263bc MD |
1703 | pmap->pm_pdir[pdirindex] = 0; |
1704 | pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; | |
984263bc MD |
1705 | continue; |
1706 | } | |
1707 | ||
1708 | /* | |
1709 | * Weed out invalid mappings. Note: we assume that the page | |
1710 | * directory table is always allocated, and in kernel virtual. | |
1711 | */ | |
1712 | if (ptpaddr == 0) | |
1713 | continue; | |
1714 | ||
1715 | /* | |
1716 | * Limit our scan to either the end of the va represented | |
1717 | * by the current page table page, or to the end of the | |
1718 | * range being removed. | |
1719 | */ | |
1720 | if (pdnxt > eindex) { | |
1721 | pdnxt = eindex; | |
1722 | } | |
1723 | ||
0f7a3396 | 1724 | for (; sindex != pdnxt; sindex++) { |
984263bc | 1725 | vm_offset_t va; |
0f7a3396 | 1726 | if (ptbase[sindex] == 0) |
984263bc | 1727 | continue; |
984263bc | 1728 | va = i386_ptob(sindex); |
0f7a3396 | 1729 | if (pmap_remove_pte(pmap, ptbase + sindex, va, &info)) |
984263bc MD |
1730 | break; |
1731 | } | |
1732 | } | |
0f7a3396 | 1733 | pmap_inval_flush(&info); |
984263bc MD |
1734 | } |
1735 | ||
1736 | /* | |
e0e69b7d MD |
1737 | * pmap_remove_all: |
1738 | * | |
1739 | * Removes this physical page from all physical maps in which it resides. | |
1740 | * Reflects back modify bits to the pager. | |
984263bc | 1741 | * |
e0e69b7d | 1742 | * This routine may not be called from an interrupt. |
984263bc MD |
1743 | */ |
1744 | ||
1745 | static void | |
840de426 | 1746 | pmap_remove_all(vm_page_t m) |
984263bc | 1747 | { |
0f7a3396 | 1748 | struct pmap_inval_info info; |
840de426 | 1749 | unsigned *pte, tpte; |
0f7a3396 | 1750 | pv_entry_t pv; |
984263bc MD |
1751 | |
1752 | #if defined(PMAP_DIAGNOSTIC) | |
1753 | /* | |
1754 | * XXX this makes pmap_page_protect(NONE) illegal for non-managed | |
1755 | * pages! | |
1756 | */ | |
1757 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { | |
c469b1c4 | 1758 | panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); |
984263bc MD |
1759 | } |
1760 | #endif | |
1761 | ||
0f7a3396 | 1762 | pmap_inval_init(&info); |
9acd5bbb | 1763 | crit_enter(); |
984263bc MD |
1764 | while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { |
1765 | pv->pv_pmap->pm_stats.resident_count--; | |
1766 | ||
1767 | pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); | |
0f7a3396 | 1768 | pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); |
984263bc MD |
1769 | |
1770 | tpte = loadandclear(pte); | |
1771 | if (tpte & PG_W) | |
1772 | pv->pv_pmap->pm_stats.wired_count--; | |
1773 | ||
1774 | if (tpte & PG_A) | |
1775 | vm_page_flag_set(m, PG_REFERENCED); | |
1776 | ||
1777 | /* | |
1778 | * Update the vm_page_t clean and reference bits. | |
1779 | */ | |
1780 | if (tpte & PG_M) { | |
1781 | #if defined(PMAP_DIAGNOSTIC) | |
1782 | if (pmap_nw_modified((pt_entry_t) tpte)) { | |
1783 | printf( | |
1784 | "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", | |
1785 | pv->pv_va, tpte); | |
1786 | } | |
1787 | #endif | |
1788 | if (pmap_track_modified(pv->pv_va)) | |
1789 | vm_page_dirty(m); | |
1790 | } | |
984263bc MD |
1791 | TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); |
1792 | TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); | |
1793 | m->md.pv_list_count--; | |
0f7a3396 | 1794 | pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); |
984263bc MD |
1795 | free_pv_entry(pv); |
1796 | } | |
1797 | ||
1798 | vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); | |
9acd5bbb | 1799 | crit_exit(); |
0f7a3396 | 1800 | pmap_inval_flush(&info); |
984263bc MD |
1801 | } |
1802 | ||
1803 | /* | |
e0e69b7d MD |
1804 | * pmap_protect: |
1805 | * | |
1806 | * Set the physical protection on the specified range of this map | |
1807 | * as requested. | |
1808 | * | |
1809 | * This function may not be called from an interrupt if the map is | |
1810 | * not the kernel_pmap. | |
984263bc MD |
1811 | */ |
1812 | void | |
1813 | pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) | |
1814 | { | |
840de426 | 1815 | unsigned *ptbase; |
984263bc MD |
1816 | vm_offset_t pdnxt, ptpaddr; |
1817 | vm_pindex_t sindex, eindex; | |
0f7a3396 | 1818 | pmap_inval_info info; |
984263bc MD |
1819 | |
1820 | if (pmap == NULL) | |
1821 | return; | |
1822 | ||
1823 | if ((prot & VM_PROT_READ) == VM_PROT_NONE) { | |
1824 | pmap_remove(pmap, sva, eva); | |
1825 | return; | |
1826 | } | |
1827 | ||
1828 | if (prot & VM_PROT_WRITE) | |
1829 | return; | |
1830 | ||
0f7a3396 | 1831 | pmap_inval_init(&info); |
984263bc MD |
1832 | |
1833 | ptbase = get_ptbase(pmap); | |
1834 | ||
1835 | sindex = i386_btop(sva); | |
1836 | eindex = i386_btop(eva); | |
1837 | ||
1838 | for (; sindex < eindex; sindex = pdnxt) { | |
1839 | ||
1840 | unsigned pdirindex; | |
1841 | ||
1842 | pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); | |
1843 | ||
1844 | pdirindex = sindex / NPDEPG; | |
1845 | if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { | |
0f7a3396 | 1846 | pmap_inval_add(&info, pmap, -1); |
984263bc MD |
1847 | (unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); |
1848 | pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; | |
984263bc MD |
1849 | continue; |
1850 | } | |
1851 | ||
1852 | /* | |
1853 | * Weed out invalid mappings. Note: we assume that the page | |
1854 | * directory table is always allocated, and in kernel virtual. | |
1855 | */ | |
1856 | if (ptpaddr == 0) | |
1857 | continue; | |
1858 | ||
1859 | if (pdnxt > eindex) { | |
1860 | pdnxt = eindex; | |
1861 | } | |
1862 | ||
1863 | for (; sindex != pdnxt; sindex++) { | |
1864 | ||
1865 | unsigned pbits; | |
1866 | vm_page_t m; | |
1867 | ||
0f7a3396 MD |
1868 | /* XXX this isn't optimal */ |
1869 | pmap_inval_add(&info, pmap, i386_ptob(sindex)); | |
984263bc MD |
1870 | pbits = ptbase[sindex]; |
1871 | ||
1872 | if (pbits & PG_MANAGED) { | |
1873 | m = NULL; | |
1874 | if (pbits & PG_A) { | |
1875 | m = PHYS_TO_VM_PAGE(pbits); | |
1876 | vm_page_flag_set(m, PG_REFERENCED); | |
1877 | pbits &= ~PG_A; | |
1878 | } | |
1879 | if (pbits & PG_M) { | |
1880 | if (pmap_track_modified(i386_ptob(sindex))) { | |
1881 | if (m == NULL) | |
1882 | m = PHYS_TO_VM_PAGE(pbits); | |
1883 | vm_page_dirty(m); | |
1884 | pbits &= ~PG_M; | |
1885 | } | |
1886 | } | |
1887 | } | |
1888 | ||
1889 | pbits &= ~PG_RW; | |
1890 | ||
1891 | if (pbits != ptbase[sindex]) { | |
1892 | ptbase[sindex] = pbits; | |
984263bc MD |
1893 | } |
1894 | } | |
1895 | } | |
0f7a3396 | 1896 | pmap_inval_flush(&info); |
984263bc MD |
1897 | } |
1898 | ||
1899 | /* | |
1900 | * Insert the given physical page (m) at | |
1901 | * the specified virtual address (va) in the | |
1902 | * target physical map with the protection requested. | |
1903 | * | |
1904 | * If specified, the page will be wired down, meaning | |
1905 | * that the related pte can not be reclaimed. | |
1906 | * | |
1907 | * NB: This is the only routine which MAY NOT lazy-evaluate | |
1908 | * or lose information. That is, this routine must actually | |
1909 | * insert this page into the given map NOW. | |
1910 | */ | |
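/*
 * Illustrative sketch only (hypothetical caller, not part of this
 * file): a fault handler that has resolved "va" to the vm_page_t "m"
 * in the current process might enter the mapping roughly as:
 *
 *	pmap_enter(vmspace_pmap(curproc->p_vmspace), va, m,
 *		   VM_PROT_READ | VM_PROT_WRITE, FALSE);
 *
 * The mapping is usable as soon as the call returns.
 */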
1911 | void | |
1912 | pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, | |
1913 | boolean_t wired) | |
1914 | { | |
6ef943a3 | 1915 | vm_paddr_t pa; |
840de426 | 1916 | unsigned *pte; |
6ef943a3 | 1917 | vm_paddr_t opa; |
984263bc MD |
1918 | vm_offset_t origpte, newpte; |
1919 | vm_page_t mpte; | |
0f7a3396 | 1920 | pmap_inval_info info; |
984263bc MD |
1921 | |
1922 | if (pmap == NULL) | |
1923 | return; | |
1924 | ||
1925 | va &= PG_FRAME; | |
1926 | #ifdef PMAP_DIAGNOSTIC | |
1927 | if (va > VM_MAX_KERNEL_ADDRESS) | |
1928 | panic("pmap_enter: toobig"); | |
1929 | if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) | |
1930 | panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); | |
1931 | #endif | |
1932 | ||
1933 | mpte = NULL; | |
1934 | /* | |
1935 | * In the case that a page table page is not | |
1936 | * resident, we are creating it here. | |
1937 | */ | |
1938 | if (va < UPT_MIN_ADDRESS) { | |
1939 | mpte = pmap_allocpte(pmap, va); | |
1940 | } | |
984263bc | 1941 | |
0f7a3396 | 1942 | pmap_inval_init(&info); |
984263bc MD |
1943 | pte = pmap_pte(pmap, va); |
1944 | ||
1945 | /* | |
1946 | * Page Directory table entry not valid, we need a new PT page | |
1947 | */ | |
1948 | if (pte == NULL) { | |
6ef943a3 MD |
1949 | panic("pmap_enter: invalid page directory pdir=%x, va=0x%x", | |
1950 | (unsigned) pmap->pm_pdir[PTDPTDI], va); | |
984263bc MD |
1951 | } |
1952 | ||
1953 | pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; | |
0f7a3396 | 1954 | pmap_inval_add(&info, pmap, va); /* XXX non-optimal */ |
984263bc MD |
1955 | origpte = *(vm_offset_t *)pte; |
1956 | opa = origpte & PG_FRAME; | |
1957 | ||
1958 | if (origpte & PG_PS) | |
1959 | panic("pmap_enter: attempted pmap_enter on 4MB page"); | |
1960 | ||
1961 | /* | |
1962 | * Mapping has not changed, must be protection or wiring change. | |
1963 | */ | |
1964 | if (origpte && (opa == pa)) { | |
1965 | /* | |
1966 | * Wiring change, just update stats. We don't worry about | |
1967 | * wiring PT pages as they remain resident as long as there | |
1968 | * are valid mappings in them. Hence, if a user page is wired, | |
1969 | * the PT page will be also. | |
1970 | */ | |
1971 | if (wired && ((origpte & PG_W) == 0)) | |
1972 | pmap->pm_stats.wired_count++; | |
1973 | else if (!wired && (origpte & PG_W)) | |
1974 | pmap->pm_stats.wired_count--; | |
1975 | ||
1976 | #if defined(PMAP_DIAGNOSTIC) | |
1977 | if (pmap_nw_modified((pt_entry_t) origpte)) { | |
1978 | printf( | |
1979 | "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", | |
1980 | va, origpte); | |
1981 | } | |
1982 | #endif | |
1983 | ||
1984 | /* | |
639a9b43 MD |
1985 | * Remove the extra pte reference. Note that we cannot |
1986 | * optimize the RO->RW case because we have adjusted the | |
1987 | * wiring count above and may need to adjust the wiring | |
1988 | * bits below. | |
984263bc MD |
1989 | */ |
1990 | if (mpte) | |
1991 | mpte->hold_count--; | |
1992 | ||
984263bc MD |
1993 | /* |
1994 | * We might be turning off write access to the page, | |
1995 | * so we go ahead and sense modify status. | |
1996 | */ | |
1997 | if (origpte & PG_MANAGED) { | |
1998 | if ((origpte & PG_M) && pmap_track_modified(va)) { | |
1999 | vm_page_t om; | |
2000 | om = PHYS_TO_VM_PAGE(opa); | |
2001 | vm_page_dirty(om); | |
2002 | } | |
2003 | pa |= PG_MANAGED; | |
2004 | } | |
2005 | goto validate; | |
2006 | } | |
2007 | /* | |
2008 | * Mapping has changed, invalidate old range and fall through to | |
2009 | * handle validating new mapping. | |
2010 | */ | |
2011 | if (opa) { | |
2012 | int err; | |
0f7a3396 | 2013 | err = pmap_remove_pte(pmap, pte, va, &info); |
984263bc MD |
2014 | if (err) |
2015 | panic("pmap_enter: pte vanished, va: 0x%x", va); | |
2016 | } | |
2017 | ||
2018 | /* | |
2019 | * Enter on the PV list if part of our managed memory. Note that we | |
2020 | * enter a critical section while manipulating the pv lists since | |
2021 | * pmap_enter can be called at interrupt time. | |
2022 | */ | |
2023 | if (pmap_initialized && | |
2024 | (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { | |
2025 | pmap_insert_entry(pmap, va, mpte, m); | |
2026 | pa |= PG_MANAGED; | |
2027 | } | |
2028 | ||
2029 | /* | |
2030 | * Increment counters | |
2031 | */ | |
2032 | pmap->pm_stats.resident_count++; | |
2033 | if (wired) | |
2034 | pmap->pm_stats.wired_count++; | |
2035 | ||
2036 | validate: | |
2037 | /* | |
2038 | * Now validate mapping with desired protection/wiring. | |
2039 | */ | |
2040 | newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); | |
2041 | ||
2042 | if (wired) | |
2043 | newpte |= PG_W; | |
2044 | if (va < UPT_MIN_ADDRESS) | |
2045 | newpte |= PG_U; | |
2046 | if (pmap == kernel_pmap) | |
2047 | newpte |= pgeflag; | |
2048 | ||
2049 | /* | |
2050 | * if the mapping or permission bits are different, we need | |
2051 | * to update the pte. | |
2052 | */ | |
2053 | if ((origpte & ~(PG_M|PG_A)) != newpte) { | |
2054 | *pte = newpte | PG_A; | |
984263bc | 2055 | } |
0f7a3396 | 2056 | pmap_inval_flush(&info); |
984263bc MD |
2057 | } |
2058 | ||
2059 | /* | |
2060 | * this code makes some *MAJOR* assumptions: | |
2061 | * 1. The pmap is the current pmap and it exists. | |
2062 | * 2. Not wired. | |
2063 | * 3. Read access. | |
2064 | * 4. No page table pages. | |
2065 | * 5. Tlbflush is deferred to calling procedure. | |
2066 | * 6. Page IS managed. | |
2067 | * but is *MUCH* faster than pmap_enter... | |
2068 | */ | |
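/*
 * Descriptive note (added for clarity): pmap_object_init_pt() and
 * pmap_prefault() below drive this routine, feeding the returned
 * page table page back in as "mpte" so consecutive insertions into
 * the same page table avoid repeated lookups.
 */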
2069 | ||
2070 | static vm_page_t | |
840de426 | 2071 | pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) |
984263bc MD |
2072 | { |
2073 | unsigned *pte; | |
6ef943a3 | 2074 | vm_paddr_t pa; |
0f7a3396 MD |
2075 | pmap_inval_info info; |
2076 | ||
2077 | pmap_inval_init(&info); | |
984263bc MD |
2078 | |
2079 | /* | |
2080 | * In the case that a page table page is not | |
2081 | * resident, we are creating it here. | |
2082 | */ | |
2083 | if (va < UPT_MIN_ADDRESS) { | |
2084 | unsigned ptepindex; | |
2085 | vm_offset_t ptepa; | |
2086 | ||
2087 | /* | |
2088 | * Calculate pagetable page index | |
2089 | */ | |
2090 | ptepindex = va >> PDRSHIFT; | |
2091 | if (mpte && (mpte->pindex == ptepindex)) { | |
2092 | mpte->hold_count++; | |
2093 | } else { | |
2094 | retry: | |
2095 | /* | |
2096 | * Get the page directory entry | |
2097 | */ | |
2098 | ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; | |
2099 | ||
2100 | /* | |
2101 | * If the page table page is mapped, we just increment | |
2102 | * the hold count, and activate it. | |
2103 | */ | |
2104 | if (ptepa) { | |
2105 | if (ptepa & PG_PS) | |
2106 | panic("pmap_enter_quick: unexpected mapping into 4MB page"); | |
2107 | if (pmap->pm_ptphint && | |
2108 | (pmap->pm_ptphint->pindex == ptepindex)) { | |
2109 | mpte = pmap->pm_ptphint; | |
2110 | } else { | |
2111 | mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); | |
2112 | pmap->pm_ptphint = mpte; | |
2113 | } | |
2114 | if (mpte == NULL) | |
2115 | goto retry; | |
2116 | mpte->hold_count++; | |
2117 | } else { | |
2118 | mpte = _pmap_allocpte(pmap, ptepindex); | |
2119 | } | |
2120 | } | |
2121 | } else { | |
2122 | mpte = NULL; | |
2123 | } | |
2124 | ||
2125 | /* | |
2126 | * This call to vtopte makes the assumption that we are | |
2127 | * entering the page into the current pmap. In order to support | |
2128 | * quick entry into any pmap, one would likely use pmap_pte_quick. | |
2129 | * But that isn't as quick as vtopte. | |
2130 | */ | |
2131 | pte = (unsigned *)vtopte(va); | |
2132 | if (*pte) { | |
2133 | if (mpte) | |
0f7a3396 | 2134 | pmap_unwire_pte_hold(pmap, mpte, &info); |
984263bc MD |
2135 | return 0; |
2136 | } | |
2137 | ||
2138 | /* | |
2139 | * Enter on the PV list if part of our managed memory. Note that we | |
2140 | * enter a critical section while manipulating the pv lists since | |
2141 | * called at interrupt time. | |
2142 | */ | |
2143 | if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) | |
2144 | pmap_insert_entry(pmap, va, mpte, m); | |
2145 | ||
2146 | /* | |
2147 | * Increment counters | |
2148 | */ | |
2149 | pmap->pm_stats.resident_count++; | |
2150 | ||
2151 | pa = VM_PAGE_TO_PHYS(m); | |
2152 | ||
2153 | /* | |
2154 | * Now validate mapping with RO protection | |
2155 | */ | |
2156 | if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) | |
2157 | *pte = pa | PG_V | PG_U; | |
2158 | else | |
2159 | *pte = pa | PG_V | PG_U | PG_MANAGED; | |
2160 | ||
2161 | return mpte; | |
2162 | } | |
2163 | ||
2164 | /* | |
2165 | * Make a temporary mapping for a physical address. This is only intended | |
2166 | * to be used for panic dumps. | |
2167 | */ | |
2168 | void * | |
6ef943a3 | 2169 | pmap_kenter_temporary(vm_paddr_t pa, int i) |
984263bc MD |
2170 | { |
2171 | pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); | |
2172 | return ((void *)crashdumpmap); | |
2173 | } | |
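/*
 * Illustrative sketch only (hypothetical dump loop): successive
 * physical pages can be mapped through the crashdumpmap window and
 * accessed via the returned base:
 *
 *	char *base = pmap_kenter_temporary(pa, 0);
 *	pmap_kenter_temporary(pa + PAGE_SIZE, 1);
 *	(the two pages are now visible at base and base + PAGE_SIZE)
 */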
2174 | ||
2175 | #define MAX_INIT_PT (96) | |
06ecca5a | 2176 | |
984263bc | 2177 | /* |
06ecca5a MD |
2178 | * This routine preloads the ptes for a given object into the specified pmap. |
2179 | * This eliminates the blast of soft faults on process startup and | |
2180 | * immediately after an mmap. | |
984263bc MD |
2181 | */ |
2182 | void | |
083a7402 MD |
2183 | pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, |
2184 | vm_object_t object, vm_pindex_t pindex, | |
2185 | vm_size_t size, int limit) | |
984263bc MD |
2186 | { |
2187 | vm_offset_t tmpidx; | |
2188 | int psize; | |
2189 | vm_page_t p, mpte; | |
2190 | int objpgs; | |
2191 | ||
083a7402 | 2192 | if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) |
984263bc MD |
2193 | return; |
2194 | ||
06ecca5a MD |
2195 | #if 0 |
2196 | /* | |
2197 | * XXX you must be joking, entering PTE's into a user page table | |
2198 | * without any accounting? This could result in the page table | |
2199 | * being freed while it still contains mappings (free with PG_ZERO | |
2200 | * assumption leading to a non-zero page being marked PG_ZERO). | |
2201 | */ | |
984263bc MD |
2202 | /* |
2203 | * This code maps large physical mmap regions into the | |
2204 | * processor address space. Note that some shortcuts | |
2205 | * are taken, but the code works. | |
2206 | */ | |
2207 | if (pseflag && | |
06ecca5a MD |
2208 | (object->type == OBJT_DEVICE) && |
2209 | ((addr & (NBPDR - 1)) == 0) && | |
2210 | ((size & (NBPDR - 1)) == 0) ) { | |
984263bc MD |
2211 | int i; |
2212 | vm_page_t m[1]; | |
2213 | unsigned int ptepindex; | |
2214 | int npdes; | |
2215 | vm_offset_t ptepa; | |
2216 | ||
2217 | if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) | |
2218 | return; | |
2219 | ||
2220 | retry: | |
2221 | p = vm_page_lookup(object, pindex); | |
2222 | if (p && vm_page_sleep_busy(p, FALSE, "init4p")) | |
2223 | goto retry; | |
2224 | ||
2225 | if (p == NULL) { | |
2226 | p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); | |
2227 | if (p == NULL) | |
2228 | return; | |
2229 | m[0] = p; | |
2230 | ||
2231 | if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { | |
2232 | vm_page_free(p); | |
2233 | return; | |
2234 | } | |
2235 | ||
2236 | p = vm_page_lookup(object, pindex); | |
2237 | vm_page_wakeup(p); | |
2238 | } | |
2239 | ||
2240 | ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p); | |
2241 | if (ptepa & (NBPDR - 1)) { | |
2242 | return; | |
2243 | } | |
2244 | ||
2245 | p->valid = VM_PAGE_BITS_ALL; | |
2246 | ||
2247 | pmap->pm_stats.resident_count += size >> PAGE_SHIFT; | |
2248 | npdes = size >> PDRSHIFT; | |
06ecca5a | 2249 | for (i = 0; i < npdes; i++) { |
984263bc | 2250 | pmap->pm_pdir[ptepindex] = |
06ecca5a | 2251 | (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS); |
984263bc MD |
2252 | ptepa += NBPDR; |
2253 | ptepindex += 1; | |
2254 | } | |
2255 | vm_page_flag_set(p, PG_MAPPED); | |
0f7a3396 MD |
2256 | cpu_invltlb(); |
2257 | smp_invltlb(); | |
984263bc MD |
2258 | return; |
2259 | } | |
06ecca5a | 2260 | #endif |
984263bc MD |
2261 | |
2262 | psize = i386_btop(size); | |
2263 | ||
2264 | if ((object->type != OBJT_VNODE) || | |
2265 | ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && | |
2266 | (object->resident_page_count > MAX_INIT_PT))) { | |
2267 | return; | |
2268 | } | |
2269 | ||
2270 | if (psize + pindex > object->size) { | |
2271 | if (object->size < pindex) | |
2272 | return; | |
2273 | psize = object->size - pindex; | |
2274 | } | |
2275 | ||
06ecca5a | 2276 | |
984263bc | 2277 | /* |
06ecca5a | 2278 | * If we are processing a major portion of the object, then scan the |
984263bc | 2279 | * entire thing. |
06ecca5a | 2280 | * |
9acd5bbb MD |
2281 | * We cannot safely scan the object's memq unless we are in a |
2282 | * critical section since interrupts can remove pages from objects. | |
984263bc | 2283 | */ |
654a39f0 | 2284 | crit_enter(); |
06ecca5a | 2285 | mpte = NULL; |
984263bc MD |
2286 | if (psize > (object->resident_page_count >> 2)) { |
2287 | objpgs = psize; | |
2288 | ||
2289 | for (p = TAILQ_FIRST(&object->memq); | |
06ecca5a MD |
2290 | objpgs > 0 && p != NULL; |
2291 | p = TAILQ_NEXT(p, listq) | |
2292 | ) { | |
984263bc | 2293 | tmpidx = p->pindex; |
06ecca5a | 2294 | if (tmpidx < pindex) |
984263bc | 2295 | continue; |
984263bc | 2296 | tmpidx -= pindex; |
06ecca5a | 2297 | if (tmpidx >= psize) |
984263bc | 2298 | continue; |
06ecca5a | 2299 | |
984263bc MD |
2300 | /* |
2301 | * don't allow an madvise to blow away our really | |
2302 | * free pages by allocating pv entries. | |
2303 | */ | |
2304 | if ((limit & MAP_PREFAULT_MADVISE) && | |
12e4aaff | 2305 | vmstats.v_free_count < vmstats.v_free_reserved) { |
984263bc MD |
2306 | break; |
2307 | } | |
2308 | if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && | |
2309 | (p->busy == 0) && | |
2310 | (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { | |
2311 | if ((p->queue - p->pc) == PQ_CACHE) | |
2312 | vm_page_deactivate(p); | |
2313 | vm_page_busy(p); | |
2314 | mpte = pmap_enter_quick(pmap, | |
2315 | addr + i386_ptob(tmpidx), p, mpte); | |
2316 | vm_page_flag_set(p, PG_MAPPED); | |
2317 | vm_page_wakeup(p); | |
2318 | } | |
2319 | objpgs -= 1; | |
2320 | } | |
2321 | } else { | |
2322 | /* | |
2323 | * else lookup the pages one-by-one. | |
2324 | */ | |
2325 | for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { | |
2326 | /* | |
2327 | * don't allow an madvise to blow away our really | |
2328 | * free pages by allocating pv entries. | |
2329 | */ | |
2330 | if ((limit & MAP_PREFAULT_MADVISE) && | |
12e4aaff | 2331 | vmstats.v_free_count < vmstats.v_free_reserved) { |
984263bc MD |
2332 | break; |
2333 | } | |
2334 | p = vm_page_lookup(object, tmpidx + pindex); | |
2335 | if (p && | |
2336 | ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && | |
2337 | (p->busy == 0) && | |
2338 | (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { | |
2339 | if ((p->queue - p->pc) == PQ_CACHE) | |
2340 | vm_page_deactivate(p); | |
2341 | vm_page_busy(p); | |
2342 | mpte = pmap_enter_quick(pmap, | |
2343 | addr + i386_ptob(tmpidx), p, mpte); | |
2344 | vm_page_flag_set(p, PG_MAPPED); | |
2345 | vm_page_wakeup(p); | |
2346 | } | |
2347 | } | |
2348 | } | |
654a39f0 | 2349 | crit_exit(); |
984263bc MD |
2350 | } |
2351 | ||
2352 | /* | |
06ecca5a MD |
2353 | * pmap_prefault provides a quick way of clustering pagefaults into a |
2354 | * process's address space. It is a "cousin" of pmap_object_init_pt, | |
2355 | * except it runs at page fault time instead of mmap time. | |
984263bc MD |
2356 | */ |
2357 | #define PFBAK 4 | |
2358 | #define PFFOR 4 | |
2359 | #define PAGEORDER_SIZE (PFBAK+PFFOR) | |
2360 | ||
2361 | static int pmap_prefault_pageorder[] = { | |
2362 | -PAGE_SIZE, PAGE_SIZE, | |
2363 | -2 * PAGE_SIZE, 2 * PAGE_SIZE, | |
6302a396 | 2364 | -3 * PAGE_SIZE, 3 * PAGE_SIZE, |
984263bc MD |
2365 | -4 * PAGE_SIZE, 4 * PAGE_SIZE |
2366 | }; | |
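/*
 * To illustrate, with 4KB pages the faulting address addra is probed
 * in the order addra-4K, addra+4K, addra-8K, addra+8K, addra-12K,
 * addra+12K, addra-16K, addra+16K: alternating outwards, so the
 * nearest neighbors are preloaded first.
 */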
2367 | ||
2368 | void | |
840de426 | 2369 | pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) |
984263bc MD |
2370 | { |
2371 | int i; | |
2372 | vm_offset_t starta; | |
2373 | vm_offset_t addr; | |
2374 | vm_pindex_t pindex; | |
2375 | vm_page_t m, mpte; | |
2376 | vm_object_t object; | |
2377 | ||
2378 | if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) | |
2379 | return; | |
2380 | ||
2381 | object = entry->object.vm_object; | |
2382 | ||
2383 | starta = addra - PFBAK * PAGE_SIZE; | |
06ecca5a | 2384 | if (starta < entry->start) |
984263bc | 2385 | starta = entry->start; |
06ecca5a | 2386 | else if (starta > addra) |
984263bc | 2387 | starta = 0; |
984263bc | 2388 | |
06ecca5a | 2389 | /* |
9acd5bbb MD |
2390 | * critical section protection is required to maintain the |
2391 | * page/object association, interrupts can free pages and remove | |
2392 | * them from their objects. | |
06ecca5a | 2393 | */ |
984263bc | 2394 | mpte = NULL; |
654a39f0 | 2395 | crit_enter(); |
984263bc MD |
2396 | for (i = 0; i < PAGEORDER_SIZE; i++) { |
2397 | vm_object_t lobject; | |
2398 | unsigned *pte; | |
2399 | ||
2400 | addr = addra + pmap_prefault_pageorder[i]; | |
2401 | if (addr > addra + (PFFOR * PAGE_SIZE)) | |
2402 | addr = 0; | |
2403 | ||
2404 | if (addr < starta || addr >= entry->end) | |
2405 | continue; | |
2406 | ||
2407 | if ((*pmap_pde(pmap, addr)) == 0) | |
2408 | continue; | |
2409 | ||
2410 | pte = (unsigned *) vtopte(addr); | |
2411 | if (*pte) | |
2412 | continue; | |
2413 | ||
2414 | pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; | |
2415 | lobject = object; | |
06ecca5a | 2416 | |
984263bc | 2417 | for (m = vm_page_lookup(lobject, pindex); |
06ecca5a MD |
2418 | (!m && (lobject->type == OBJT_DEFAULT) && |
2419 | (lobject->backing_object)); | |
2420 | lobject = lobject->backing_object | |
2421 | ) { | |
984263bc MD |
2422 | if (lobject->backing_object_offset & PAGE_MASK) |
2423 | break; | |
2424 | pindex += (lobject->backing_object_offset >> PAGE_SHIFT); | |
2425 | m = vm_page_lookup(lobject->backing_object, pindex); | |
2426 | } | |
2427 | ||
2428 | /* | |
2429 | * give-up when a page is not in memory | |
2430 | */ | |
2431 | if (m == NULL) | |
2432 | break; | |
2433 | ||
2434 | if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && | |
2435 | (m->busy == 0) && | |
2436 | (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { | |
2437 | ||
2438 | if ((m->queue - m->pc) == PQ_CACHE) { | |
2439 | vm_page_deactivate(m); | |
2440 | } | |
2441 | vm_page_busy(m); | |
2442 | mpte = pmap_enter_quick(pmap, addr, m, mpte); | |
2443 | vm_page_flag_set(m, PG_MAPPED); | |
2444 | vm_page_wakeup(m); | |
2445 | } | |
2446 | } | |
654a39f0 | 2447 | crit_exit(); |
984263bc MD |
2448 | } |
2449 | ||
2450 | /* | |
2451 | * Routine: pmap_change_wiring | |
2452 | * Function: Change the wiring attribute for a map/virtual-address | |
2453 | * pair. | |
2454 | * In/out conditions: | |
2455 | * The mapping must already exist in the pmap. | |
2456 | */ | |
2457 | void | |
840de426 | 2458 | pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) |
984263bc | 2459 | { |
840de426 | 2460 | unsigned *pte; |
984263bc MD |
2461 | |
2462 | if (pmap == NULL) | |
2463 | return; | |
2464 | ||
2465 | pte = pmap_pte(pmap, va); | |
2466 | ||
2467 | if (wired && !pmap_pte_w(pte)) | |
2468 | pmap->pm_stats.wired_count++; | |
2469 | else if (!wired && pmap_pte_w(pte)) | |
2470 | pmap->pm_stats.wired_count--; | |
2471 | ||
2472 | /* | |
2473 | * Wiring is not a hardware characteristic so there is no need to | |
0f7a3396 MD |
2474 | * invalidate TLB. However, in an SMP environment we must use |
2475 | * a locked bus cycle to update the pte (if we are not using | |
2476 | * the pmap_inval_*() API that is)... it's ok to do this for simple | |
2477 | * wiring changes. | |
984263bc | 2478 | */ |
0f7a3396 MD |
2479 | #ifdef SMP |
2480 | if (wired) | |
2481 | atomic_set_int(pte, PG_W); | |
2482 | else | |
2483 | atomic_clear_int(pte, PG_W); | |
2484 | #else | |
2485 | if (wired) | |
2486 | atomic_set_int_nonlocked(pte, PG_W); | |
2487 | else | |
2488 | atomic_clear_int_nonlocked(pte, PG_W); | |
2489 | #endif | |
984263bc MD |
2490 | } |
2491 | ||
2492 | ||
2493 | ||
2494 | /* | |
2495 | * Copy the range specified by src_addr/len | |
2496 | * from the source map to the range dst_addr/len | |
2497 | * in the destination map. | |
2498 | * | |
2499 | * This routine is only advisory and need not do anything. | |
2500 | */ | |
984263bc | 2501 | void |
840de426 MD |
2502 | pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, |
2503 | vm_size_t len, vm_offset_t src_addr) | |
984263bc | 2504 | { |
0f7a3396 | 2505 | pmap_inval_info info; |
984263bc MD |
2506 | vm_offset_t addr; |
2507 | vm_offset_t end_addr = src_addr + len; | |
2508 | vm_offset_t pdnxt; | |
2509 | unsigned src_frame, dst_frame; | |
2510 | vm_page_t m; | |
2511 | ||
2512 | if (dst_addr != src_addr) | |
2513 | return; | |
2514 | ||
2515 | src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; | |
2516 | if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) { | |
2517 | return; | |
2518 | } | |
2519 | ||
2520 | dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; | |
2521 | if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { | |
2522 | APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); | |
984263bc MD |
2523 | /* The page directory is not shared between CPUs */ |
2524 | cpu_invltlb(); | |
984263bc | 2525 | } |
0f7a3396 MD |
2526 | pmap_inval_init(&info); |
2527 | pmap_inval_add(&info, dst_pmap, -1); | |
2528 | pmap_inval_add(&info, src_pmap, -1); | |
984263bc | 2529 | |
06ecca5a | 2530 | /* |
654a39f0 | 2531 | * critical section protection is required to maintain the page/object |
06ecca5a MD |
2532 | * association, interrupts can free pages and remove them from |
2533 | * their objects. | |
2534 | */ | |
654a39f0 | 2535 | crit_enter(); |
06ecca5a | 2536 | for (addr = src_addr; addr < end_addr; addr = pdnxt) { |
984263bc MD |
2537 | unsigned *src_pte, *dst_pte; |
2538 | vm_page_t dstmpte, srcmpte; | |
2539 | vm_offset_t srcptepaddr; | |
2540 | unsigned ptepindex; | |
2541 | ||
2542 | if (addr >= UPT_MIN_ADDRESS) | |
2543 | panic("pmap_copy: invalid to pmap_copy page tables\n"); | |
2544 | ||
2545 | /* | |
2546 | * Don't let optional prefaulting of pages make us go | |
2547 | * way below the low water mark of free pages or way | |
2548 | * above high water mark of used pv entries. | |
2549 | */ | |
12e4aaff | 2550 | if (vmstats.v_free_count < vmstats.v_free_reserved || |
984263bc MD |
2551 | pv_entry_count > pv_entry_high_water) |
2552 | break; | |
2553 | ||
2554 | pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); | |
2555 | ptepindex = addr >> PDRSHIFT; | |
2556 | ||
2557 | srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; | |
2558 | if (srcptepaddr == 0) | |
2559 | continue; | |
2560 | ||
2561 | if (srcptepaddr & PG_PS) { | |
2562 | if (dst_pmap->pm_pdir[ptepindex] == 0) { | |
2563 | dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr; | |
2564 | dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; | |
2565 | } | |
2566 | continue; | |
2567 | } | |
2568 | ||
2569 | srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); | |
2570 | if ((srcmpte == NULL) || | |
2571 | (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) | |
2572 | continue; | |
2573 | ||
2574 | if (pdnxt > end_addr) | |
2575 | pdnxt = end_addr; | |
2576 | ||
2577 | src_pte = (unsigned *) vtopte(addr); | |
2578 | dst_pte = (unsigned *) avtopte(addr); | |
2579 | while (addr < pdnxt) { | |
2580 | unsigned ptetemp; | |
2581 | ptetemp = *src_pte; | |
2582 | /* | |
2583 | * we only virtual copy managed pages | |
2584 | */ | |
2585 | if ((ptetemp & PG_MANAGED) != 0) { | |
2586 | /* | |
2587 | * We have to check after allocpte for the | |
2588 | * pte still being around... allocpte can | |
2589 | * block. | |
2590 | */ | |
2591 | dstmpte = pmap_allocpte(dst_pmap, addr); | |
2592 | if ((*dst_pte == 0) && (ptetemp = *src_pte)) { | |
2593 | /* | |
2594 | * Clear the modified and | |
2595 | * accessed (referenced) bits | |
2596 | * during the copy. | |
2597 | */ | |
2598 | m = PHYS_TO_VM_PAGE(ptetemp); | |
2599 | *dst_pte = ptetemp & ~(PG_M | PG_A); | |
2600 | dst_pmap->pm_stats.resident_count++; | |
2601 | pmap_insert_entry(dst_pmap, addr, | |
2602 | dstmpte, m); | |
2603 | } else { | |
0f7a3396 | 2604 | pmap_unwire_pte_hold(dst_pmap, dstmpte, &info); |
984263bc MD |
2605 | } |
2606 | if (dstmpte->hold_count >= srcmpte->hold_count) | |
2607 | break; | |
2608 | } | |
2609 | addr += PAGE_SIZE; | |
2610 | src_pte++; | |
2611 | dst_pte++; | |
2612 | } | |
2613 | } | |
654a39f0 | 2614 | crit_exit(); |
0f7a3396 | 2615 | pmap_inval_flush(&info); |
984263bc MD |
2616 | } |
2617 | ||
2618 | /* | |
2619 | * Routine: pmap_kernel | |
2620 | * Function: | |
2621 | * Returns the physical map handle for the kernel. | |
2622 | */ | |
2623 | pmap_t | |
840de426 | 2624 | pmap_kernel(void) |
984263bc MD |
2625 | { |
2626 | return (kernel_pmap); | |
2627 | } | |
2628 | ||
2629 | /* | |
e0e69b7d MD |
2630 | * pmap_zero_page: |
2631 | * | |
2632 | * Zero the specified PA by mapping the page into KVM and clearing its | |
2633 | * contents. | |
2634 | * | |
2635 | * This function may be called from an interrupt and no locking is | |
2636 | * required. | |
984263bc MD |
2637 | */ |
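/*
 * Descriptive note (added for clarity): each cpu owns a private
 * pte/va pair (gd_CMAP3/gd_CADDR3).  The target page is made visible
 * by constructing the pte
 *
 *	PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M
 *
 * and invalidating the single TLB entry, so no cross-cpu
 * synchronization is required.
 */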
2638 | void | |
6ef943a3 | 2639 | pmap_zero_page(vm_paddr_t phys) |
984263bc | 2640 | { |
85100692 | 2641 | struct mdglobaldata *gd = mdcpu; |
17a9f566 | 2642 | |
e0e69b7d | 2643 | crit_enter(); |
85100692 MD |
2644 | if (*(int *)gd->gd_CMAP3) |
2645 | panic("pmap_zero_page: CMAP3 busy"); | |
85100692 | 2646 | *(int *)gd->gd_CMAP3 = |
17a9f566 | 2647 | PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; |
85100692 | 2648 | cpu_invlpg(gd->gd_CADDR3); |
984263bc MD |
2649 | |
2650 | #if defined(I686_CPU) | |
2651 | if (cpu_class == CPUCLASS_686) | |
85100692 | 2652 | i686_pagezero(gd->gd_CADDR3); |
984263bc MD |
2653 | else |
2654 | #endif | |
85100692 | 2655 | bzero(gd->gd_CADDR3, PAGE_SIZE); |
85100692 | 2656 | *(int *) gd->gd_CMAP3 = 0; |
e0e69b7d | 2657 | crit_exit(); |
8100156a MD |
2658 | } |
2659 | ||
2660 | /* | |
2661 | * pmap_page_assertzero: | |
2662 | * | |
2663 | * Assert that a page is empty, panic if it isn't. | |
2664 | */ | |
2665 | void | |
2666 | pmap_page_assertzero(vm_paddr_t phys) | |
2667 | { | |
2668 | struct mdglobaldata *gd = mdcpu; | |
2669 | int i; | |
2670 | ||
2671 | crit_enter(); | |
2672 | if (*(int *)gd->gd_CMAP3) | |
2673 | panic("pmap_zero_page: CMAP3 busy"); | |
2674 | *(int *)gd->gd_CMAP3 = | |
2675 | PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; | |
2676 | cpu_invlpg(gd->gd_CADDR3); | |
2677 | for (i = 0; i < PAGE_SIZE; i += 4) { | |
2678 | if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) { | |
2679 | panic("pmap_page_assertzero() @ %p not zero!\n", | |
2680 | (void *)gd->gd_CADDR3); | |
2681 | } | |
2682 | } | |
2683 | *(int *) gd->gd_CMAP3 = 0; | |
2684 | crit_exit(); | |
984263bc MD |
2685 | } |
2686 | ||
2687 | /* | |
e0e69b7d MD |
2688 | * pmap_zero_page_area: | |
2689 | * | |
2690 | * Zero part of a physical page by mapping it into memory and clearing | |
2691 | * its contents with bzero. | |
984263bc MD |
2692 | * |
2693 | * off and size may not cover an area beyond a single hardware page. | |
2694 | */ | |
2695 | void | |
6ef943a3 | 2696 | pmap_zero_page_area(vm_paddr_t phys, int off, int size) |
984263bc | 2697 | { |
85100692 | 2698 | struct mdglobaldata *gd = mdcpu; |
17a9f566 | 2699 | |
e0e69b7d | 2700 | crit_enter(); |
85100692 MD |
2701 | if (*(int *) gd->gd_CMAP3) |
2702 | panic("pmap_zero_page: CMAP3 busy"); | |
85100692 MD |
2703 | *(int *) gd->gd_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; |
2704 | cpu_invlpg(gd->gd_CADDR3); | |
984263bc MD |
2705 | |
2706 | #if defined(I686_CPU) | |
2707 | if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) | |
85100692 | 2708 | i686_pagezero(gd->gd_CADDR3); |
984263bc MD |
2709 | else |
2710 | #endif | |
85100692 | 2711 | bzero((char *)gd->gd_CADDR3 + off, size); |
85100692 | 2712 | *(int *) gd->gd_CMAP3 = 0; |
e0e69b7d | 2713 | crit_exit(); |
984263bc MD |
2714 | } |
2715 | ||
2716 | /* | |
e0e69b7d MD |
2717 | * pmap_copy_page: |
2718 | * | |
2719 | * Copy the physical page from the source PA to the target PA. | |
2720 | * This function may be called from an interrupt. No locking | |
2721 | * is required. | |
984263bc MD |
2722 | */ |
2723 | void | |
6ef943a3 | 2724 | pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) |
984263bc | 2725 | { |
85100692 | 2726 | struct mdglobaldata *gd = mdcpu; |
17a9f566 | 2727 | |
e0e69b7d | 2728 | crit_enter(); |
85100692 MD |
2729 | if (*(int *) gd->gd_CMAP1) |
2730 | panic("pmap_copy_page: CMAP1 busy"); | |
2731 | if (*(int *) gd->gd_CMAP2) | |
2732 | panic("pmap_copy_page: CMAP2 busy"); | |
984263bc | 2733 | |
85100692 MD |
2734 | *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; |
2735 | *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; | |
984263bc | 2736 | |
85100692 MD |
2737 | cpu_invlpg(gd->gd_CADDR1); |
2738 | cpu_invlpg(gd->gd_CADDR2); | |
984263bc | 2739 | |
85100692 | 2740 | bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE); |
984263bc | 2741 | |
85100692 MD |
2742 | *(int *) gd->gd_CMAP1 = 0; |
2743 | *(int *) gd->gd_CMAP2 = 0; | |
e0e69b7d | 2744 | crit_exit(); |
984263bc MD |
2745 | } |
2746 | ||
f6bf3af1 MD |
2747 | /* |
2748 | * pmap_copy_page_frag: | |
2749 | * | |
2750 | * Copy a fragment of a physical page from the source PA to the | |
2751 | * target PA; the low bits of src and dst select the sub-page | |
2752 | * offsets. May be called from an interrupt; no locking is required. | |
2753 | */ | |
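/*
 * Illustrative sketch only (hypothetical names): copying 512 bytes
 * between two pages at their respective sub-page offsets might look
 * like
 *
 *	pmap_copy_page_frag(src_pa | src_off, dst_pa | dst_off, 512);
 *
 * where the low bits of each address select the offset within the
 * page and the copy must not cross a page boundary.
 */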
2754 | void | |
2755 | pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) | |
2756 | { | |
2757 | struct mdglobaldata *gd = mdcpu; | |
2758 | ||
2759 | crit_enter(); | |
2760 | if (*(int *) gd->gd_CMAP1) | |
2761 | panic("pmap_copy_page: CMAP1 busy"); | |
2762 | if (*(int *) gd->gd_CMAP2) | |
2763 | panic("pmap_copy_page: CMAP2 busy"); | |
2764 | ||
2765 | *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; | |
2766 | *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; | |
2767 | ||
2768 | cpu_invlpg(gd->gd_CADDR1); | |
2769 | cpu_invlpg(gd->gd_CADDR2); | |
2770 | ||
2771 | bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK), | |
2772 | (char *)gd->gd_CADDR2 + (dst & PAGE_MASK), | |
2773 | bytes); | |
2774 | ||
2775 | *(int *) gd->gd_CMAP1 = 0; | |
2776 | *(int *) gd->gd_CMAP2 = 0; | |
2777 | crit_exit(); | |
2778 | } | |
2779 | ||
984263bc MD |
2780 | /* |
2781 | * Returns true if the pmap's pv is one of the first | |
2782 | * 16 pvs linked to from this page. This count may | |
2783 | * be changed upwards or downwards in the future; it | |
2784 | * is only necessary that true be returned for a small | |
2785 | * subset of pmaps for proper page aging. | |
2786 | */ | |
2787 | boolean_t | |
840de426 | 2788 | pmap_page_exists_quick(pmap_t pmap, vm_page_t m) |
984263bc MD |
2789 | { |
2790 | pv_entry_t pv; | |
2791 | int loops = 0; | |
984263bc MD |
2792 | |
2793 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) | |
2794 | return FALSE; | |
2795 | ||
9acd5bbb | 2796 | crit_enter(); |
984263bc MD |
2797 | |
2798 | TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { | |
2799 | if (pv->pv_pmap == pmap) { | |
9acd5bbb | 2800 | crit_exit(); |
984263bc MD |
2801 | return TRUE; |
2802 | } | |
2803 | loops++; | |
2804 | if (loops >= 16) | |
2805 | break; | |
2806 | } | |
9acd5bbb | 2807 | crit_exit(); |
984263bc MD |
2808 | return (FALSE); |
2809 | } | |
2810 | ||
2811 | #define PMAP_REMOVE_PAGES_CURPROC_ONLY | |
2812 | /* | |
2813 | * Remove all pages from the specified address space; | |
2814 | * this aids process exit speeds. Also, this code is | |
2815 | * special-cased for the current process only, but | |
2816 | * can have the more generic (and slightly slower) | |
2817 | * mode enabled. This is much faster than pmap_remove | |
2818 | * in the case of running down an entire address space. | |
2819 | */ | |
2820 | void | |
840de426 | 2821 | pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) |
984263bc MD |
2822 | { |
2823 | unsigned *pte, tpte; | |
2824 | pv_entry_t pv, npv; | |
984263bc | 2825 | vm_page_t m; |
0f7a3396 | 2826 | pmap_inval_info info; |
984263bc MD |
2827 | |
2828 | #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY | |
2829 | if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { | |
2830 | printf("warning: pmap_remove_pages called with non-current pmap\n"); | |
2831 | return; | |
2832 | } | |
2833 | #endif | |
2834 | ||
0f7a3396 | 2835 | pmap_inval_init(&info); |
9acd5bbb | 2836 | crit_enter(); |
984263bc MD |
2837 | for (pv = TAILQ_FIRST(&pmap->pm_pvlist); | |
2838 | pv; | |
2839 | pv = npv) { | |
2840 | ||
2841 | if (pv->pv_va >= eva || pv->pv_va < sva) { | |
2842 | npv = TAILQ_NEXT(pv, pv_plist); | |
2843 | continue; | |
2844 | } | |
2845 | ||
2846 | #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY | |
2847 | pte = (unsigned *)vtopte(pv->pv_va); | |
2848 | #else | |
2849 | pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); | |
2850 | #endif | |
0f7a3396 | 2851 | pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); |
984263bc MD |
2852 | tpte = *pte; |
2853 | ||
2854 | /* | |
2855 | * We cannot remove wired pages from a process' mapping at this time | |
2856 | */ | |
2857 | if (tpte & PG_W) { | |
2858 | npv = TAILQ_NEXT(pv, pv_plist); | |
2859 | continue; | |
2860 | } | |
2861 | *pte = 0; | |
2862 | ||
2863 | m = PHYS_TO_VM_PAGE(tpte); | |
2864 | ||
2865 | KASSERT(m < &vm_page_array[vm_page_array_size], | |
2866 | ("pmap_remove_pages: bad tpte %x", tpte)); | |
2867 | ||
2868 | pv->pv_pmap->pm_stats.resident_count--; | |
2869 | ||
2870 | /* | |
2871 | * Update the vm_page_t clean and reference bits. | |
2872 | */ | |
2873 | if (tpte & PG_M) { | |
2874 | vm_page_dirty(m); | |
2875 | } | |
2876 | ||
2877 | ||
2878 | npv = TAILQ_NEXT(pv, pv_plist); | |
2879 | TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); | |
2880 | ||
2881 | m->md.pv_list_count--; | |
2882 | TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); | |
2883 | if (TAILQ_FIRST(&m->md.pv_list) == NULL) { | |
2884 | vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); | |
2885 | } | |
2886 | ||
0f7a3396 | 2887 | pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); |
984263bc MD |
2888 | free_pv_entry(pv); |
2889 | } | |
0f7a3396 | 2890 | pmap_inval_flush(&info); |
9acd5bbb | 2891 | crit_exit(); |
984263bc MD |
2892 | } |
2893 | ||
2894 | /* | |
2895 | * pmap_testbit tests bits in ptes. Note that the | |
2896 | * testbit/changebit routines are inline, so a lot of | |
2897 | * things are evaluated at compile time. | |
2898 | */ | |
2899 | static boolean_t | |
840de426 | 2900 | pmap_testbit(vm_page_t m, int bit) |
984263bc MD |
2901 | { |
2902 | pv_entry_t pv; | |
2903 | unsigned *pte; | |
984263bc MD |
2904 | |
2905 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) | |
2906 | return FALSE; | |
2907 | ||
2908 | if (TAILQ_FIRST(&m->md.pv_list) == NULL) | |
2909 | return FALSE; | |
2910 | ||
9acd5bbb | 2911 | crit_enter(); |
984263bc MD |
2912 | |
2913 | TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { | |
2914 | /* | |
2915 | * If the bit being tested is the modified or accessed bit, | |
2916 | * skip mappings whose A/M state is not tracked (such as | |
2917 | * transient pager mappings). | |
2918 | */ | |
2919 | if (bit & (PG_A|PG_M)) { | |
2920 | if (!pmap_track_modified(pv->pv_va)) | |
2921 | continue; | |
2922 | } | |
2923 | ||
2924 | #if defined(PMAP_DIAGNOSTIC) | |
2925 | if (!pv->pv_pmap) { | |
2926 | printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); | |
2927 | continue; | |
2928 | } | |
2929 | #endif | |
2930 | pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); | |
2931 | if (*pte & bit) { | |
9acd5bbb | 2932 | crit_exit(); |
984263bc MD |
2933 | return TRUE; |
2934 | } | |
2935 | } | |
9acd5bbb | 2936 | crit_exit(); |
984263bc MD |
2937 | return (FALSE); |
2938 | } | |
2939 | ||
2940 | /* | |
2941 | * this routine is used to modify bits in ptes | |
2942 | */ | |
2943 | static __inline void | |
840de426 | 2944 | pmap_changebit(vm_page_t m, int bit, boolean_t setem) |
984263bc | 2945 | { |
0f7a3396 | 2946 | struct pmap_inval_info info; |
840de426 MD |
2947 | pv_entry_t pv; |
2948 | unsigned *pte; | |
984263bc MD |
2949 | |
2950 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) | |
2951 | return; | |
2952 | ||
0f7a3396 | 2953 | pmap_inval_init(&info); |
9acd5bbb | 2954 | crit_enter(); |
984263bc MD |
2955 | |
2956 | /* | |
2957 | * Loop over all current mappings, setting/clearing as appropriate. If | |
2958 | * setting RO do we need to clear the VAC? | |
2959 | */ | |
2960 | TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { | |
2961 | /* | |
2962 | * don't write protect pager mappings | |
2963 | */ | |
2964 | if (!setem && (bit == PG_RW)) { | |
2965 | if (!pmap_track_modified(pv->pv_va)) | |
2966 | continue; | |
2967 | } | |
2968 | ||
2969 | #if defined(PMAP_DIAGNOSTIC) | |
2970 | if (!pv->pv_pmap) { | |
2971 | printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); | |
2972 | continue; | |
2973 | } | |
2974 | #endif | |
2975 | ||
0f7a3396 MD |
2976 | /* |
2977 | * Careful here. We can use a locked bus instruction to | |
2978 | * clear PG_A or PG_M safely but we need to synchronize | |
2979 | * with the target cpus when we mess with PG_RW. | |
2980 | */ | |
984263bc | 2981 | pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); |
0f7a3396 MD |
2982 | if (bit == PG_RW) |
2983 | pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); | |
984263bc MD |
2984 | |
2985 | if (setem) { | |
0f7a3396 MD |
2986 | #ifdef SMP |
2987 | atomic_set_int(pte, bit); | |
2988 | #else | |
2989 | atomic_set_int_nonlocked(pte, bit); | |
2990 | #endif | |
984263bc MD |
2991 | } else { |
2992 | vm_offset_t pbits = *(vm_offset_t *)pte; | |
2993 | if (pbits & bit) { | |
2994 | if (bit == PG_RW) { | |
2995 | if (pbits & PG_M) { | |
2996 | vm_page_dirty(m); | |
2997 | } | |
0f7a3396 MD |
2998 | #ifdef SMP |
2999 | atomic_clear_int(pte, PG_M|PG_RW); | |
3000 | #else | |
3001 | atomic_clear_int_nonlocked(pte, PG_M|PG_RW); | |
3002 | #endif | |
984263bc | 3003 | } else { |
0f7a3396 MD |
3004 | #ifdef SMP |
3005 | atomic_clear_int(pte, bit); | |
3006 | #else | |
3007 | atomic_clear_int_nonlocked(pte, bit); | |
3008 | #endif | |
984263bc | 3009 | } |
984263bc MD |
3010 | } |
3011 | } | |
3012 | } | |
0f7a3396 | 3013 | pmap_inval_flush(&info); |
9acd5bbb | 3014 | crit_exit(); |
984263bc MD |
3015 | } |
3016 | ||
3017 | /* | |
3018 | * pmap_page_protect: | |
3019 | * | |
3020 | * Lower the permission for all mappings to a given page. | |
3021 | */ | |
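/*
 * Illustrative sketch only: the two interesting transitions are
 *
 *	pmap_page_protect(m, VM_PROT_READ);	write-protect all mappings
 *	pmap_page_protect(m, VM_PROT_NONE);	remove all mappings
 *
 * which resolve to pmap_changebit() and pmap_remove_all() below.
 */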
3022 | void | |
3023 | pmap_page_protect(vm_page_t m, vm_prot_t prot) | |
3024 | { | |
3025 | if ((prot & VM_PROT_WRITE) == 0) { | |
3026 | if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { | |
3027 | pmap_changebit(m, PG_RW, FALSE); | |
3028 | } else { | |
3029 | pmap_remove_all(m); | |
3030 | } | |
3031 | } | |
3032 | } | |
3033 | ||
6ef943a3 | 3034 | vm_paddr_t |
840de426 | 3035 | pmap_phys_address(int ppn) |
984263bc MD |
3036 | { |
3037 | return (i386_ptob(ppn)); | |
3038 | } | |
3039 | ||
3040 | /* | |
3041 | * pmap_ts_referenced: | |
3042 | * | |
3043 | * Return a count of reference bits for a page, clearing those bits. | |
3044 | * It is not necessary for every reference bit to be cleared, but it | |
3045 | * is necessary that 0 only be returned when there are truly no | |
3046 | * reference bits set. | |
3047 | * | |
3048 | * XXX: The exact number of bits to check and clear is a matter that | |
3049 | * should be tested and standardized at some point in the future for | |
3050 | * optimal aging of shared pages. | |
3051 | */ | |
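/*
 * Descriptive note (added for clarity): the loop below rotates each
 * visited pv entry to the tail of the page's pv list, so repeated
 * calls cycle through all mappings, and it stops early once more
 * than 4 reference bits have been cleared.
 */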
3052 | int | |
3053 | pmap_ts_referenced(vm_page_t m) | |
3054 | { | |
840de426 | 3055 | pv_entry_t pv, pvf, pvn; |
984263bc | 3056 | unsigned *pte; |
984263bc MD |
3057 | int rtval = 0; |
3058 | ||
3059 | if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) | |
3060 | return (rtval); | |
3061 | ||
9acd5bbb | 3062 | crit_enter(); |
984263bc MD |
3063 | |
3064 | if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { | |
3065 | ||
3066 | pvf = pv; | |
3067 | ||
3068 | do { | |
3069 | pvn = TAILQ_NEXT(pv, pv_list); | |
3070 | ||
3071 | TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); | |
3072 | ||
3073 | TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); | |
3074 | ||
3075 | if (!pmap_track_modified(pv->pv_va)) | |
3076 | continue; | |
3077 | ||
3078 | pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); | |
3079 | ||
3080 | if (pte && (*pte & PG_A)) { | |
0f7a3396 MD |
3081 | #ifdef SMP |
3082 | atomic_clear_int(pte, PG_A); | |
3083 | #else | |
3084 | atomic_clear_int_nonlocked(pte, PG_A); | |
3085 | #endif | |
984263bc MD |
3086 | rtval++; |
3087 | if (rtval > 4) { | |
3088 | break; | |
3089 | } | |
3090 | } | |
3091 | } while ((pv = pvn) != NULL && pv != pvf); | |
3092 | } | |
9acd5bbb | 3093 | crit_exit(); |
984263bc MD |
3094 | |
3095 | return (rtval); | |
3096 | } | |
3097 | ||
3098 | /* | |
3099 | * pmap_is_modified: | |
3100 | * | |
3101 | * Return whether or not the specified physical page was modified | |
3102 | * in any physical maps. | |
3103 | */ | |
3104 | boolean_t | |
3105 | pmap_is_modified(vm_page_t m) | |
3106 | { | |
3107 | return pmap_testbit(m, PG_M); | |
3108 | } | |
3109 | ||
3110 | /* | |
3111 | * Clear the modify bits on the specified physical page. | |
3112 | */ | |
3113 | void | |
3114 | pmap_clear_modify(vm_page_t m) | |
3115 | { | |
3116 | pmap_changebit(m, PG_M, FALSE); | |
3117 | } | |
3118 | ||
3119 | /* | |
3120 | * pmap_clear_reference: | |
3121 | * | |
3122 | * Clear the reference bit on the specified physical page. | |
3123 | */ | |
3124 | void | |
3125 | pmap_clear_reference(vm_page_t m) | |
3126 | { | |
3127 | pmap_changebit(m, PG_A, FALSE); | |
3128 | } | |
3129 | ||
3130 | /* | |
3131 | * Miscellaneous support routines follow | |
3132 | */ | |
3133 | ||
3134 | static void | |
840de426 | 3135 | i386_protection_init(void) |
984263bc | 3136 | { |
840de426 | 3137 | int *kp, prot; |
984263bc MD |
3138 | |
3139 | kp = protection_codes; | |
3140 | for (prot = 0; prot < 8; prot++) { | |
3141 | switch (prot) { | |
3142 | case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: | |
3143 | /* | |
3144 | * Read access is also 0. There isn't any execute bit, | |
3145 | * so just make it readable. | |
3146 | */ | |
3147 | case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: | |
3148 | case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: | |
3149 | case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: | |
3150 | *kp++ = 0; | |
3151 | break; | |
3152 | case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: | |
3153 | case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: | |
3154 | case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: | |
3155 | case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: | |
3156 | *kp++ = PG_RW; | |
3157 | break; | |
3158 | } | |
3159 | } | |
3160 | } | |
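/*
 * The resulting table, for reference: every protection combination
 * without VM_PROT_WRITE yields 0 (readable; i386 has no separate
 * execute bit), and every combination including VM_PROT_WRITE
 * yields PG_RW.
 */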
3161 | ||
3162 | /* | |
3163 | * Map a set of physical memory pages into the kernel virtual | |
3164 | * address space. Return a pointer to where it is mapped. This | |
3165 | * routine is intended to be used for mapping device memory, | |
3166 | * NOT real memory. | |
a2a5ad0d MD |
3167 | * |
3168 | * NOTE: we can't use pgeflag unless we invalidate the pages one at | |
3169 | * a time. | |
984263bc MD |
3170 | */ |
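/*
 * Illustrative sketch only (hypothetical driver and address): mapping
 * a 64KB device aperture and tearing it down might look like
 *
 *	void *regs = pmap_mapdev(0xfebc0000, 65536);
 *	...
 *	pmap_unmapdev((vm_offset_t)regs, 65536);
 */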
3171 | void * | |
6ef943a3 | 3172 | pmap_mapdev(vm_paddr_t pa, vm_size_t size) |
984263bc MD |
3173 | { |
3174 | vm_offset_t va, tmpva, offset; | |
3175 | unsigned *pte; | |
3176 | ||
3177 | offset = pa & PAGE_MASK; | |
3178 | size = roundup(offset + size, PAGE_SIZE); | |
3179 | ||
a02705a9 | 3180 | va = kmem_alloc_nofault(kernel_map, size); |
984263bc MD |
3181 | if (!va) |
3182 | panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); | |
3183 | ||
3184 | pa = pa & PG_FRAME; | |
3185 | for (tmpva = va; size > 0;) { | |
3186 | pte = (unsigned *)vtopte(tmpva); | |
a2a5ad0d | 3187 | *pte = pa | PG_RW | PG_V; /* | pgeflag; */ |
984263bc MD |
3188 | size -= PAGE_SIZE; |
3189 | tmpva += PAGE_SIZE; | |
3190 | pa += PAGE_SIZE; | |
3191 | } | |
0f7a3396 MD |
3192 | cpu_invltlb(); |
3193 | smp_invltlb(); | |
984263bc MD |
3194 | |
3195 | return ((void *)(va + offset)); | |
3196 | } | |
3197 | ||
3198 | void | |
840de426 | 3199 | pmap_unmapdev(vm_offset_t va, vm_size_t size) |
984263bc MD |
3200 | { |
3201 | vm_offset_t base, offset; | |
3202 | ||
3203 | base = va & PG_FRAME; | |
3204 | offset = va & PAGE_MASK; | |
3205 | size = roundup(offset + size, PAGE_SIZE); | |
0f579831 | 3206 | pmap_qremove(va, size >> PAGE_SHIFT); |
984263bc MD |
3207 | kmem_free(kernel_map, base, size); |
3208 | } | |
3209 | ||
3210 | /* | |
3211 | * perform the pmap work for mincore | |
3212 | */ | |
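/*
 * Illustrative note: a resident, clean, unreferenced managed page
 * yields just MINCORE_INCORE; a page dirtied through this pmap
 * additionally carries MINCORE_MODIFIED and MINCORE_MODIFIED_OTHER,
 * mirroring the bit logic below.
 */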
3213 | int | |
840de426 | 3214 | pmap_mincore(pmap_t pmap, vm_offset_t addr) |
984263bc | 3215 | { |
984263bc MD |
3216 | unsigned *ptep, pte; |
3217 | vm_page_t m; | |
3218 | int val = 0; | |
3219 | ||
3220 | ptep = pmap_pte(pmap, addr); | |
3221 | if (ptep == NULL) { | |
3222 | return 0; | |
3223 | } | |
3224 | ||
3225 | if ((pte = *ptep) != 0) { | |
3226 | vm_offset_t pa; | |
3227 | ||
3228 | val = MINCORE_INCORE; | |
3229 | if ((pte & PG_MANAGED) == 0) | |
3230 | return val; | |
3231 | ||
3232 | pa = pte & PG_FRAME; | |
3233 | ||
3234 | m = PHYS_TO_VM_PAGE(pa); | |
3235 | ||
3236 | /* | |
3237 | * Modified by us | |
3238 | */ | |
3239 | if (pte & PG_M) | |
3240 | val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; | |
3241 | /* | |
3242 | * Modified by someone | |
3243 | */ | |
3244 | else if (m->dirty || pmap_is_modified(m)) | |
3245 | val |= MINCORE_MODIFIED_OTHER; | |
3246 | /* | |
3247 | * Referenced by us | |
3248 | */ | |
3249 | if (pte & PG_A) | |
3250 | val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; | |
3251 | ||
3252 | /* | |
3253 | * Referenced by someone | |
3254 | */ | |
3255 | else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { | |
3256 | val |= MINCORE_REFERENCED_OTHER; | |
3257 | vm_page_flag_set(m, PG_REFERENCED); | |
3258 | } | |
3259 | } | |
3260 | return val; | |
3261 | } | |
3262 | ||
3263 | void | |
3264 | pmap_activate(struct proc *p) | |
3265 | { | |
3266 | pmap_t pmap; | |
3267 | ||
3268 | pmap = vmspace_pmap(p->p_vmspace); | |
3269 | #if defined(SMP) | |
a2a5ad0d | 3270 | atomic_set_int(&pmap->pm_active, 1 << mycpu->gd_cpuid); |
984263bc MD |
3271 | #else |
3272 | pmap->pm_active |= 1; | |
3273 | #endif | |
3274 | #if defined(SWTCH_OPTIM_STATS) | |
3275 | tlb_flush_count++; | |
3276 | #endif | |
b7c628e4 MD |
3277 | p->p_thread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pdir); |
3278 | load_cr3(p->p_thread->td_pcb->pcb_cr3); | |
984263bc MD |
3279 | } |
3280 | ||
3281 | vm_offset_t | |
3282 | pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) | |
3283 | { | |
3284 | ||
3285 | if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { | |
3286 | return addr; | |
3287 | } | |
3288 | ||
3289 | addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); | |
3290 | return addr; | |
3291 | } | |
3292 | ||
3293 | ||
984263bc MD |
3294 | #if defined(DEBUG) |
3295 | ||
3ae0cd58 | 3296 | static void pads (pmap_t pm); |
c469b1c4 | 3297 | void pmap_pvdump (vm_paddr_t pa); |
984263bc MD |
3298 | |
3299 | /* print address space of pmap*/ | |
3300 | static void | |
840de426 | 3301 | pads(pmap_t pm) |
984263bc MD |
3302 | { |
3303 | unsigned va, i, j; | |
3304 | unsigned *ptep; | |
3305 | ||
3306 | if (pm == kernel_pmap) | |
3307 | return; | |
3308 | for (i = 0; i < 1024; i++) | |
3309 | if (pm->pm_pdir[i]) | |
3310 | for (j = 0; j < 1024; j++) { | |
3311 | va = (i << PDRSHIFT) + (j << PAGE_SHIFT); | |
3312 | if (pm == kernel_pmap && va < KERNBASE) | |
3313 | continue; | |
3314 | if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) | |
3315 | continue; | |
3316 | ptep = pmap_pte_quick(pm, va); | |
3317 | if (pmap_pte_v(ptep)) | |
3318 | printf("%x:%x ", va, *(int *) ptep); | |
3319 | } | |
3320 | ||
3321 | } | |
3322 | ||
3323 | void | |
6ef943a3 | 3324 | pmap_pvdump(vm_paddr_t pa) |
984263bc | 3325 | { |
840de426 | 3326 | pv_entry_t pv; |
984263bc MD |
3327 | vm_page_t m; |
3328 | ||
c469b1c4 | 3329 | printf("pa %08llx", (long long)pa); |
984263bc MD |
3330 | m = PHYS_TO_VM_PAGE(pa); |
3331 | TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { | |
3332 | #ifdef used_to_be | |
3333 | printf(" -> pmap %p, va %x, flags %x", | |
3334 | (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags); | |
3335 | #endif | |
3336 | printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); | |
3337 | pads(pv->pv_pmap); | |
3338 | } | |
3339 | printf(" "); | |
3340 | } | |
3341 | #endif |