Commit | Line | Data |
---|---|---|
984263bc MD |
1 | /* |
2 | * Copyright (c) 1991, 1993 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * The Mach Operating System project at Carnegie-Mellon University. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | * | |
36 | * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 | |
37 | * | |
38 | * | |
39 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
40 | * All rights reserved. | |
41 | * | |
42 | * Authors: Avadis Tevanian, Jr., Michael Wayne Young | |
43 | * | |
44 | * Permission to use, copy, modify and distribute this software and | |
45 | * its documentation is hereby granted, provided that both the copyright | |
46 | * notice and this permission notice appear in all copies of the | |
47 | * software, derivative works or modified versions, and any portions | |
48 | * thereof, and that both notices appear in supporting documentation. | |
49 | * | |
50 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
51 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
52 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
53 | * | |
54 | * Carnegie Mellon requests users of this software to return to | |
55 | * | |
56 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
57 | * School of Computer Science | |
58 | * Carnegie Mellon University | |
59 | * Pittsburgh PA 15213-3890 | |
60 | * | |
61 | * any improvements or extensions that they make and grant Carnegie the | |
62 | * rights to redistribute these changes. | |
63 | * | |
64 | * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $ | |
e1359933 | 65 | * $DragonFly: src/sys/vm/vm_map.c,v 1.18 2004/01/14 23:26:14 dillon Exp $ |
984263bc MD |
66 | */ |
67 | ||
68 | /* | |
69 | * Virtual memory mapping module. | |
70 | */ | |
71 | ||
72 | #include <sys/param.h> | |
73 | #include <sys/systm.h> | |
74 | #include <sys/proc.h> | |
fef0fdf2 | 75 | #include <sys/lock.h> |
984263bc MD |
76 | #include <sys/vmmeter.h> |
77 | #include <sys/mman.h> | |
78 | #include <sys/vnode.h> | |
79 | #include <sys/resourcevar.h> | |
fef0fdf2 | 80 | #include <sys/shm.h> |
984263bc MD |
81 | |
82 | #include <vm/vm.h> | |
83 | #include <vm/vm_param.h> | |
984263bc MD |
84 | #include <vm/pmap.h> |
85 | #include <vm/vm_map.h> | |
86 | #include <vm/vm_page.h> | |
87 | #include <vm/vm_object.h> | |
88 | #include <vm/vm_pager.h> | |
89 | #include <vm/vm_kern.h> | |
90 | #include <vm/vm_extern.h> | |
91 | #include <vm/swap_pager.h> | |
92 | #include <vm/vm_zone.h> | |
93 | ||
a108bf71 MD |
94 | #include <sys/thread2.h> |
95 | ||
984263bc MD |
96 | /* |
97 | * Virtual memory maps provide for the mapping, protection, | |
98 | * and sharing of virtual memory objects. In addition, | |
99 | * this module provides for an efficient virtual copy of | |
100 | * memory from one map to another. | |
101 | * | |
102 | * Synchronization is required prior to most operations. | |
103 | * | |
104 | * Maps consist of an ordered doubly-linked list of simple | |
105 | * entries; a single hint is used to speed up lookups. | |
106 | * | |
107 | * Since portions of maps are specified by start/end addresses, | |
108 | * which may not align with existing map entries, all | |
109 | * routines merely "clip" entries to these start/end values. | |
110 | * [That is, an entry is split into two, bordering at a | |
111 | * start or end value.] Note that these clippings may not | |
112 | * always be necessary (as the two resulting entries are then | |
113 | * not changed); however, the clipping is done for convenience. | |
114 | * | |
115 | * As mentioned above, virtual copy operations are performed | |
116 | * by copying VM object references from one map to | |
117 | * another, and then marking both regions as copy-on-write. | |
118 | */ | |
119 | ||
120 | /* | |
121 | * vm_map_startup: | |
122 | * | |
123 | * Initialize the vm_map module. Must be called before | |
124 | * any other vm_map routines. | |
125 | * | |
126 | * Map and entry structures are allocated from the general | |
127 | * purpose memory pool with some exceptions: | |
128 | * | |
129 | * - The kernel map and kmem submap are allocated statically. | |
130 | * - Kernel map entries are allocated out of a static pool. | |
131 | * | |
132 | * These restrictions are necessary since malloc() uses the | |
133 | * maps and requires map entries. | |
134 | */ | |
135 | ||
a108bf71 MD |
136 | static struct vm_zone mapentzone_store, mapzone_store; |
137 | static vm_zone_t mapentzone, mapzone, vmspace_zone; | |
138 | static struct vm_object mapentobj, mapobj; | |
984263bc MD |
139 | |
140 | static struct vm_map_entry map_entry_init[MAX_MAPENT]; | |
984263bc MD |
141 | static struct vm_map map_init[MAX_KMAP]; |
142 | ||
a108bf71 MD |
143 | static vm_map_entry_t vm_map_entry_create(vm_map_t map, int *); |
144 | static void vm_map_entry_dispose (vm_map_t map, vm_map_entry_t entry, int *); | |
145 | static void _vm_map_clip_end (vm_map_t, vm_map_entry_t, vm_offset_t, int *); | |
146 | static void _vm_map_clip_start (vm_map_t, vm_map_entry_t, vm_offset_t, int *); | |
147 | static void vm_map_entry_delete (vm_map_t, vm_map_entry_t, int *); | |
1388df65 RG |
148 | static void vm_map_entry_unwire (vm_map_t, vm_map_entry_t); |
149 | static void vm_map_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t, | |
150 | vm_map_entry_t); | |
151 | static void vm_map_split (vm_map_entry_t); | |
a108bf71 | 152 | static void vm_map_unclip_range (vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int *count, int flags); |
984263bc MD |
153 | |
154 | void | |
155 | vm_map_startup() | |
156 | { | |
157 | mapzone = &mapzone_store; | |
158 | zbootinit(mapzone, "MAP", sizeof (struct vm_map), | |
159 | map_init, MAX_KMAP); | |
984263bc MD |
160 | mapentzone = &mapentzone_store; |
161 | zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry), | |
162 | map_entry_init, MAX_MAPENT); | |
163 | } | |
164 | ||
165 | /* | |
166 | * Allocate a vmspace structure, including a vm_map and pmap, | |
167 | * and initialize those structures. The refcnt is set to 1. | |
168 | * The remaining fields must be initialized by the caller. | |
169 | */ | |
170 | struct vmspace * | |
171 | vmspace_alloc(min, max) | |
172 | vm_offset_t min, max; | |
173 | { | |
174 | struct vmspace *vm; | |
175 | ||
176 | vm = zalloc(vmspace_zone); | |
177 | vm_map_init(&vm->vm_map, min, max); | |
178 | pmap_pinit(vmspace_pmap(vm)); | |
179 | vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */ | |
180 | vm->vm_refcnt = 1; | |
181 | vm->vm_shm = NULL; | |
182 | vm->vm_exitingcnt = 0; | |
183 | return (vm); | |
184 | } | |
185 | ||
186 | void | |
a108bf71 MD |
187 | vm_init2(void) |
188 | { | |
189 | zinitna(mapentzone, &mapentobj, NULL, 0, 0, ZONE_USE_RESERVE, 1); | |
190 | zinitna(mapzone, &mapobj, NULL, 0, 0, 0, 1); | |
984263bc MD |
191 | vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3); |
192 | pmap_init2(); | |
193 | vm_object_init2(); | |
194 | } | |
195 | ||
196 | static __inline void | |
197 | vmspace_dofree(struct vmspace *vm) | |
198 | { | |
a108bf71 MD |
199 | int count; |
200 | ||
fef0fdf2 MD |
201 | /* |
202 | * Make sure any SysV shm is freed, it might not have been | |
203 | * freed in exit1(). | |
204 | */ | |
205 | shmexit(vm); | |
206 | ||
a722be49 MD |
207 | KKASSERT(vm->vm_upcalls == NULL); |
208 | ||
984263bc MD |
209 | /* |
210 | * Lock the map, to wait out all other references to it. | |
211 | * Delete all of the mappings and pages they hold, then call | |
212 | * the pmap module to reclaim anything left. | |
213 | */ | |
a108bf71 | 214 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc | 215 | vm_map_lock(&vm->vm_map); |
a108bf71 MD |
216 | vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, |
217 | vm->vm_map.max_offset, &count); | |
984263bc | 218 | vm_map_unlock(&vm->vm_map); |
a108bf71 | 219 | vm_map_entry_release(count); |
984263bc MD |
220 | |
221 | pmap_release(vmspace_pmap(vm)); | |
222 | zfree(vmspace_zone, vm); | |
223 | } | |
224 | ||
225 | void | |
226 | vmspace_free(struct vmspace *vm) | |
227 | { | |
228 | if (vm->vm_refcnt == 0) | |
229 | panic("vmspace_free: attempt to free already freed vmspace"); | |
230 | ||
231 | if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0) | |
232 | vmspace_dofree(vm); | |
233 | } | |
234 | ||
235 | void | |
236 | vmspace_exitfree(struct proc *p) | |
237 | { | |
238 | struct vmspace *vm; | |
239 | ||
240 | vm = p->p_vmspace; | |
241 | p->p_vmspace = NULL; | |
242 | ||
243 | /* | |
244 | * cleanup by parent process wait()ing on exiting child. vm_refcnt | |
245 | * may not be 0 (e.g. fork() and child exits without exec()ing). | |
246 | * exitingcnt may increment above 0 and drop back down to zero | |
247 | * several times while vm_refcnt is held non-zero. vm_refcnt | |
248 | * may also increment above 0 and drop back down to zero several | |
249 | * times while vm_exitingcnt is held non-zero. | |
250 | * | |
251 | * The last wait on the exiting child's vmspace will clean up | |
252 | * the remainder of the vmspace. | |
253 | */ | |
254 | if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0) | |
255 | vmspace_dofree(vm); | |
256 | } | |
257 | ||
258 | /* | |
259 | * vmspace_swap_count() - count the approximate swap usage in pages for a | |
260 | * vmspace. | |
261 | * | |
262 | * Swap usage is determined by taking the proportional swap used by | |
263 | * VM objects backing the VM map. To make up for fractional losses, | |
264 | * if the VM object has any swap use at all, the associated map entries | |
265 | * count for at least 1 swap page. | |
266 | */ | |
267 | int | |
268 | vmspace_swap_count(struct vmspace *vmspace) | |
269 | { | |
270 | vm_map_t map = &vmspace->vm_map; | |
271 | vm_map_entry_t cur; | |
272 | int count = 0; | |
273 | ||
274 | for (cur = map->header.next; cur != &map->header; cur = cur->next) { | |
275 | vm_object_t object; | |
276 | ||
277 | if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && | |
278 | (object = cur->object.vm_object) != NULL && | |
279 | object->type == OBJT_SWAP | |
280 | ) { | |
281 | int n = (cur->end - cur->start) / PAGE_SIZE; | |
282 | ||
283 | if (object->un_pager.swp.swp_bcount) { | |
284 | count += object->un_pager.swp.swp_bcount * | |
285 | SWAP_META_PAGES * n / object->size + 1; | |
286 | } | |
287 | } | |
288 | } | |
289 | return(count); | |
290 | } | |
291 | ||
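A worked example of the proportional estimate computed above, with hypothetical numbers (SWAP_META_PAGES is assumed to be 16 here purely for illustration; the real constant comes from the swap pager headers):

```c
#include <stdio.h>

/*
 * Proportional swap charge for one map entry, as in vmspace_swap_count():
 * swp_bcount * SWAP_META_PAGES * n / object_size + 1.
 */
int
main(void)
{
	int swp_bcount = 8;		/* swap buckets recorded in the object */
	int object_size = 1024;		/* object size in pages */
	int n = 256;			/* pages spanned by the map entry */
	int swap_meta_pages = 16;	/* assumed value of SWAP_META_PAGES */

	/* 8 * 16 * 256 / 1024 + 1 == 33 pages charged to this entry */
	printf("%d\n", swp_bcount * swap_meta_pages * n / object_size + 1);
	return (0);
}
```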
292 | ||
293 | /* | |
294 | * vm_map_create: | |
295 | * | |
296 | * Creates and returns a new empty VM map with | |
297 | * the given physical map structure, and having | |
298 | * the given lower and upper address bounds. | |
299 | */ | |
300 | vm_map_t | |
a108bf71 | 301 | vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) |
984263bc MD |
302 | { |
303 | vm_map_t result; | |
304 | ||
305 | result = zalloc(mapzone); | |
306 | vm_map_init(result, min, max); | |
307 | result->pmap = pmap; | |
308 | return (result); | |
309 | } | |
310 | ||
311 | /* | |
312 | * Initialize an existing vm_map structure | |
313 | * such as that in the vmspace structure. | |
314 | * The pmap is set elsewhere. | |
315 | */ | |
316 | void | |
a108bf71 | 317 | vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max) |
984263bc MD |
318 | { |
319 | map->header.next = map->header.prev = &map->header; | |
320 | map->nentries = 0; | |
321 | map->size = 0; | |
322 | map->system_map = 0; | |
323 | map->infork = 0; | |
324 | map->min_offset = min; | |
325 | map->max_offset = max; | |
326 | map->first_free = &map->header; | |
327 | map->hint = &map->header; | |
328 | map->timestamp = 0; | |
377d4740 | 329 | lockinit(&map->lock, 0, "thrd_sleep", 0, LK_NOPAUSE); |
984263bc MD |
330 | } |
331 | ||
a108bf71 MD |
332 | /* |
333 | * vm_map_entry_reserve: | |
334 | * | |
e1359933 MD |
335 | * Reserves vm_map_entry structures so code later on can manipulate |
336 | * map_entry structures within a locked map without blocking trying | |
337 | * to allocate a new vm_map_entry. | |
a108bf71 MD |
338 | */ |
339 | int | |
340 | vm_map_entry_reserve(int count) | |
341 | { | |
342 | struct globaldata *gd = mycpu; | |
343 | vm_map_entry_t entry; | |
344 | ||
345 | crit_enter(); | |
346 | gd->gd_vme_avail -= count; | |
347 | ||
348 | /* | |
349 | * Make sure we have enough structures in gd_vme_base to handle | |
350 | * the reservation request. | |
351 | */ | |
352 | while (gd->gd_vme_avail < 0) { | |
353 | entry = zalloc(mapentzone); | |
354 | entry->next = gd->gd_vme_base; | |
355 | gd->gd_vme_base = entry; | |
356 | ++gd->gd_vme_avail; | |
357 | } | |
358 | crit_exit(); | |
359 | return(count); | |
360 | } | |
361 | ||
362 | /* | |
363 | * vm_map_entry_release: | |
364 | * | |
365 | * Releases previously reserved vm_map_entry structures that were not | |
366 | * used. If we have too much junk in our per-cpu cache clean some of | |
367 | * it out. | |
368 | */ | |
369 | void | |
370 | vm_map_entry_release(int count) | |
371 | { | |
372 | struct globaldata *gd = mycpu; | |
373 | vm_map_entry_t entry; | |
374 | ||
375 | crit_enter(); | |
376 | gd->gd_vme_avail += count; | |
377 | while (gd->gd_vme_avail > MAP_RESERVE_SLOP) { | |
378 | entry = gd->gd_vme_base; | |
379 | KKASSERT(entry != NULL); | |
380 | gd->gd_vme_base = entry->next; | |
381 | --gd->gd_vme_avail; | |
382 | crit_exit(); | |
383 | zfree(mapentzone, entry); | |
384 | crit_enter(); | |
385 | } | |
386 | crit_exit(); | |
387 | } | |
388 | ||
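For reference, callers elsewhere in this file bracket the map lock with these two routines: reserve entries before locking, do the insertion or clipping, then release whatever was not consumed after unlocking (compare vmspace_dofree() above and vm_map_find() below). A minimal sketch of that calling pattern; example_map_range() is a hypothetical caller, not a routine from this file:

```c
static int
example_map_range(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
		  vm_offset_t start, vm_offset_t end)
{
	int count;
	int rv;

	/* Pre-allocate entries so the locked section cannot block in zalloc() */
	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	rv = vm_map_insert(map, &count, object, offset, start, end,
			   VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	/* Return the entries that were not consumed to the per-cpu cache */
	vm_map_entry_release(count);
	return (rv);
}
```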
389 | /* | |
390 | * vm_map_entry_kreserve: | |
391 | * | |
392 | * Reserve map entry structures for use in kernel_map or (if it exists) | |
393 | * kmem_map. These entries have *ALREADY* been reserved on a per-cpu | |
e1359933 MD |
394 | * basis when the map was inited. This function is used by zalloc() |
395 | * to avoid a recursion when zalloc() itself needs to allocate additional | |
396 | * kernel memory. | |
a108bf71 | 397 | * |
e1359933 MD |
398 | * This function should only be used when the caller intends to later |
399 | * call vm_map_entry_reserve() to 'normalize' the reserve cache. | |
a108bf71 MD |
400 | */ |
401 | int | |
402 | vm_map_entry_kreserve(int count) | |
403 | { | |
404 | struct globaldata *gd = mycpu; | |
405 | ||
406 | crit_enter(); | |
407 | gd->gd_vme_kdeficit += count; | |
408 | crit_exit(); | |
409 | KKASSERT(gd->gd_vme_base != NULL); | |
410 | return(count); | |
411 | } | |
412 | ||
413 | /* | |
414 | * vm_map_entry_krelease: | |
415 | * | |
416 | * Release previously reserved map entries for kernel_map or kmem_map | |
417 | * use. This routine determines how many entries were actually used and | |
418 | * replenishes the kernel reserve supply from vme_avail. | |
419 | * | |
420 | * If there is insufficient supply vme_avail will go negative, which is | |
421 | * ok. We cannot safely call zalloc in this function without getting | |
422 | * into a recursion deadlock. zalloc() will call vm_map_entry_reserve() | |
423 | * to regenerate the lost entries. | |
424 | */ | |
425 | void | |
426 | vm_map_entry_krelease(int count) | |
427 | { | |
428 | struct globaldata *gd = mycpu; | |
429 | ||
430 | crit_enter(); | |
431 | gd->gd_vme_kdeficit -= count; | |
432 | gd->gd_vme_avail -= gd->gd_vme_kdeficit; /* can go negative */ | |
433 | gd->gd_vme_kdeficit = 0; | |
434 | crit_exit(); | |
435 | } | |
436 | ||
984263bc | 437 | /* |
8a8d5d85 | 438 | * vm_map_entry_create: [ internal use only ] |
984263bc | 439 | * |
8a8d5d85 | 440 | * Allocates a VM map entry for insertion. No entry fields are filled |
a108bf71 MD |
441 | * in. |
442 | * | |
443 | * This routine may be called from an interrupt thread but not a FAST | |
444 | * interrupt. This routine may recurse the map lock. | |
984263bc | 445 | */ |
8a8d5d85 | 446 | static vm_map_entry_t |
a108bf71 | 447 | vm_map_entry_create(vm_map_t map, int *countp) |
984263bc | 448 | { |
a108bf71 MD |
449 | struct globaldata *gd = mycpu; |
450 | vm_map_entry_t entry; | |
8a8d5d85 | 451 | |
a108bf71 MD |
452 | KKASSERT(*countp > 0); |
453 | --*countp; | |
454 | crit_enter(); | |
455 | entry = gd->gd_vme_base; | |
456 | KASSERT(entry != NULL, ("gd_vme_base NULL! count %d", *countp)); | |
457 | gd->gd_vme_base = entry->next; | |
458 | crit_exit(); | |
459 | return(entry); | |
984263bc MD |
460 | } |
461 | ||
462 | /* | |
8a8d5d85 | 463 | * vm_map_entry_dispose: [ internal use only ] |
984263bc | 464 | * |
8a8d5d85 MD |
465 | * Dispose of a vm_map_entry that is no longer being referenced. This |
466 | * function may be called from an interrupt. | |
984263bc | 467 | */ |
8a8d5d85 | 468 | static void |
a108bf71 | 469 | vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry, int *countp) |
984263bc | 470 | { |
a108bf71 MD |
471 | struct globaldata *gd = mycpu; |
472 | ||
473 | ++*countp; | |
474 | crit_enter(); | |
475 | entry->next = gd->gd_vme_base; | |
476 | gd->gd_vme_base = entry; | |
477 | crit_exit(); | |
984263bc MD |
478 | } |
479 | ||
8a8d5d85 | 480 | |
984263bc MD |
481 | /* |
482 | * vm_map_entry_{un,}link: | |
483 | * | |
484 | * Insert/remove entries from maps. | |
485 | */ | |
486 | static __inline void | |
487 | vm_map_entry_link(vm_map_t map, | |
488 | vm_map_entry_t after_where, | |
489 | vm_map_entry_t entry) | |
490 | { | |
491 | map->nentries++; | |
492 | entry->prev = after_where; | |
493 | entry->next = after_where->next; | |
494 | entry->next->prev = entry; | |
495 | after_where->next = entry; | |
496 | } | |
497 | ||
498 | static __inline void | |
499 | vm_map_entry_unlink(vm_map_t map, | |
500 | vm_map_entry_t entry) | |
501 | { | |
502 | vm_map_entry_t prev; | |
503 | vm_map_entry_t next; | |
504 | ||
505 | if (entry->eflags & MAP_ENTRY_IN_TRANSITION) | |
506 | panic("vm_map_entry_unlink: attempt to mess with locked entry! %p", entry); | |
507 | prev = entry->prev; | |
508 | next = entry->next; | |
509 | next->prev = prev; | |
510 | prev->next = next; | |
511 | map->nentries--; | |
512 | } | |
513 | ||
514 | /* | |
515 | * SAVE_HINT: | |
516 | * | |
517 | * Saves the specified entry as the hint for | |
518 | * future lookups. | |
519 | */ | |
520 | #define SAVE_HINT(map,value) \ | |
521 | (map)->hint = (value); | |
522 | ||
523 | /* | |
524 | * vm_map_lookup_entry: [ internal use only ] | |
525 | * | |
526 | * Finds the map entry containing (or | |
527 | * immediately preceding) the specified address | |
528 | * in the given map; the entry is returned | |
529 | * in the "entry" parameter. The boolean | |
530 | * result indicates whether the address is | |
531 | * actually contained in the map. | |
532 | */ | |
533 | boolean_t | |
534 | vm_map_lookup_entry(map, address, entry) | |
535 | vm_map_t map; | |
536 | vm_offset_t address; | |
537 | vm_map_entry_t *entry; /* OUT */ | |
538 | { | |
539 | vm_map_entry_t cur; | |
540 | vm_map_entry_t last; | |
541 | ||
542 | /* | |
543 | * Start looking either from the head of the list, or from the hint. | |
544 | */ | |
545 | ||
546 | cur = map->hint; | |
547 | ||
548 | if (cur == &map->header) | |
549 | cur = cur->next; | |
550 | ||
551 | if (address >= cur->start) { | |
552 | /* | |
553 | * Go from hint to end of list. | |
554 | * | |
555 | * But first, make a quick check to see if we are already looking | |
556 | * at the entry we want (which is usually the case). Note also | |
557 | * that we don't need to save the hint here... it is the same | |
558 | * hint (unless we are at the header, in which case the hint | |
559 | * didn't buy us anything anyway). | |
560 | */ | |
561 | last = &map->header; | |
562 | if ((cur != last) && (cur->end > address)) { | |
563 | *entry = cur; | |
564 | return (TRUE); | |
565 | } | |
566 | } else { | |
567 | /* | |
568 | * Go from start to hint, *inclusively* | |
569 | */ | |
570 | last = cur->next; | |
571 | cur = map->header.next; | |
572 | } | |
573 | ||
574 | /* | |
575 | * Search linearly | |
576 | */ | |
577 | ||
578 | while (cur != last) { | |
579 | if (cur->end > address) { | |
580 | if (address >= cur->start) { | |
581 | /* | |
582 | * Save this lookup for future hints, and | |
583 | * return | |
584 | */ | |
585 | ||
586 | *entry = cur; | |
587 | SAVE_HINT(map, cur); | |
588 | return (TRUE); | |
589 | } | |
590 | break; | |
591 | } | |
592 | cur = cur->next; | |
593 | } | |
594 | *entry = cur->prev; | |
595 | SAVE_HINT(map, *entry); | |
596 | return (FALSE); | |
597 | } | |
598 | ||
599 | /* | |
600 | * vm_map_insert: | |
601 | * | |
602 | * Inserts the given whole VM object into the target | |
603 | * map at the specified address range. The object's | |
604 | * size should match that of the address range. | |
605 | * | |
a108bf71 MD |
606 | * Requires that the map be locked, and leaves it so. Requires that |
607 | * sufficient vm_map_entry structures have been reserved and tracks | |
608 | * the use via countp. | |
984263bc MD |
609 | * |
610 | * If object is non-NULL, ref count must be bumped by caller | |
611 | * prior to making call to account for the new entry. | |
612 | */ | |
613 | int | |
a108bf71 MD |
614 | vm_map_insert(vm_map_t map, int *countp, |
615 | vm_object_t object, vm_ooffset_t offset, | |
984263bc MD |
616 | vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, |
617 | int cow) | |
618 | { | |
619 | vm_map_entry_t new_entry; | |
620 | vm_map_entry_t prev_entry; | |
621 | vm_map_entry_t temp_entry; | |
622 | vm_eflags_t protoeflags; | |
623 | ||
624 | /* | |
625 | * Check that the start and end points are not bogus. | |
626 | */ | |
627 | ||
628 | if ((start < map->min_offset) || (end > map->max_offset) || | |
629 | (start >= end)) | |
630 | return (KERN_INVALID_ADDRESS); | |
631 | ||
632 | /* | |
633 | * Find the entry prior to the proposed starting address; if it's part | |
634 | * of an existing entry, this range is bogus. | |
635 | */ | |
636 | ||
637 | if (vm_map_lookup_entry(map, start, &temp_entry)) | |
638 | return (KERN_NO_SPACE); | |
639 | ||
640 | prev_entry = temp_entry; | |
641 | ||
642 | /* | |
643 | * Assert that the next entry doesn't overlap the end point. | |
644 | */ | |
645 | ||
646 | if ((prev_entry->next != &map->header) && | |
647 | (prev_entry->next->start < end)) | |
648 | return (KERN_NO_SPACE); | |
649 | ||
650 | protoeflags = 0; | |
651 | ||
652 | if (cow & MAP_COPY_ON_WRITE) | |
653 | protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY; | |
654 | ||
655 | if (cow & MAP_NOFAULT) { | |
656 | protoeflags |= MAP_ENTRY_NOFAULT; | |
657 | ||
658 | KASSERT(object == NULL, | |
659 | ("vm_map_insert: paradoxical MAP_NOFAULT request")); | |
660 | } | |
661 | if (cow & MAP_DISABLE_SYNCER) | |
662 | protoeflags |= MAP_ENTRY_NOSYNC; | |
663 | if (cow & MAP_DISABLE_COREDUMP) | |
664 | protoeflags |= MAP_ENTRY_NOCOREDUMP; | |
665 | ||
666 | if (object) { | |
667 | /* | |
668 | * When object is non-NULL, it could be shared with another | |
669 | * process. We have to set or clear OBJ_ONEMAPPING | |
670 | * appropriately. | |
671 | */ | |
672 | if ((object->ref_count > 1) || (object->shadow_count != 0)) { | |
673 | vm_object_clear_flag(object, OBJ_ONEMAPPING); | |
674 | } | |
675 | } | |
676 | else if ((prev_entry != &map->header) && | |
677 | (prev_entry->eflags == protoeflags) && | |
678 | (prev_entry->end == start) && | |
679 | (prev_entry->wired_count == 0) && | |
680 | ((prev_entry->object.vm_object == NULL) || | |
681 | vm_object_coalesce(prev_entry->object.vm_object, | |
682 | OFF_TO_IDX(prev_entry->offset), | |
683 | (vm_size_t)(prev_entry->end - prev_entry->start), | |
684 | (vm_size_t)(end - prev_entry->end)))) { | |
685 | /* | |
686 | * We were able to extend the object. Determine if we | |
687 | * can extend the previous map entry to include the | |
688 | * new range as well. | |
689 | */ | |
690 | if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) && | |
691 | (prev_entry->protection == prot) && | |
692 | (prev_entry->max_protection == max)) { | |
693 | map->size += (end - prev_entry->end); | |
694 | prev_entry->end = end; | |
a108bf71 | 695 | vm_map_simplify_entry(map, prev_entry, countp); |
984263bc MD |
696 | return (KERN_SUCCESS); |
697 | } | |
698 | ||
699 | /* | |
700 | * If we can extend the object but cannot extend the | |
701 | * map entry, we have to create a new map entry. We | |
702 | * must bump the ref count on the extended object to | |
703 | * account for it. object may be NULL. | |
704 | */ | |
705 | object = prev_entry->object.vm_object; | |
706 | offset = prev_entry->offset + | |
707 | (prev_entry->end - prev_entry->start); | |
708 | vm_object_reference(object); | |
709 | } | |
710 | ||
711 | /* | |
712 | * NOTE: if conditionals fail, object can be NULL here. This occurs | |
713 | * in things like the buffer map where we manage kva but do not manage | |
714 | * backing objects. | |
715 | */ | |
716 | ||
717 | /* | |
718 | * Create a new entry | |
719 | */ | |
720 | ||
a108bf71 | 721 | new_entry = vm_map_entry_create(map, countp); |
984263bc MD |
722 | new_entry->start = start; |
723 | new_entry->end = end; | |
724 | ||
725 | new_entry->eflags = protoeflags; | |
726 | new_entry->object.vm_object = object; | |
727 | new_entry->offset = offset; | |
728 | new_entry->avail_ssize = 0; | |
729 | ||
730 | new_entry->inheritance = VM_INHERIT_DEFAULT; | |
731 | new_entry->protection = prot; | |
732 | new_entry->max_protection = max; | |
733 | new_entry->wired_count = 0; | |
734 | ||
735 | /* | |
736 | * Insert the new entry into the list | |
737 | */ | |
738 | ||
739 | vm_map_entry_link(map, prev_entry, new_entry); | |
740 | map->size += new_entry->end - new_entry->start; | |
741 | ||
742 | /* | |
743 | * Update the free space hint | |
744 | */ | |
745 | if ((map->first_free == prev_entry) && | |
746 | (prev_entry->end >= new_entry->start)) { | |
747 | map->first_free = new_entry; | |
748 | } | |
749 | ||
750 | #if 0 | |
751 | /* | |
752 | * Temporarily removed to avoid MAP_STACK panic, due to | |
753 | * MAP_STACK being a huge hack. Will be added back in | |
754 | * when MAP_STACK (and the user stack mapping) is fixed. | |
755 | */ | |
756 | /* | |
757 | * It may be possible to simplify the entry | |
758 | */ | |
a108bf71 | 759 | vm_map_simplify_entry(map, new_entry, countp); |
984263bc MD |
760 | #endif |
761 | ||
762 | if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) { | |
763 | pmap_object_init_pt(map->pmap, start, | |
764 | object, OFF_TO_IDX(offset), end - start, | |
765 | cow & MAP_PREFAULT_PARTIAL); | |
766 | } | |
767 | ||
768 | return (KERN_SUCCESS); | |
769 | } | |
770 | ||
771 | /* | |
772 | * Find sufficient space for `length' bytes in the given map, starting at | |
773 | * `start'. The map must be locked. Returns 0 on success, 1 on no space. | |
e9bb90e8 MD |
774 | * |
775 | * This function will return an arbitrarily aligned pointer. If no | |
776 | * particular alignment is required you should pass align as 1. Note that | |
777 | * the map may return PAGE_SIZE aligned pointers if all the lengths used in | |
778 | * the map are a multiple of PAGE_SIZE, even if you pass a smaller align | |
779 | * argument. | |
780 | * | |
781 | * 'align' should be a power of 2 but is not required to be. | |
984263bc MD |
782 | */ |
783 | int | |
e9bb90e8 MD |
784 | vm_map_findspace( |
785 | vm_map_t map, | |
786 | vm_offset_t start, | |
787 | vm_size_t length, | |
788 | vm_offset_t align, | |
789 | vm_offset_t *addr) | |
984263bc MD |
790 | { |
791 | vm_map_entry_t entry, next; | |
792 | vm_offset_t end; | |
e9bb90e8 | 793 | vm_offset_t align_mask; |
984263bc MD |
794 | |
795 | if (start < map->min_offset) | |
796 | start = map->min_offset; | |
797 | if (start > map->max_offset) | |
798 | return (1); | |
799 | ||
e9bb90e8 MD |
800 | /* |
801 | * If the alignment is not a power of 2 we will have to use | |
802 | * a mod/division, set align_mask to a special value. | |
803 | */ | |
804 | if ((align | (align - 1)) + 1 != (align << 1)) | |
805 | align_mask = (vm_offset_t)-1; | |
806 | else | |
807 | align_mask = align - 1; | |
808 | ||
a108bf71 | 809 | retry: |
984263bc MD |
810 | /* |
811 | * Look for the first possible address; if there's already something | |
812 | * at this address, we have to start after it. | |
813 | */ | |
814 | if (start == map->min_offset) { | |
815 | if ((entry = map->first_free) != &map->header) | |
816 | start = entry->end; | |
817 | } else { | |
818 | vm_map_entry_t tmp; | |
819 | ||
820 | if (vm_map_lookup_entry(map, start, &tmp)) | |
821 | start = tmp->end; | |
822 | entry = tmp; | |
823 | } | |
824 | ||
825 | /* | |
826 | * Look through the rest of the map, trying to fit a new region in the | |
827 | * gap between existing regions, or after the very last region. | |
828 | */ | |
829 | for (;; start = (entry = next)->end) { | |
e9bb90e8 MD |
830 | /* |
831 | * Adjust the proposed start by the requested alignment, | |
832 | * be sure that we didn't wrap the address. | |
833 | */ | |
834 | if (align_mask == (vm_offset_t)-1) | |
835 | end = ((start + align - 1) / align) * align; | |
836 | else | |
837 | end = (start + align_mask) & ~align_mask; | |
838 | if (end < start) | |
839 | return (1); | |
840 | start = end; | |
984263bc MD |
841 | /* |
842 | * Find the end of the proposed new region. Be sure we didn't | |
e9bb90e8 MD |
843 | * go beyond the end of the map, or wrap around the address. |
844 | * Then check to see if this is the last entry or if the | |
845 | * proposed end fits in the gap between this and the next | |
846 | * entry. | |
984263bc MD |
847 | */ |
848 | end = start + length; | |
849 | if (end > map->max_offset || end < start) | |
850 | return (1); | |
851 | next = entry->next; | |
852 | if (next == &map->header || next->start >= end) | |
853 | break; | |
854 | } | |
855 | SAVE_HINT(map, entry); | |
984263bc MD |
856 | if (map == kernel_map) { |
857 | vm_offset_t ksize; | |
858 | if ((ksize = round_page(start + length)) > kernel_vm_end) { | |
859 | pmap_growkernel(ksize); | |
a108bf71 | 860 | goto retry; |
984263bc MD |
861 | } |
862 | } | |
a108bf71 | 863 | *addr = start; |
984263bc MD |
864 | return (0); |
865 | } | |
866 | ||
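The alignment handling in vm_map_findspace() can be read in isolation: a power-of-two 'align' rounds the proposed start up with a mask, anything else falls back to a divide. A small self-contained sketch of the same arithmetic (round_up() is an illustrative name, not part of this file):

```c
#include <assert.h>
#include <stdint.h>

/* Round addr up to a multiple of align (align >= 1), mirroring the
 * align_mask logic in vm_map_findspace() above. */
static uintptr_t
round_up(uintptr_t addr, uintptr_t align)
{
	if ((align | (align - 1)) + 1 != (align << 1))	/* not a power of 2 */
		return (((addr + align - 1) / align) * align);
	return ((addr + (align - 1)) & ~(align - 1));
}

int
main(void)
{
	assert(round_up(0x1234, 1) == 0x1234);		/* align 1: unchanged */
	assert(round_up(0x1234, 0x10) == 0x1240);	/* power of 2: mask path */
	assert(round_up(0x1234, 24) == 0x1248);		/* general case: divide path */
	return (0);
}
```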
867 | /* | |
868 | * vm_map_find finds an unallocated region in the target address | |
869 | * map with the given length. The search is defined to be | |
870 | * first-fit from the specified address; the region found is | |
871 | * returned in the same parameter. | |
872 | * | |
873 | * If object is non-NULL, ref count must be bumped by caller | |
874 | * prior to making call to account for the new entry. | |
875 | */ | |
876 | int | |
877 | vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, | |
878 | vm_offset_t *addr, /* IN/OUT */ | |
879 | vm_size_t length, boolean_t find_space, vm_prot_t prot, | |
880 | vm_prot_t max, int cow) | |
881 | { | |
882 | vm_offset_t start; | |
03aa8d99 | 883 | int result; |
a108bf71 | 884 | int count; |
984263bc MD |
885 | |
886 | start = *addr; | |
887 | ||
a108bf71 | 888 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc MD |
889 | vm_map_lock(map); |
890 | if (find_space) { | |
e9bb90e8 | 891 | if (vm_map_findspace(map, start, length, 1, addr)) { |
984263bc | 892 | vm_map_unlock(map); |
a108bf71 | 893 | vm_map_entry_release(count); |
984263bc MD |
894 | return (KERN_NO_SPACE); |
895 | } | |
896 | start = *addr; | |
897 | } | |
a108bf71 | 898 | result = vm_map_insert(map, &count, object, offset, |
984263bc MD |
899 | start, start + length, prot, max, cow); |
900 | vm_map_unlock(map); | |
a108bf71 | 901 | vm_map_entry_release(count); |
984263bc | 902 | |
984263bc MD |
903 | return (result); |
904 | } | |
905 | ||
906 | /* | |
907 | * vm_map_simplify_entry: | |
908 | * | |
909 | * Simplify the given map entry by merging with either neighbor. This | |
910 | * routine also has the ability to merge with both neighbors. | |
911 | * | |
912 | * The map must be locked. | |
913 | * | |
914 | * This routine guarantees that the passed entry remains valid (though | |
915 | * possibly extended). When merging, this routine may delete one or | |
916 | * both neighbors. No action is taken on entries which have their | |
917 | * in-transition flag set. | |
918 | */ | |
919 | void | |
a108bf71 | 920 | vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry, int *countp) |
984263bc MD |
921 | { |
922 | vm_map_entry_t next, prev; | |
923 | vm_size_t prevsize, esize; | |
924 | ||
925 | if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) { | |
12e4aaff | 926 | ++mycpu->gd_cnt.v_intrans_coll; |
984263bc MD |
927 | return; |
928 | } | |
929 | ||
930 | prev = entry->prev; | |
931 | if (prev != &map->header) { | |
932 | prevsize = prev->end - prev->start; | |
933 | if ( (prev->end == entry->start) && | |
934 | (prev->object.vm_object == entry->object.vm_object) && | |
935 | (!prev->object.vm_object || | |
936 | (prev->offset + prevsize == entry->offset)) && | |
937 | (prev->eflags == entry->eflags) && | |
938 | (prev->protection == entry->protection) && | |
939 | (prev->max_protection == entry->max_protection) && | |
940 | (prev->inheritance == entry->inheritance) && | |
941 | (prev->wired_count == entry->wired_count)) { | |
942 | if (map->first_free == prev) | |
943 | map->first_free = entry; | |
944 | if (map->hint == prev) | |
945 | map->hint = entry; | |
946 | vm_map_entry_unlink(map, prev); | |
947 | entry->start = prev->start; | |
948 | entry->offset = prev->offset; | |
949 | if (prev->object.vm_object) | |
950 | vm_object_deallocate(prev->object.vm_object); | |
a108bf71 | 951 | vm_map_entry_dispose(map, prev, countp); |
984263bc MD |
952 | } |
953 | } | |
954 | ||
955 | next = entry->next; | |
956 | if (next != &map->header) { | |
957 | esize = entry->end - entry->start; | |
958 | if ((entry->end == next->start) && | |
959 | (next->object.vm_object == entry->object.vm_object) && | |
960 | (!entry->object.vm_object || | |
961 | (entry->offset + esize == next->offset)) && | |
962 | (next->eflags == entry->eflags) && | |
963 | (next->protection == entry->protection) && | |
964 | (next->max_protection == entry->max_protection) && | |
965 | (next->inheritance == entry->inheritance) && | |
966 | (next->wired_count == entry->wired_count)) { | |
967 | if (map->first_free == next) | |
968 | map->first_free = entry; | |
969 | if (map->hint == next) | |
970 | map->hint = entry; | |
971 | vm_map_entry_unlink(map, next); | |
972 | entry->end = next->end; | |
973 | if (next->object.vm_object) | |
974 | vm_object_deallocate(next->object.vm_object); | |
a108bf71 | 975 | vm_map_entry_dispose(map, next, countp); |
984263bc MD |
976 | } |
977 | } | |
978 | } | |
979 | /* | |
980 | * vm_map_clip_start: [ internal use only ] | |
981 | * | |
982 | * Asserts that the given entry begins at or after | |
983 | * the specified address; if necessary, | |
984 | * it splits the entry into two. | |
985 | */ | |
a108bf71 | 986 | #define vm_map_clip_start(map, entry, startaddr, countp) \ |
984263bc MD |
987 | { \ |
988 | if (startaddr > entry->start) \ | |
a108bf71 | 989 | _vm_map_clip_start(map, entry, startaddr, countp); \ |
984263bc MD |
990 | } |
991 | ||
992 | /* | |
993 | * This routine is called only when it is known that | |
994 | * the entry must be split. | |
995 | */ | |
996 | static void | |
a108bf71 | 997 | _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start, int *countp) |
984263bc MD |
998 | { |
999 | vm_map_entry_t new_entry; | |
1000 | ||
1001 | /* | |
1002 | * Split off the front portion -- note that we must insert the new | |
1003 | * entry BEFORE this one, so that this entry has the specified | |
1004 | * starting address. | |
1005 | */ | |
1006 | ||
a108bf71 | 1007 | vm_map_simplify_entry(map, entry, countp); |
984263bc MD |
1008 | |
1009 | /* | |
1010 | * If there is no object backing this entry, we might as well create | |
1011 | * one now. If we defer it, an object can get created after the map | |
1012 | * is clipped, and individual objects will be created for the split-up | |
1013 | * map. This is a bit of a hack, but is also about the best place to | |
1014 | * put this improvement. | |
1015 | */ | |
1016 | ||
1017 | if (entry->object.vm_object == NULL && !map->system_map) { | |
1018 | vm_object_t object; | |
1019 | object = vm_object_allocate(OBJT_DEFAULT, | |
1020 | atop(entry->end - entry->start)); | |
1021 | entry->object.vm_object = object; | |
1022 | entry->offset = 0; | |
1023 | } | |
1024 | ||
a108bf71 | 1025 | new_entry = vm_map_entry_create(map, countp); |
984263bc MD |
1026 | *new_entry = *entry; |
1027 | ||
1028 | new_entry->end = start; | |
1029 | entry->offset += (start - entry->start); | |
1030 | entry->start = start; | |
1031 | ||
1032 | vm_map_entry_link(map, entry->prev, new_entry); | |
1033 | ||
1034 | if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { | |
1035 | vm_object_reference(new_entry->object.vm_object); | |
1036 | } | |
1037 | } | |
1038 | ||
1039 | /* | |
1040 | * vm_map_clip_end: [ internal use only ] | |
1041 | * | |
1042 | * Asserts that the given entry ends at or before | |
1043 | * the specified address; if necessary, | |
1044 | * it splits the entry into two. | |
1045 | */ | |
1046 | ||
a108bf71 | 1047 | #define vm_map_clip_end(map, entry, endaddr, countp) \ |
984263bc MD |
1048 | { \ |
1049 | if (endaddr < entry->end) \ | |
a108bf71 | 1050 | _vm_map_clip_end(map, entry, endaddr, countp); \ |
984263bc MD |
1051 | } |
1052 | ||
1053 | /* | |
1054 | * This routine is called only when it is known that | |
1055 | * the entry must be split. | |
1056 | */ | |
1057 | static void | |
a108bf71 | 1058 | _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end, int *countp) |
984263bc MD |
1059 | { |
1060 | vm_map_entry_t new_entry; | |
1061 | ||
1062 | /* | |
1063 | * If there is no object backing this entry, we might as well create | |
1064 | * one now. If we defer it, an object can get created after the map | |
1065 | * is clipped, and individual objects will be created for the split-up | |
1066 | * map. This is a bit of a hack, but is also about the best place to | |
1067 | * put this improvement. | |
1068 | */ | |
1069 | ||
1070 | if (entry->object.vm_object == NULL && !map->system_map) { | |
1071 | vm_object_t object; | |
1072 | object = vm_object_allocate(OBJT_DEFAULT, | |
1073 | atop(entry->end - entry->start)); | |
1074 | entry->object.vm_object = object; | |
1075 | entry->offset = 0; | |
1076 | } | |
1077 | ||
1078 | /* | |
1079 | * Create a new entry and insert it AFTER the specified entry | |
1080 | */ | |
1081 | ||
a108bf71 | 1082 | new_entry = vm_map_entry_create(map, countp); |
984263bc MD |
1083 | *new_entry = *entry; |
1084 | ||
1085 | new_entry->start = entry->end = end; | |
1086 | new_entry->offset += (end - entry->start); | |
1087 | ||
1088 | vm_map_entry_link(map, entry, new_entry); | |
1089 | ||
1090 | if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { | |
1091 | vm_object_reference(new_entry->object.vm_object); | |
1092 | } | |
1093 | } | |
1094 | ||
1095 | /* | |
1096 | * VM_MAP_RANGE_CHECK: [ internal use only ] | |
1097 | * | |
1098 | * Asserts that the starting and ending region | |
1099 | * addresses fall within the valid range of the map. | |
1100 | */ | |
1101 | #define VM_MAP_RANGE_CHECK(map, start, end) \ | |
1102 | { \ | |
1103 | if (start < vm_map_min(map)) \ | |
1104 | start = vm_map_min(map); \ | |
1105 | if (end > vm_map_max(map)) \ | |
1106 | end = vm_map_max(map); \ | |
1107 | if (start > end) \ | |
1108 | start = end; \ | |
1109 | } | |
1110 | ||
1111 | /* | |
1112 | * vm_map_transition_wait: [ kernel use only ] | |
1113 | * | |
1114 | * Used to block when an in-transition collision occurs. The map | |
1115 | * is unlocked for the sleep and relocked before the return. | |
1116 | */ | |
1117 | static | |
1118 | void | |
1119 | vm_map_transition_wait(vm_map_t map) | |
1120 | { | |
1121 | vm_map_unlock(map); | |
377d4740 | 1122 | tsleep(map, 0, "vment", 0); |
984263bc MD |
1123 | vm_map_lock(map); |
1124 | } | |
1125 | ||
1126 | /* | |
1127 | * CLIP_CHECK_BACK | |
1128 | * CLIP_CHECK_FWD | |
1129 | * | |
1130 | * When we do blocking operations with the map lock held it is | |
1131 | * possible that a clip might have occurred on our in-transit entry, | |
1132 | * requiring an adjustment to the entry in our loop. These macros | |
1133 | * help the pageable and clip_range code deal with the case. The | |
1134 | * conditional costs virtually nothing if no clipping has occurred. | |
1135 | */ | |
1136 | ||
1137 | #define CLIP_CHECK_BACK(entry, save_start) \ | |
1138 | do { \ | |
1139 | while (entry->start != save_start) { \ | |
1140 | entry = entry->prev; \ | |
1141 | KASSERT(entry != &map->header, ("bad entry clip")); \ | |
1142 | } \ | |
1143 | } while(0) | |
1144 | ||
1145 | #define CLIP_CHECK_FWD(entry, save_end) \ | |
1146 | do { \ | |
1147 | while (entry->end != save_end) { \ | |
1148 | entry = entry->next; \ | |
1149 | KASSERT(entry != &map->header, ("bad entry clip")); \ | |
1150 | } \ | |
1151 | } while(0) | |
1152 | ||
1153 | ||
1154 | /* | |
1155 | * vm_map_clip_range: [ kernel use only ] | |
1156 | * | |
1157 | * Clip the specified range and return the base entry. The | |
1158 | * range may cover several entries starting at the returned base | |
1159 | * and the first and last entry in the covering sequence will be | |
1160 | * properly clipped to the requested start and end address. | |
1161 | * | |
1162 | * If no holes are allowed you should pass the MAP_CLIP_NO_HOLES | |
1163 | * flag. | |
1164 | * | |
1165 | * The MAP_ENTRY_IN_TRANSITION flag will be set for the entries | |
1166 | * covered by the requested range. | |
1167 | * | |
1168 | * The map must be exclusively locked on entry and will remain locked | |
1169 | * on return. If no range exists or the range contains holes and you | |
1170 | * specified that no holes were allowed, NULL will be returned. This | |
1171 | * routine may temporarily unlock the map in order to avoid a deadlock when | |
1172 | * sleeping. | |
1173 | */ | |
1174 | static | |
1175 | vm_map_entry_t | |
a108bf71 MD |
1176 | vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end, |
1177 | int *countp, int flags) | |
984263bc MD |
1178 | { |
1179 | vm_map_entry_t start_entry; | |
1180 | vm_map_entry_t entry; | |
1181 | ||
1182 | /* | |
1183 | * Locate the entry and effect initial clipping. The in-transition | |
1184 | * case does not occur very often so do not try to optimize it. | |
1185 | */ | |
1186 | again: | |
1187 | if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) | |
1188 | return (NULL); | |
1189 | entry = start_entry; | |
1190 | if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { | |
1191 | entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; | |
12e4aaff MD |
1192 | ++mycpu->gd_cnt.v_intrans_coll; |
1193 | ++mycpu->gd_cnt.v_intrans_wait; | |
984263bc MD |
1194 | vm_map_transition_wait(map); |
1195 | /* | |
1196 | * entry and/or start_entry may have been clipped while | |
1197 | * we slept, or may have gone away entirely. We have | |
1198 | * to restart from the lookup. | |
1199 | */ | |
1200 | goto again; | |
1201 | } | |
1202 | /* | |
1203 | * Since we hold an exclusive map lock we do not have to restart | |
1204 | * after clipping, even though clipping may block in zalloc. | |
1205 | */ | |
a108bf71 MD |
1206 | vm_map_clip_start(map, entry, start, countp); |
1207 | vm_map_clip_end(map, entry, end, countp); | |
984263bc MD |
1208 | entry->eflags |= MAP_ENTRY_IN_TRANSITION; |
1209 | ||
1210 | /* | |
1211 | * Scan entries covered by the range. When working on the next | |
1212 | * entry a restart need only re-loop on the current entry which | |
1213 | * we have already locked, since 'next' may have changed. Also, | |
1214 | * even though entry is safe, it may have been clipped so we | |
1215 | * have to iterate forwards through the clip after sleeping. | |
1216 | */ | |
1217 | while (entry->next != &map->header && entry->next->start < end) { | |
1218 | vm_map_entry_t next = entry->next; | |
1219 | ||
1220 | if (flags & MAP_CLIP_NO_HOLES) { | |
1221 | if (next->start > entry->end) { | |
1222 | vm_map_unclip_range(map, start_entry, | |
a108bf71 | 1223 | start, entry->end, countp, flags); |
984263bc MD |
1224 | return(NULL); |
1225 | } | |
1226 | } | |
1227 | ||
1228 | if (next->eflags & MAP_ENTRY_IN_TRANSITION) { | |
1229 | vm_offset_t save_end = entry->end; | |
1230 | next->eflags |= MAP_ENTRY_NEEDS_WAKEUP; | |
12e4aaff MD |
1231 | ++mycpu->gd_cnt.v_intrans_coll; |
1232 | ++mycpu->gd_cnt.v_intrans_wait; | |
984263bc MD |
1233 | vm_map_transition_wait(map); |
1234 | ||
1235 | /* | |
1236 | * clips might have occurred while we blocked. | |
1237 | */ | |
1238 | CLIP_CHECK_FWD(entry, save_end); | |
1239 | CLIP_CHECK_BACK(start_entry, start); | |
1240 | continue; | |
1241 | } | |
1242 | /* | |
1243 | * No restart necessary even though clip_end may block, we | |
1244 | * are holding the map lock. | |
1245 | */ | |
a108bf71 | 1246 | vm_map_clip_end(map, next, end, countp); |
984263bc MD |
1247 | next->eflags |= MAP_ENTRY_IN_TRANSITION; |
1248 | entry = next; | |
1249 | } | |
1250 | if (flags & MAP_CLIP_NO_HOLES) { | |
1251 | if (entry->end != end) { | |
1252 | vm_map_unclip_range(map, start_entry, | |
a108bf71 | 1253 | start, entry->end, countp, flags); |
984263bc MD |
1254 | return(NULL); |
1255 | } | |
1256 | } | |
1257 | return(start_entry); | |
1258 | } | |
1259 | ||
1260 | /* | |
1261 | * vm_map_unclip_range: [ kernel use only ] | |
1262 | * | |
1263 | * Undo the effect of vm_map_clip_range(). You should pass the same | |
1264 | * flags and the same range that you passed to vm_map_clip_range(). | |
1265 | * This code will clear the in-transition flag on the entries and | |
1266 | * wake up anyone waiting. This code will also simplify the sequence | |
1267 | * and attempt to merge it with entries before and after the sequence. | |
1268 | * | |
1269 | * The map must be locked on entry and will remain locked on return. | |
1270 | * | |
1271 | * Note that you should also pass the start_entry returned by | |
1272 | * vm_map_clip_range(). However, if you block between the two calls | |
1273 | * with the map unlocked please be aware that the start_entry may | |
1274 | * have been clipped and you may need to scan it backwards to find | |
1275 | * the entry corresponding with the original start address. You are | |
1276 | * responsible for this, vm_map_unclip_range() expects the correct | |
1277 | * start_entry to be passed to it and will KASSERT otherwise. | |
1278 | */ | |
1279 | static | |
1280 | void | |
1281 | vm_map_unclip_range( | |
1282 | vm_map_t map, | |
1283 | vm_map_entry_t start_entry, | |
1284 | vm_offset_t start, | |
1285 | vm_offset_t end, | |
a108bf71 | 1286 | int *countp, |
984263bc MD |
1287 | int flags) |
1288 | { | |
1289 | vm_map_entry_t entry; | |
1290 | ||
1291 | entry = start_entry; | |
1292 | ||
1293 | KASSERT(entry->start == start, ("unclip_range: illegal base entry")); | |
1294 | while (entry != &map->header && entry->start < end) { | |
1295 | KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION, ("in-transition flag not set during unclip on: %p", entry)); | |
1296 | KASSERT(entry->end <= end, ("unclip_range: tail wasn't clipped")); | |
1297 | entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; | |
1298 | if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { | |
1299 | entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; | |
1300 | wakeup(map); | |
1301 | } | |
1302 | entry = entry->next; | |
1303 | } | |
1304 | ||
1305 | /* | |
1306 | * Simplification does not block so there is no restart case. | |
1307 | */ | |
1308 | entry = start_entry; | |
1309 | while (entry != &map->header && entry->start < end) { | |
a108bf71 | 1310 | vm_map_simplify_entry(map, entry, countp); |
984263bc MD |
1311 | entry = entry->next; |
1312 | } | |
1313 | } | |
1314 | ||
1315 | /* | |
1316 | * vm_map_submap: [ kernel use only ] | |
1317 | * | |
1318 | * Mark the given range as handled by a subordinate map. | |
1319 | * | |
1320 | * This range must have been created with vm_map_find, | |
1321 | * and no other operations may have been performed on this | |
1322 | * range prior to calling vm_map_submap. | |
1323 | * | |
1324 | * Only a limited number of operations can be performed | |
1325 | * within this range after calling vm_map_submap: | |
1326 | * vm_fault | |
1327 | * [Don't try vm_map_copy!] | |
1328 | * | |
1329 | * To remove a submapping, one must first remove the | |
1330 | * range from the superior map, and then destroy the | |
1331 | * submap (if desired). [Better yet, don't try it.] | |
1332 | */ | |
1333 | int | |
a108bf71 | 1334 | vm_map_submap(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_map_t submap) |
984263bc MD |
1335 | { |
1336 | vm_map_entry_t entry; | |
1337 | int result = KERN_INVALID_ARGUMENT; | |
a108bf71 | 1338 | int count; |
984263bc | 1339 | |
a108bf71 | 1340 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc MD |
1341 | vm_map_lock(map); |
1342 | ||
1343 | VM_MAP_RANGE_CHECK(map, start, end); | |
1344 | ||
1345 | if (vm_map_lookup_entry(map, start, &entry)) { | |
a108bf71 | 1346 | vm_map_clip_start(map, entry, start, &count); |
984263bc MD |
1347 | } else { |
1348 | entry = entry->next; | |
1349 | } | |
1350 | ||
a108bf71 | 1351 | vm_map_clip_end(map, entry, end, &count); |
984263bc MD |
1352 | |
1353 | if ((entry->start == start) && (entry->end == end) && | |
1354 | ((entry->eflags & MAP_ENTRY_COW) == 0) && | |
1355 | (entry->object.vm_object == NULL)) { | |
1356 | entry->object.sub_map = submap; | |
1357 | entry->eflags |= MAP_ENTRY_IS_SUB_MAP; | |
1358 | result = KERN_SUCCESS; | |
1359 | } | |
1360 | vm_map_unlock(map); | |
a108bf71 | 1361 | vm_map_entry_release(count); |
984263bc MD |
1362 | |
1363 | return (result); | |
1364 | } | |
1365 | ||
1366 | /* | |
1367 | * vm_map_protect: | |
1368 | * | |
1369 | * Sets the protection of the specified address | |
1370 | * region in the target map. If "set_max" is | |
1371 | * specified, the maximum protection is to be set; | |
1372 | * otherwise, only the current protection is affected. | |
1373 | */ | |
1374 | int | |
1375 | vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, | |
1376 | vm_prot_t new_prot, boolean_t set_max) | |
1377 | { | |
1378 | vm_map_entry_t current; | |
1379 | vm_map_entry_t entry; | |
a108bf71 | 1380 | int count; |
984263bc | 1381 | |
a108bf71 | 1382 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc MD |
1383 | vm_map_lock(map); |
1384 | ||
1385 | VM_MAP_RANGE_CHECK(map, start, end); | |
1386 | ||
1387 | if (vm_map_lookup_entry(map, start, &entry)) { | |
a108bf71 | 1388 | vm_map_clip_start(map, entry, start, &count); |
984263bc MD |
1389 | } else { |
1390 | entry = entry->next; | |
1391 | } | |
1392 | ||
1393 | /* | |
1394 | * Make a first pass to check for protection violations. | |
1395 | */ | |
1396 | ||
1397 | current = entry; | |
1398 | while ((current != &map->header) && (current->start < end)) { | |
1399 | if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { | |
1400 | vm_map_unlock(map); | |
a108bf71 | 1401 | vm_map_entry_release(count); |
984263bc MD |
1402 | return (KERN_INVALID_ARGUMENT); |
1403 | } | |
1404 | if ((new_prot & current->max_protection) != new_prot) { | |
1405 | vm_map_unlock(map); | |
a108bf71 | 1406 | vm_map_entry_release(count); |
984263bc MD |
1407 | return (KERN_PROTECTION_FAILURE); |
1408 | } | |
1409 | current = current->next; | |
1410 | } | |
1411 | ||
1412 | /* | |
1413 | * Go back and fix up protections. [Note that clipping is not | |
1414 | * necessary the second time.] | |
1415 | */ | |
984263bc MD |
1416 | current = entry; |
1417 | ||
1418 | while ((current != &map->header) && (current->start < end)) { | |
1419 | vm_prot_t old_prot; | |
1420 | ||
a108bf71 | 1421 | vm_map_clip_end(map, current, end, &count); |
984263bc MD |
1422 | |
1423 | old_prot = current->protection; | |
1424 | if (set_max) | |
1425 | current->protection = | |
1426 | (current->max_protection = new_prot) & | |
1427 | old_prot; | |
1428 | else | |
1429 | current->protection = new_prot; | |
1430 | ||
1431 | /* | |
1432 | * Update physical map if necessary. Worry about copy-on-write | |
1433 | * here -- CHECK THIS XXX | |
1434 | */ | |
1435 | ||
1436 | if (current->protection != old_prot) { | |
1437 | #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \ | |
1438 | VM_PROT_ALL) | |
1439 | ||
1440 | pmap_protect(map->pmap, current->start, | |
1441 | current->end, | |
1442 | current->protection & MASK(current)); | |
1443 | #undef MASK | |
1444 | } | |
1445 | ||
a108bf71 | 1446 | vm_map_simplify_entry(map, current, &count); |
984263bc MD |
1447 | |
1448 | current = current->next; | |
1449 | } | |
1450 | ||
1451 | vm_map_unlock(map); | |
a108bf71 | 1452 | vm_map_entry_release(count); |
984263bc MD |
1453 | return (KERN_SUCCESS); |
1454 | } | |
1455 | ||
1456 | /* | |
1457 | * vm_map_madvise: | |
1458 | * | |
1459 | * This routine traverses a process's map handling the madvise | |
1460 | * system call. Advisories are classified as either those affecting | |
1461 | * the vm_map_entry structure, or those affecting the underlying | |
1462 | * objects. | |
1463 | */ | |
1464 | ||
1465 | int | |
a108bf71 | 1466 | vm_map_madvise(vm_map_t map, vm_offset_t start, vm_offset_t end, int behav) |
984263bc MD |
1467 | { |
1468 | vm_map_entry_t current, entry; | |
1469 | int modify_map = 0; | |
a108bf71 | 1470 | int count; |
984263bc MD |
1471 | |
1472 | /* | |
1473 | * Some madvise calls directly modify the vm_map_entry, in which case | |
1474 | * we need to use an exclusive lock on the map and we need to perform | |
1475 | * various clipping operations. Otherwise we only need a read-lock | |
1476 | * on the map. | |
1477 | */ | |
1478 | ||
a108bf71 MD |
1479 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
1480 | ||
984263bc MD |
1481 | switch(behav) { |
1482 | case MADV_NORMAL: | |
1483 | case MADV_SEQUENTIAL: | |
1484 | case MADV_RANDOM: | |
1485 | case MADV_NOSYNC: | |
1486 | case MADV_AUTOSYNC: | |
1487 | case MADV_NOCORE: | |
1488 | case MADV_CORE: | |
1489 | modify_map = 1; | |
1490 | vm_map_lock(map); | |
1491 | break; | |
1492 | case MADV_WILLNEED: | |
1493 | case MADV_DONTNEED: | |
1494 | case MADV_FREE: | |
1495 | vm_map_lock_read(map); | |
1496 | break; | |
1497 | default: | |
a108bf71 | 1498 | vm_map_entry_release(count); |
984263bc MD |
1499 | return (KERN_INVALID_ARGUMENT); |
1500 | } | |
1501 | ||
1502 | /* | |
1503 | * Locate starting entry and clip if necessary. | |
1504 | */ | |
1505 | ||
1506 | VM_MAP_RANGE_CHECK(map, start, end); | |
1507 | ||
1508 | if (vm_map_lookup_entry(map, start, &entry)) { | |
1509 | if (modify_map) | |
a108bf71 | 1510 | vm_map_clip_start(map, entry, start, &count); |
984263bc MD |
1511 | } else { |
1512 | entry = entry->next; | |
1513 | } | |
1514 | ||
1515 | if (modify_map) { | |
1516 | /* | |
1517 | * madvise behaviors that are implemented in the vm_map_entry. | |
1518 | * | |
1519 | * We clip the vm_map_entry so that behavioral changes are | |
1520 | * limited to the specified address range. | |
1521 | */ | |
1522 | for (current = entry; | |
1523 | (current != &map->header) && (current->start < end); | |
1524 | current = current->next | |
1525 | ) { | |
1526 | if (current->eflags & MAP_ENTRY_IS_SUB_MAP) | |
1527 | continue; | |
1528 | ||
a108bf71 | 1529 | vm_map_clip_end(map, current, end, &count); |
984263bc MD |
1530 | |
1531 | switch (behav) { | |
1532 | case MADV_NORMAL: | |
1533 | vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL); | |
1534 | break; | |
1535 | case MADV_SEQUENTIAL: | |
1536 | vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL); | |
1537 | break; | |
1538 | case MADV_RANDOM: | |
1539 | vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM); | |
1540 | break; | |
1541 | case MADV_NOSYNC: | |
1542 | current->eflags |= MAP_ENTRY_NOSYNC; | |
1543 | break; | |
1544 | case MADV_AUTOSYNC: | |
1545 | current->eflags &= ~MAP_ENTRY_NOSYNC; | |
1546 | break; | |
1547 | case MADV_NOCORE: | |
1548 | current->eflags |= MAP_ENTRY_NOCOREDUMP; | |
1549 | break; | |
1550 | case MADV_CORE: | |
1551 | current->eflags &= ~MAP_ENTRY_NOCOREDUMP; | |
1552 | break; | |
1553 | default: | |
1554 | break; | |
1555 | } | |
a108bf71 | 1556 | vm_map_simplify_entry(map, current, &count); |
984263bc MD |
1557 | } |
1558 | vm_map_unlock(map); | |
1559 | } else { | |
1560 | vm_pindex_t pindex; | |
1561 | int count; | |
1562 | ||
1563 | /* | |
1564 | * madvise behaviors that are implemented in the underlying | |
1565 | * vm_object. | |
1566 | * | |
1567 | * Since we don't clip the vm_map_entry, we have to clip | |
1568 | * the vm_object pindex and count. | |
1569 | */ | |
1570 | for (current = entry; | |
1571 | (current != &map->header) && (current->start < end); | |
1572 | current = current->next | |
1573 | ) { | |
1574 | vm_offset_t useStart; | |
1575 | ||
1576 | if (current->eflags & MAP_ENTRY_IS_SUB_MAP) | |
1577 | continue; | |
1578 | ||
1579 | pindex = OFF_TO_IDX(current->offset); | |
1580 | count = atop(current->end - current->start); | |
1581 | useStart = current->start; | |
1582 | ||
1583 | if (current->start < start) { | |
1584 | pindex += atop(start - current->start); | |
1585 | count -= atop(start - current->start); | |
1586 | useStart = start; | |
1587 | } | |
1588 | if (current->end > end) | |
1589 | count -= atop(current->end - end); | |
1590 | ||
1591 | if (count <= 0) | |
1592 | continue; | |
1593 | ||
1594 | vm_object_madvise(current->object.vm_object, | |
1595 | pindex, count, behav); | |
1596 | if (behav == MADV_WILLNEED) { | |
1597 | pmap_object_init_pt( | |
1598 | map->pmap, | |
1599 | useStart, | |
1600 | current->object.vm_object, | |
1601 | pindex, | |
1602 | (count << PAGE_SHIFT), | |
1603 | MAP_PREFAULT_MADVISE | |
1604 | ); | |
1605 | } | |
1606 | } | |
1607 | vm_map_unlock_read(map); | |
1608 | } | |
a108bf71 | 1609 | vm_map_entry_release(count); |
984263bc MD |
1610 | return(0); |
1611 | } | |
1612 | ||
1613 | ||
1614 | /* | |
1615 | * vm_map_inherit: | |
1616 | * | |
1617 | * Sets the inheritance of the specified address | |
1618 | * range in the target map. Inheritance | |
1619 | * affects how the map will be shared with | |
1620 | * child maps at the time of vm_map_fork. | |
1621 | */ | |
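/*
 * Illustrative sketch (compiled out): marking a range so that a child
 * created by vmspace_fork() shares it instead of copying it.  The range
 * is assumed to have been validated by the caller.
 */
#if 0
static int
inherit_share_example(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	/* returns KERN_SUCCESS or KERN_INVALID_ARGUMENT */
	return (vm_map_inherit(map, start, end, VM_INHERIT_SHARE));
}
#endif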
1622 | int | |
1623 | vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, | |
1624 | vm_inherit_t new_inheritance) | |
1625 | { | |
1626 | vm_map_entry_t entry; | |
1627 | vm_map_entry_t temp_entry; | |
a108bf71 | 1628 | int count; |
984263bc MD |
1629 | |
1630 | switch (new_inheritance) { | |
1631 | case VM_INHERIT_NONE: | |
1632 | case VM_INHERIT_COPY: | |
1633 | case VM_INHERIT_SHARE: | |
1634 | break; | |
1635 | default: | |
1636 | return (KERN_INVALID_ARGUMENT); | |
1637 | } | |
1638 | ||
a108bf71 | 1639 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc MD |
1640 | vm_map_lock(map); |
1641 | ||
1642 | VM_MAP_RANGE_CHECK(map, start, end); | |
1643 | ||
1644 | if (vm_map_lookup_entry(map, start, &temp_entry)) { | |
1645 | entry = temp_entry; | |
a108bf71 | 1646 | vm_map_clip_start(map, entry, start, &count); |
984263bc MD |
1647 | } else |
1648 | entry = temp_entry->next; | |
1649 | ||
1650 | while ((entry != &map->header) && (entry->start < end)) { | |
a108bf71 | 1651 | vm_map_clip_end(map, entry, end, &count); |
984263bc MD |
1652 | |
1653 | entry->inheritance = new_inheritance; | |
1654 | ||
a108bf71 | 1655 | vm_map_simplify_entry(map, entry, &count); |
984263bc MD |
1656 | |
1657 | entry = entry->next; | |
1658 | } | |
984263bc | 1659 | vm_map_unlock(map); |
a108bf71 | 1660 | vm_map_entry_release(count); |
984263bc MD |
1661 | return (KERN_SUCCESS); |
1662 | } | |
1663 | ||
1664 | /* | |
1665 | * Implement the semantics of mlock | |
1666 | */ | |
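/*
 * Illustrative sketch (compiled out): despite the routine's name it
 * implements both mlock-style user wiring (new_pageable == 0) and
 * munlock-style unwiring (new_pageable != 0).  The wrapper below is
 * hypothetical.
 */
#if 0
static int
user_wire_example(vm_map_t map, vm_offset_t addr, vm_size_t len)
{
	int rv;

	rv = vm_map_unwire(map, addr, addr + len, FALSE);	 /* wire */
	if (rv == KERN_SUCCESS)
		rv = vm_map_unwire(map, addr, addr + len, TRUE); /* unwire */
	return (rv);
}
#endif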
1667 | int | |
cde87949 | 1668 | vm_map_unwire(map, start, real_end, new_pageable) |
984263bc MD |
1669 | vm_map_t map; |
1670 | vm_offset_t start; | |
1671 | vm_offset_t real_end; | |
1672 | boolean_t new_pageable; | |
1673 | { | |
1674 | vm_map_entry_t entry; | |
1675 | vm_map_entry_t start_entry; | |
1676 | vm_offset_t end; | |
1677 | int rv = KERN_SUCCESS; | |
a108bf71 | 1678 | int count; |
984263bc | 1679 | |
a108bf71 | 1680 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc MD |
1681 | vm_map_lock(map); |
1682 | VM_MAP_RANGE_CHECK(map, start, real_end); | |
1683 | end = real_end; | |
1684 | ||
a108bf71 | 1685 | start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES); |
984263bc MD |
1686 | if (start_entry == NULL) { |
1687 | vm_map_unlock(map); | |
a108bf71 | 1688 | vm_map_entry_release(count); |
984263bc MD |
1689 | return (KERN_INVALID_ADDRESS); |
1690 | } | |
1691 | ||
1692 | if (new_pageable == 0) { | |
1693 | entry = start_entry; | |
1694 | while ((entry != &map->header) && (entry->start < end)) { | |
1695 | vm_offset_t save_start; | |
1696 | vm_offset_t save_end; | |
1697 | ||
1698 | /* | |
1699 | * Already user wired or hard wired (trivial cases) | |
1700 | */ | |
1701 | if (entry->eflags & MAP_ENTRY_USER_WIRED) { | |
1702 | entry = entry->next; | |
1703 | continue; | |
1704 | } | |
1705 | if (entry->wired_count != 0) { | |
1706 | entry->wired_count++; | |
1707 | entry->eflags |= MAP_ENTRY_USER_WIRED; | |
1708 | entry = entry->next; | |
1709 | continue; | |
1710 | } | |
1711 | ||
1712 | /* | |
1713 | * A new wiring requires instantiation of appropriate | |
1714 | * management structures and the faulting in of the | |
1715 | * page. | |
1716 | */ | |
1717 | if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { | |
1718 | int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY; | |
1719 | if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) { | |
1720 | ||
1721 | vm_object_shadow(&entry->object.vm_object, | |
1722 | &entry->offset, | |
1723 | atop(entry->end - entry->start)); | |
1724 | entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; | |
1725 | ||
1726 | } else if (entry->object.vm_object == NULL && | |
1727 | !map->system_map) { | |
1728 | ||
1729 | entry->object.vm_object = | |
1730 | vm_object_allocate(OBJT_DEFAULT, | |
1731 | atop(entry->end - entry->start)); | |
1732 | entry->offset = (vm_offset_t) 0; | |
1733 | ||
1734 | } | |
1735 | } | |
1736 | entry->wired_count++; | |
1737 | entry->eflags |= MAP_ENTRY_USER_WIRED; | |
1738 | ||
1739 | /* | |
1740 | * Now fault in the area. The map lock needs to be | |
1741 | * manipulated to avoid deadlocks. The in-transition | |
1742 | * flag protects the entries. | |
1743 | */ | |
1744 | save_start = entry->start; | |
1745 | save_end = entry->end; | |
1746 | vm_map_unlock(map); | |
1747 | map->timestamp++; | |
1748 | rv = vm_fault_user_wire(map, save_start, save_end); | |
1749 | vm_map_lock(map); | |
1750 | if (rv) { | |
1751 | CLIP_CHECK_BACK(entry, save_start); | |
1752 | for (;;) { | |
1753 | KASSERT(entry->wired_count == 1, ("bad wired_count on entry")); | |
1754 | entry->eflags &= ~MAP_ENTRY_USER_WIRED; | |
1755 | entry->wired_count = 0; | |
1756 | if (entry->end == save_end) | |
1757 | break; | |
1758 | entry = entry->next; | |
1759 | KASSERT(entry != &map->header, ("bad entry clip during backout")); | |
1760 | } | |
1761 | end = save_start; /* unwire the rest */ | |
1762 | break; | |
1763 | } | |
1764 | /* | |
1765 | * note that even though the entry might have been | |
1766 | * clipped, the USER_WIRED flag we set prevents | |
1767 | * duplication so we do not have to do a | |
1768 | * clip check. | |
1769 | */ | |
1770 | entry = entry->next; | |
1771 | } | |
1772 | ||
1773 | /* | |
1774 | * If we failed, fall through to the unwiring section to | |
1775 | * unwire what we had wired so far. 'end' has already | |
1776 | * been adjusted. | |
1777 | */ | |
1778 | if (rv) | |
1779 | new_pageable = 1; | |
1780 | ||
1781 | /* | |
1782 | * start_entry might have been clipped if we unlocked the | |
1783 | * map and blocked. No matter how clipped it has gotten | |
1784 | * there should be a fragment that is on our start boundary. | |
1785 | */ | |
1786 | CLIP_CHECK_BACK(start_entry, start); | |
1787 | } | |
1788 | ||
1789 | /* | |
1790 | * Deal with the unwiring case. | |
1791 | */ | |
1792 | if (new_pageable) { | |
1793 | /* | |
1794 | * This is the unwiring case. We must first ensure that the | |
1795 | * range to be unwired is really wired down. We know there | |
1796 | * are no holes. | |
1797 | */ | |
1798 | entry = start_entry; | |
1799 | while ((entry != &map->header) && (entry->start < end)) { | |
1800 | if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { | |
1801 | rv = KERN_INVALID_ARGUMENT; | |
1802 | goto done; | |
1803 | } | |
1804 | KASSERT(entry->wired_count != 0, ("wired count was 0 with USER_WIRED set! %p", entry)); | |
1805 | entry = entry->next; | |
1806 | } | |
1807 | ||
1808 | /* | |
1809 | * Now decrement the wiring count for each region. If a region | |
1810 | * becomes completely unwired, unwire its physical pages and | |
1811 | * mappings. | |
1812 | */ | |
b4eddbac DR |
1813 | /* |
1814 | * The map entries are processed in a loop, checking to | |
1815 | * make sure the entry is wired and asserting it has a wired | |
1816 | * count. However, another loop was inserted more-or-less in | |
1817 | * the middle of the unwiring path. This loop picks up the | |
1818 | * "entry" loop variable from the first loop without first | |
1819 | * setting it to start_entry. Naturally, the second loop | |
1820 | * is never entered and the pages backing the entries are | |
1821 | * never unwired. This can lead to a leak of wired pages. | |
1822 | */ | |
1823 | entry = start_entry; | |
984263bc MD |
1824 | while ((entry != &map->header) && (entry->start < end)) { |
1825 | KASSERT(entry->eflags & MAP_ENTRY_USER_WIRED, ("expected USER_WIRED on entry %p", entry)); | |
1826 | entry->eflags &= ~MAP_ENTRY_USER_WIRED; | |
1827 | entry->wired_count--; | |
1828 | if (entry->wired_count == 0) | |
1829 | vm_fault_unwire(map, entry->start, entry->end); | |
1830 | entry = entry->next; | |
1831 | } | |
1832 | } | |
1833 | done: | |
a108bf71 | 1834 | vm_map_unclip_range(map, start_entry, start, real_end, &count, |
984263bc MD |
1835 | MAP_CLIP_NO_HOLES); |
1836 | map->timestamp++; | |
1837 | vm_map_unlock(map); | |
a108bf71 | 1838 | vm_map_entry_release(count); |
984263bc MD |
1839 | return (rv); |
1840 | } | |
1841 | ||
1842 | /* | |
cde87949 | 1843 | * vm_map_wire: |
984263bc MD |
1844 | * |
1845 | * Sets the pageability of the specified address | |
1846 | * range in the target map. Regions specified | |
1847 | * as not pageable require locked-down physical | |
1848 | * memory and physical page maps. | |
1849 | * | |
1850 | * The map must not be locked, but a reference | |
1851 | * must remain to the map throughout the call. | |
a108bf71 MD |
1852 | * |
1853 | * This function may be called via the zalloc path and must properly | |
1854 | * reserve map entries for kernel_map. | |
984263bc MD |
1855 | */ |
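/*
 * Illustrative sketch (compiled out): wiring and later unwiring a kernel
 * virtual range.  Using kernel_map with KM_KRESERVE (the pre-reserved
 * kernel entry pool) is the assumed-typical pattern; adding KM_PAGEABLE
 * selects the unwiring path.
 */
#if 0
static int
kernel_wire_example(vm_offset_t addr, vm_size_t size)
{
	int rv;

	rv = vm_map_wire(kernel_map, addr, addr + size, KM_KRESERVE);
	if (rv == KERN_SUCCESS)
		rv = vm_map_wire(kernel_map, addr, addr + size,
				 KM_KRESERVE | KM_PAGEABLE);
	return (rv);
}
#endif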
1856 | int | |
e1359933 | 1857 | vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags) |
984263bc MD |
1858 | { |
1859 | vm_map_entry_t entry; | |
1860 | vm_map_entry_t start_entry; | |
1861 | vm_offset_t end; | |
1862 | int rv = KERN_SUCCESS; | |
a108bf71 | 1863 | int count; |
984263bc MD |
1864 | int s; |
1865 | ||
e1359933 | 1866 | if (kmflags & KM_KRESERVE) |
a108bf71 | 1867 | count = vm_map_entry_kreserve(MAP_RESERVE_COUNT); |
a108bf71 MD |
1868 | else |
1869 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); | |
984263bc MD |
1870 | vm_map_lock(map); |
1871 | VM_MAP_RANGE_CHECK(map, start, real_end); | |
1872 | end = real_end; | |
1873 | ||
a108bf71 | 1874 | start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES); |
984263bc MD |
1875 | if (start_entry == NULL) { |
1876 | vm_map_unlock(map); | |
a108bf71 MD |
1877 | rv = KERN_INVALID_ADDRESS; |
1878 | goto failure; | |
984263bc | 1879 | } |
e1359933 | 1880 | if ((kmflags & KM_PAGEABLE) == 0) { |
984263bc MD |
1881 | /* |
1882 | * Wiring. | |
1883 | * | |
1884 | * 1. Holding the write lock, we create any shadow or zero-fill | |
1885 | * objects that need to be created. Then we clip each map | |
1886 | * entry to the region to be wired and increment its wiring | |
1887 | * count. We create objects before clipping the map entries | |
1888 | * to avoid object proliferation. | |
1889 | * | |
1890 | * 2. We downgrade to a read lock, and call vm_fault_wire to | |
1891 | * fault in the pages for any newly wired area (wired_count is | |
1892 | * 1). | |
1893 | * | |
1894 | * Downgrading to a read lock for vm_fault_wire avoids a | |
1895 | * possible deadlock with another process that may have faulted | |
1896 | * on one of the pages to be wired (it would mark the page busy, | |
1897 | * blocking us, then in turn block on the map lock that we | |
1898 | * hold). Because of problems in the recursive lock package, | |
1899 | * we cannot upgrade to a write lock in vm_map_lookup. Thus, | |
1900 | * any actions that require the write lock must be done | |
1901 | * beforehand. Because we keep the read lock on the map, the | |
1902 | * copy-on-write status of the entries we modify here cannot | |
1903 | * change. | |
1904 | */ | |
1905 | ||
1906 | entry = start_entry; | |
1907 | while ((entry != &map->header) && (entry->start < end)) { | |
1908 | /* | |
1909 | * Trivial case if the entry is already wired | |
1910 | */ | |
1911 | if (entry->wired_count) { | |
1912 | entry->wired_count++; | |
1913 | entry = entry->next; | |
1914 | continue; | |
1915 | } | |
1916 | ||
1917 | /* | |
1918 | * The entry is being newly wired; we have to set up | |
1919 | * appropriate management structures. A shadow | |
1920 | * object is required for a copy-on-write region, | |
1921 | * or a normal object for a zero-fill region. We | |
1922 | * do not have to do this for entries that point to sub | |
1923 | * maps because we won't hold the lock on the sub map. | |
1924 | */ | |
1925 | if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { | |
1926 | int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY; | |
1927 | if (copyflag && | |
1928 | ((entry->protection & VM_PROT_WRITE) != 0)) { | |
1929 | ||
1930 | vm_object_shadow(&entry->object.vm_object, | |
1931 | &entry->offset, | |
1932 | atop(entry->end - entry->start)); | |
1933 | entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; | |
1934 | } else if (entry->object.vm_object == NULL && | |
1935 | !map->system_map) { | |
1936 | entry->object.vm_object = | |
1937 | vm_object_allocate(OBJT_DEFAULT, | |
1938 | atop(entry->end - entry->start)); | |
1939 | entry->offset = (vm_offset_t) 0; | |
1940 | } | |
1941 | } | |
1942 | ||
1943 | entry->wired_count++; | |
1944 | entry = entry->next; | |
1945 | } | |
1946 | ||
1947 | /* | |
1948 | * Pass 2. | |
1949 | */ | |
1950 | ||
1951 | /* | |
1952 | * HACK HACK HACK HACK | |
1953 | * | |
1954 | * Unlock the map to avoid deadlocks. The in-transit flag | |
1955 | * protects us from most changes but note that | |
1956 | * clipping may still occur. To prevent clipping from | |
1957 | * occurring after the unlock, except for when we are | |
1958 | * blocking in vm_fault_wire, we must run at splvm(). | |
1959 | * Otherwise our accesses to entry->start and entry->end | |
1960 | * could be corrupted. We have to set splvm() prior to | |
1961 | * unlocking so start_entry does not change out from | |
1962 | * under us at the very beginning of the loop. | |
1963 | * | |
1964 | * HACK HACK HACK HACK | |
1965 | */ | |
1966 | ||
1967 | s = splvm(); | |
1968 | vm_map_unlock(map); | |
1969 | ||
1970 | entry = start_entry; | |
1971 | while (entry != &map->header && entry->start < end) { | |
1972 | /* | |
1973 | * If vm_fault_wire fails for any page we need to undo | |
1974 | * what has been done. We decrement the wiring count | |
1975 | * for those pages which have not yet been wired (now) | |
1976 | * and unwire those that have (later). | |
1977 | */ | |
1978 | vm_offset_t save_start = entry->start; | |
1979 | vm_offset_t save_end = entry->end; | |
1980 | ||
1981 | if (entry->wired_count == 1) | |
1982 | rv = vm_fault_wire(map, entry->start, entry->end); | |
1983 | if (rv) { | |
1984 | CLIP_CHECK_BACK(entry, save_start); | |
1985 | for (;;) { | |
1986 | KASSERT(entry->wired_count == 1, ("wired_count changed unexpectedly")); | |
1987 | entry->wired_count = 0; | |
1988 | if (entry->end == save_end) | |
1989 | break; | |
1990 | entry = entry->next; | |
1991 | KASSERT(entry != &map->header, ("bad entry clip during backout")); | |
1992 | } | |
1993 | end = save_start; | |
1994 | break; | |
1995 | } | |
1996 | CLIP_CHECK_FWD(entry, save_end); | |
1997 | entry = entry->next; | |
1998 | } | |
1999 | splx(s); | |
2000 | ||
2001 | /* | |
2002 | * relock. start_entry is still IN_TRANSITION and must | |
2003 | * still exist, but may have been clipped (handled just | |
2004 | * below). | |
2005 | */ | |
2006 | vm_map_lock(map); | |
2007 | ||
2008 | /* | |
2009 | * If a failure occurred, undo everything by falling through | |
2010 | * to the unwiring code. 'end' has already been adjusted | |
2011 | * appropriately. | |
2012 | */ | |
2013 | if (rv) | |
e1359933 | 2014 | kmflags |= KM_PAGEABLE; |
984263bc MD |
2015 | |
2016 | /* | |
2017 | * start_entry might have been clipped if we unlocked the | |
2018 | * map and blocked. No matter how clipped it has gotten | |
2019 | * there should be a fragment that is on our start boundary. | |
2020 | */ | |
2021 | CLIP_CHECK_BACK(start_entry, start); | |
2022 | } | |
2023 | ||
e1359933 | 2024 | if (kmflags & KM_PAGEABLE) { |
984263bc MD |
2025 | /* |
2026 | * This is the unwiring case. We must first ensure that the | |
2027 | * range to be unwired is really wired down. We know there | |
2028 | * are no holes. | |
2029 | */ | |
2030 | entry = start_entry; | |
2031 | while ((entry != &map->header) && (entry->start < end)) { | |
2032 | if (entry->wired_count == 0) { | |
2033 | rv = KERN_INVALID_ARGUMENT; | |
2034 | goto done; | |
2035 | } | |
2036 | entry = entry->next; | |
2037 | } | |
2038 | ||
2039 | /* | |
2040 | * Now decrement the wiring count for each region. If a region | |
2041 | * becomes completely unwired, unwire its physical pages and | |
2042 | * mappings. | |
2043 | */ | |
2044 | entry = start_entry; | |
2045 | while ((entry != &map->header) && (entry->start < end)) { | |
2046 | entry->wired_count--; | |
2047 | if (entry->wired_count == 0) | |
2048 | vm_fault_unwire(map, entry->start, entry->end); | |
2049 | entry = entry->next; | |
2050 | } | |
2051 | } | |
2052 | done: | |
a108bf71 | 2053 | vm_map_unclip_range(map, start_entry, start, real_end, &count, |
984263bc MD |
2054 | MAP_CLIP_NO_HOLES); |
2055 | map->timestamp++; | |
2056 | vm_map_unlock(map); | |
a108bf71 | 2057 | failure: |
e1359933 | 2058 | if (kmflags & KM_KRESERVE) |
a108bf71 | 2059 | vm_map_entry_krelease(count); |
a108bf71 MD |
2060 | else |
2061 | vm_map_entry_release(count); | |
984263bc MD |
2062 | return (rv); |
2063 | } | |
2064 | ||
a108bf71 MD |
2065 | /* |
2066 | * vm_map_set_wired_quick() | |
2067 | * | |
2068 | * Mark a newly allocated address range as wired but do not fault in | |
2069 | * the pages. The caller is expected to load the pages into the object. | |
2070 | * | |
2071 | * The map must be locked on entry and will remain locked on return. | |
2072 | */ | |
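/*
 * Illustrative sketch (compiled out): the expected calling pattern is an
 * allocation path that already holds a map entry reservation, locks the
 * map, marks the range wired, and then loads the pages itself.
 */
#if 0
static void
wired_quick_example(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
	int count;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	vm_map_set_wired_quick(map, addr, size, &count);
	/* ... caller instantiates and enters the pages here ... */
	vm_map_unlock(map);
	vm_map_entry_release(count);
}
#endif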
2073 | void | |
2074 | vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *countp) | |
2075 | { | |
2076 | vm_map_entry_t scan; | |
2077 | vm_map_entry_t entry; | |
2078 | ||
2079 | entry = vm_map_clip_range(map, addr, addr + size, countp, MAP_CLIP_NO_HOLES); | |
2080 | for (scan = entry; scan != &map->header && scan->start < addr + size; scan = scan->next) { | |
2081 | KKASSERT(scan->wired_count == 0); | |
2082 | scan->wired_count = 1; | |
2083 | } | |
2084 | vm_map_unclip_range(map, entry, addr, addr + size, countp, MAP_CLIP_NO_HOLES); | |
2085 | } | |
2086 | ||
984263bc MD |
2087 | /* |
2088 | * vm_map_clean | |
2089 | * | |
2090 | * Push any dirty cached pages in the address range to their pager. | |
2091 | * If syncio is TRUE, dirty pages are written synchronously. | |
2092 | * If invalidate is TRUE, any cached pages are freed as well. | |
2093 | * | |
2094 | * Returns an error if any part of the specified range is not mapped. | |
2095 | */ | |
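/*
 * Illustrative sketch (compiled out): an msync(MS_SYNC)-style flush of a
 * user range, writing dirty pages synchronously without freeing the
 * cached pages.  The process pointer is hypothetical.
 */
#if 0
static int
msync_example(struct proc *p, vm_offset_t start, vm_offset_t end)
{
	vm_map_t map = &p->p_vmspace->vm_map;

	return (vm_map_clean(map, start, end, TRUE, FALSE));
}
#endif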
2096 | int | |
2097 | vm_map_clean(map, start, end, syncio, invalidate) | |
2098 | vm_map_t map; | |
2099 | vm_offset_t start; | |
2100 | vm_offset_t end; | |
2101 | boolean_t syncio; | |
2102 | boolean_t invalidate; | |
2103 | { | |
2104 | vm_map_entry_t current; | |
2105 | vm_map_entry_t entry; | |
2106 | vm_size_t size; | |
2107 | vm_object_t object; | |
2108 | vm_ooffset_t offset; | |
2109 | ||
2110 | vm_map_lock_read(map); | |
2111 | VM_MAP_RANGE_CHECK(map, start, end); | |
2112 | if (!vm_map_lookup_entry(map, start, &entry)) { | |
2113 | vm_map_unlock_read(map); | |
2114 | return (KERN_INVALID_ADDRESS); | |
2115 | } | |
2116 | /* | |
2117 | * Make a first pass to check for holes. | |
2118 | */ | |
2119 | for (current = entry; current->start < end; current = current->next) { | |
2120 | if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { | |
2121 | vm_map_unlock_read(map); | |
2122 | return (KERN_INVALID_ARGUMENT); | |
2123 | } | |
2124 | if (end > current->end && | |
2125 | (current->next == &map->header || | |
2126 | current->end != current->next->start)) { | |
2127 | vm_map_unlock_read(map); | |
2128 | return (KERN_INVALID_ADDRESS); | |
2129 | } | |
2130 | } | |
2131 | ||
2132 | if (invalidate) | |
2133 | pmap_remove(vm_map_pmap(map), start, end); | |
2134 | /* | |
2135 | * Make a second pass, cleaning/uncaching pages from the indicated | |
2136 | * objects as we go. | |
2137 | */ | |
2138 | for (current = entry; current->start < end; current = current->next) { | |
2139 | offset = current->offset + (start - current->start); | |
2140 | size = (end <= current->end ? end : current->end) - start; | |
2141 | if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { | |
2142 | vm_map_t smap; | |
2143 | vm_map_entry_t tentry; | |
2144 | vm_size_t tsize; | |
2145 | ||
2146 | smap = current->object.sub_map; | |
2147 | vm_map_lock_read(smap); | |
2148 | (void) vm_map_lookup_entry(smap, offset, &tentry); | |
2149 | tsize = tentry->end - offset; | |
2150 | if (tsize < size) | |
2151 | size = tsize; | |
2152 | object = tentry->object.vm_object; | |
2153 | offset = tentry->offset + (offset - tentry->start); | |
2154 | vm_map_unlock_read(smap); | |
2155 | } else { | |
2156 | object = current->object.vm_object; | |
2157 | } | |
2158 | /* | |
2159 | * Note that there is absolutely no sense in writing out | |
2160 | * anonymous objects, so we track down the vnode object | |
2161 | * to write out. | |
2162 | * We invalidate (remove) all pages from the address space | |
2163 | * anyway, for semantic correctness. | |
2164 | * | |
2165 | * note: certain anonymous maps, such as MAP_NOSYNC maps, | |
2166 | * may start out with a NULL object. | |
2167 | */ | |
2168 | while (object && object->backing_object) { | |
2169 | object = object->backing_object; | |
2170 | offset += object->backing_object_offset; | |
2171 | if (object->size < OFF_TO_IDX( offset + size)) | |
2172 | size = IDX_TO_OFF(object->size) - offset; | |
2173 | } | |
2174 | if (object && (object->type == OBJT_VNODE) && | |
2175 | (current->protection & VM_PROT_WRITE)) { | |
2176 | /* | |
2177 | * Flush pages if writing is allowed, invalidate them | |
2178 | * if invalidation requested. Pages undergoing I/O | |
2179 | * will be ignored by vm_object_page_remove(). | |
2180 | * | |
2181 | * We cannot lock the vnode and then wait for paging | |
2182 | * to complete without deadlocking against vm_fault. | |
2183 | * Instead we simply call vm_object_page_remove() and | |
2184 | * allow it to block internally on a page-by-page | |
2185 | * basis when it encounters pages undergoing async | |
2186 | * I/O. | |
2187 | */ | |
2188 | int flags; | |
2189 | ||
2190 | vm_object_reference(object); | |
dadab5e9 | 2191 | vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread); |
984263bc MD |
2192 | flags = (syncio || invalidate) ? OBJPC_SYNC : 0; |
2193 | flags |= invalidate ? OBJPC_INVAL : 0; | |
2194 | vm_object_page_clean(object, | |
2195 | OFF_TO_IDX(offset), | |
2196 | OFF_TO_IDX(offset + size + PAGE_MASK), | |
2197 | flags); | |
dadab5e9 | 2198 | VOP_UNLOCK(object->handle, 0, curthread); |
984263bc MD |
2199 | vm_object_deallocate(object); |
2200 | } | |
2201 | if (object && invalidate && | |
2202 | ((object->type == OBJT_VNODE) || | |
2203 | (object->type == OBJT_DEVICE))) { | |
2204 | vm_object_reference(object); | |
2205 | vm_object_page_remove(object, | |
2206 | OFF_TO_IDX(offset), | |
2207 | OFF_TO_IDX(offset + size + PAGE_MASK), | |
2208 | FALSE); | |
2209 | vm_object_deallocate(object); | |
2210 | } | |
2211 | start += size; | |
2212 | } | |
2213 | ||
2214 | vm_map_unlock_read(map); | |
2215 | return (KERN_SUCCESS); | |
2216 | } | |
2217 | ||
2218 | /* | |
2219 | * vm_map_entry_unwire: [ internal use only ] | |
2220 | * | |
2221 | * Make the region specified by this entry pageable. | |
2222 | * | |
2223 | * The map in question should be locked. | |
2224 | * [This is the reason for this routine's existence.] | |
2225 | */ | |
2226 | static void | |
a108bf71 | 2227 | vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) |
984263bc MD |
2228 | { |
2229 | vm_fault_unwire(map, entry->start, entry->end); | |
2230 | entry->wired_count = 0; | |
2231 | } | |
2232 | ||
2233 | /* | |
2234 | * vm_map_entry_delete: [ internal use only ] | |
2235 | * | |
2236 | * Deallocate the given entry from the target map. | |
2237 | */ | |
2238 | static void | |
a108bf71 | 2239 | vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry, int *countp) |
984263bc MD |
2240 | { |
2241 | vm_map_entry_unlink(map, entry); | |
2242 | map->size -= entry->end - entry->start; | |
2243 | ||
2244 | if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { | |
2245 | vm_object_deallocate(entry->object.vm_object); | |
2246 | } | |
2247 | ||
a108bf71 | 2248 | vm_map_entry_dispose(map, entry, countp); |
984263bc MD |
2249 | } |
2250 | ||
2251 | /* | |
2252 | * vm_map_delete: [ internal use only ] | |
2253 | * | |
2254 | * Deallocates the given address range from the target | |
2255 | * map. | |
2256 | */ | |
2257 | int | |
a108bf71 | 2258 | vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end, int *countp) |
984263bc MD |
2259 | { |
2260 | vm_object_t object; | |
2261 | vm_map_entry_t entry; | |
2262 | vm_map_entry_t first_entry; | |
2263 | ||
2264 | /* | |
2265 | * Find the start of the region, and clip it | |
2266 | */ | |
2267 | ||
2268 | again: | |
2269 | if (!vm_map_lookup_entry(map, start, &first_entry)) | |
2270 | entry = first_entry->next; | |
2271 | else { | |
2272 | entry = first_entry; | |
a108bf71 | 2273 | vm_map_clip_start(map, entry, start, countp); |
984263bc MD |
2274 | /* |
2275 | * Fix the lookup hint now, rather than each time though the | |
2276 | * loop. | |
2277 | */ | |
2278 | SAVE_HINT(map, entry->prev); | |
2279 | } | |
2280 | ||
2281 | /* | |
2282 | * Save the free space hint | |
2283 | */ | |
2284 | ||
2285 | if (entry == &map->header) { | |
2286 | map->first_free = &map->header; | |
2287 | } else if (map->first_free->start >= start) { | |
2288 | map->first_free = entry->prev; | |
2289 | } | |
2290 | ||
2291 | /* | |
2292 | * Step through all entries in this region | |
2293 | */ | |
2294 | ||
2295 | while ((entry != &map->header) && (entry->start < end)) { | |
2296 | vm_map_entry_t next; | |
2297 | vm_offset_t s, e; | |
2298 | vm_pindex_t offidxstart, offidxend, count; | |
2299 | ||
2300 | /* | |
2301 | * If we hit an in-transition entry we have to sleep and | |
2302 | * retry. It's easier (and not really slower) to just retry | |
2303 | * since this case occurs so rarely and the hint is already | |
2304 | * pointing at the right place. We have to reset the | |
2305 | * start offset so as not to accidentally delete an entry | |
2306 | * another process just created in vacated space. | |
2307 | */ | |
2308 | if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { | |
2309 | entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; | |
2310 | start = entry->start; | |
12e4aaff MD |
2311 | ++mycpu->gd_cnt.v_intrans_coll; |
2312 | ++mycpu->gd_cnt.v_intrans_wait; | |
984263bc MD |
2313 | vm_map_transition_wait(map); |
2314 | goto again; | |
2315 | } | |
a108bf71 | 2316 | vm_map_clip_end(map, entry, end, countp); |
984263bc MD |
2317 | |
2318 | s = entry->start; | |
2319 | e = entry->end; | |
2320 | next = entry->next; | |
2321 | ||
2322 | offidxstart = OFF_TO_IDX(entry->offset); | |
2323 | count = OFF_TO_IDX(e - s); | |
2324 | object = entry->object.vm_object; | |
2325 | ||
2326 | /* | |
2327 | * Unwire before removing addresses from the pmap; otherwise, | |
2328 | * unwiring will put the entries back in the pmap. | |
2329 | */ | |
2330 | if (entry->wired_count != 0) { | |
2331 | vm_map_entry_unwire(map, entry); | |
2332 | } | |
2333 | ||
2334 | offidxend = offidxstart + count; | |
2335 | ||
2336 | if ((object == kernel_object) || (object == kmem_object)) { | |
2337 | vm_object_page_remove(object, offidxstart, offidxend, FALSE); | |
2338 | } else { | |
2339 | pmap_remove(map->pmap, s, e); | |
2340 | if (object != NULL && | |
2341 | object->ref_count != 1 && | |
2342 | (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING && | |
2343 | (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { | |
2344 | vm_object_collapse(object); | |
2345 | vm_object_page_remove(object, offidxstart, offidxend, FALSE); | |
2346 | if (object->type == OBJT_SWAP) { | |
2347 | swap_pager_freespace(object, offidxstart, count); | |
2348 | } | |
2349 | if (offidxend >= object->size && | |
2350 | offidxstart < object->size) { | |
2351 | object->size = offidxstart; | |
2352 | } | |
2353 | } | |
2354 | } | |
2355 | ||
2356 | /* | |
2357 | * Delete the entry (which may delete the object) only after | |
2358 | * removing all pmap entries pointing to its pages. | |
2359 | * (Otherwise, its page frames may be reallocated, and any | |
2360 | * modify bits will be set in the wrong object!) | |
2361 | */ | |
a108bf71 | 2362 | vm_map_entry_delete(map, entry, countp); |
984263bc MD |
2363 | entry = next; |
2364 | } | |
2365 | return (KERN_SUCCESS); | |
2366 | } | |
2367 | ||
2368 | /* | |
2369 | * vm_map_remove: | |
2370 | * | |
2371 | * Remove the given address range from the target map. | |
2372 | * This is the exported form of vm_map_delete. | |
2373 | */ | |
2374 | int | |
a108bf71 | 2375 | vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) |
984263bc | 2376 | { |
03aa8d99 | 2377 | int result; |
a108bf71 | 2378 | int count; |
984263bc | 2379 | |
a108bf71 | 2380 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc MD |
2381 | vm_map_lock(map); |
2382 | VM_MAP_RANGE_CHECK(map, start, end); | |
a108bf71 | 2383 | result = vm_map_delete(map, start, end, &count); |
984263bc | 2384 | vm_map_unlock(map); |
a108bf71 | 2385 | vm_map_entry_release(count); |
984263bc | 2386 | |
984263bc MD |
2387 | return (result); |
2388 | } | |
2389 | ||
2390 | /* | |
2391 | * vm_map_check_protection: | |
2392 | * | |
2393 | * Assert that the target map allows the specified | |
2394 | * privilege on the entire address region given. | |
2395 | * The entire region must be allocated. | |
2396 | */ | |
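/*
 * Illustrative sketch (compiled out): verifying that an entire user range
 * is readable and writable before committing to an operation.  The caller
 * is assumed to keep the map stable across the check.
 */
#if 0
static boolean_t
check_rw_example(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	return (vm_map_check_protection(map, start, end,
					VM_PROT_READ | VM_PROT_WRITE));
}
#endif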
2397 | boolean_t | |
2398 | vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, | |
2399 | vm_prot_t protection) | |
2400 | { | |
2401 | vm_map_entry_t entry; | |
2402 | vm_map_entry_t tmp_entry; | |
2403 | ||
2404 | if (!vm_map_lookup_entry(map, start, &tmp_entry)) { | |
2405 | return (FALSE); | |
2406 | } | |
2407 | entry = tmp_entry; | |
2408 | ||
2409 | while (start < end) { | |
2410 | if (entry == &map->header) { | |
2411 | return (FALSE); | |
2412 | } | |
2413 | /* | |
2414 | * No holes allowed! | |
2415 | */ | |
2416 | ||
2417 | if (start < entry->start) { | |
2418 | return (FALSE); | |
2419 | } | |
2420 | /* | |
2421 | * Check protection associated with entry. | |
2422 | */ | |
2423 | ||
2424 | if ((entry->protection & protection) != protection) { | |
2425 | return (FALSE); | |
2426 | } | |
2427 | /* go to next entry */ | |
2428 | ||
2429 | start = entry->end; | |
2430 | entry = entry->next; | |
2431 | } | |
2432 | return (TRUE); | |
2433 | } | |
2434 | ||
2435 | /* | |
2436 | * Split the pages in a map entry into a new object. This affords | |
2437 | * easier removal of unused pages, and keeps object inheritance from | |
2438 | * negatively impacting memory usage. | |
2439 | */ | |
2440 | static void | |
a108bf71 | 2441 | vm_map_split(vm_map_entry_t entry) |
984263bc MD |
2442 | { |
2443 | vm_page_t m; | |
2444 | vm_object_t orig_object, new_object, source; | |
2445 | vm_offset_t s, e; | |
2446 | vm_pindex_t offidxstart, offidxend, idx; | |
2447 | vm_size_t size; | |
2448 | vm_ooffset_t offset; | |
2449 | ||
2450 | orig_object = entry->object.vm_object; | |
2451 | if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) | |
2452 | return; | |
2453 | if (orig_object->ref_count <= 1) | |
2454 | return; | |
2455 | ||
2456 | offset = entry->offset; | |
2457 | s = entry->start; | |
2458 | e = entry->end; | |
2459 | ||
2460 | offidxstart = OFF_TO_IDX(offset); | |
2461 | offidxend = offidxstart + OFF_TO_IDX(e - s); | |
2462 | size = offidxend - offidxstart; | |
2463 | ||
2464 | new_object = vm_pager_allocate(orig_object->type, | |
2465 | NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL); | |
2466 | if (new_object == NULL) | |
2467 | return; | |
2468 | ||
2469 | source = orig_object->backing_object; | |
2470 | if (source != NULL) { | |
2471 | vm_object_reference(source); /* Referenced by new_object */ | |
2472 | LIST_INSERT_HEAD(&source->shadow_head, | |
2473 | new_object, shadow_list); | |
2474 | vm_object_clear_flag(source, OBJ_ONEMAPPING); | |
2475 | new_object->backing_object_offset = | |
2476 | orig_object->backing_object_offset + IDX_TO_OFF(offidxstart); | |
2477 | new_object->backing_object = source; | |
2478 | source->shadow_count++; | |
2479 | source->generation++; | |
2480 | } | |
2481 | ||
2482 | for (idx = 0; idx < size; idx++) { | |
2483 | vm_page_t m; | |
2484 | ||
2485 | retry: | |
2486 | m = vm_page_lookup(orig_object, offidxstart + idx); | |
2487 | if (m == NULL) | |
2488 | continue; | |
2489 | ||
2490 | /* | |
2491 | * We must wait for pending I/O to complete before we can | |
2492 | * rename the page. | |
2493 | * | |
2494 | * We do not have to VM_PROT_NONE the page as mappings should | |
2495 | * not be changed by this operation. | |
2496 | */ | |
2497 | if (vm_page_sleep_busy(m, TRUE, "spltwt")) | |
2498 | goto retry; | |
2499 | ||
2500 | vm_page_busy(m); | |
2501 | vm_page_rename(m, new_object, idx); | |
2502 | /* page automatically made dirty by rename and cache handled */ | |
2503 | vm_page_busy(m); | |
2504 | } | |
2505 | ||
2506 | if (orig_object->type == OBJT_SWAP) { | |
2507 | vm_object_pip_add(orig_object, 1); | |
2508 | /* | |
2509 | * copy orig_object pages into new_object | |
2510 | * and destroy unneeded pages in | |
2511 | * shadow object. | |
2512 | */ | |
2513 | swap_pager_copy(orig_object, new_object, offidxstart, 0); | |
2514 | vm_object_pip_wakeup(orig_object); | |
2515 | } | |
2516 | ||
2517 | for (idx = 0; idx < size; idx++) { | |
2518 | m = vm_page_lookup(new_object, idx); | |
2519 | if (m) { | |
2520 | vm_page_wakeup(m); | |
2521 | } | |
2522 | } | |
2523 | ||
2524 | entry->object.vm_object = new_object; | |
2525 | entry->offset = 0LL; | |
2526 | vm_object_deallocate(orig_object); | |
2527 | } | |
2528 | ||
2529 | /* | |
2530 | * vm_map_copy_entry: | |
2531 | * | |
2532 | * Copies the contents of the source entry to the destination | |
2533 | * entry. The entries *must* be aligned properly. | |
2534 | */ | |
2535 | static void | |
a108bf71 MD |
2536 | vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, |
2537 | vm_map_entry_t src_entry, vm_map_entry_t dst_entry) | |
984263bc MD |
2538 | { |
2539 | vm_object_t src_object; | |
2540 | ||
2541 | if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP) | |
2542 | return; | |
2543 | ||
2544 | if (src_entry->wired_count == 0) { | |
2545 | ||
2546 | /* | |
2547 | * If the source entry is marked needs_copy, it is already | |
2548 | * write-protected. | |
2549 | */ | |
2550 | if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { | |
2551 | pmap_protect(src_map->pmap, | |
2552 | src_entry->start, | |
2553 | src_entry->end, | |
2554 | src_entry->protection & ~VM_PROT_WRITE); | |
2555 | } | |
2556 | ||
2557 | /* | |
2558 | * Make a copy of the object. | |
2559 | */ | |
2560 | if ((src_object = src_entry->object.vm_object) != NULL) { | |
2561 | ||
2562 | if ((src_object->handle == NULL) && | |
2563 | (src_object->type == OBJT_DEFAULT || | |
2564 | src_object->type == OBJT_SWAP)) { | |
2565 | vm_object_collapse(src_object); | |
2566 | if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) { | |
2567 | vm_map_split(src_entry); | |
2568 | src_object = src_entry->object.vm_object; | |
2569 | } | |
2570 | } | |
2571 | ||
2572 | vm_object_reference(src_object); | |
2573 | vm_object_clear_flag(src_object, OBJ_ONEMAPPING); | |
2574 | dst_entry->object.vm_object = src_object; | |
2575 | src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); | |
2576 | dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); | |
2577 | dst_entry->offset = src_entry->offset; | |
2578 | } else { | |
2579 | dst_entry->object.vm_object = NULL; | |
2580 | dst_entry->offset = 0; | |
2581 | } | |
2582 | ||
2583 | pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, | |
2584 | dst_entry->end - dst_entry->start, src_entry->start); | |
2585 | } else { | |
2586 | /* | |
2587 | * Of course, wired down pages can't be set copy-on-write. | |
2588 | * Cause wired pages to be copied into the new map by | |
2589 | * simulating faults (the new pages are pageable) | |
2590 | */ | |
2591 | vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); | |
2592 | } | |
2593 | } | |
2594 | ||
2595 | /* | |
2596 | * vmspace_fork: | |
2597 | * Create a new process vmspace structure and vm_map | |
2598 | * based on those of an existing process. The new map | |
2599 | * is based on the old map, according to the inheritance | |
2600 | * values on the regions in that map. | |
2601 | * | |
2602 | * The source map must not be locked. | |
2603 | */ | |
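/*
 * Illustrative sketch (compiled out): the fork path hands the parent
 * vmspace to vmspace_fork() and installs the clone in the child.  The
 * surrounding fork code is assumed to handle reference counting and the
 * rest of the child setup; p1 and p2 are hypothetical names.
 */
#if 0
static void
fork_vmspace_example(struct proc *p1, struct proc *p2)
{
	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
	pmap_pinit2(vmspace_pmap(p2->p_vmspace));
}
#endif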
2604 | struct vmspace * | |
a108bf71 | 2605 | vmspace_fork(struct vmspace *vm1) |
984263bc MD |
2606 | { |
2607 | struct vmspace *vm2; | |
2608 | vm_map_t old_map = &vm1->vm_map; | |
2609 | vm_map_t new_map; | |
2610 | vm_map_entry_t old_entry; | |
2611 | vm_map_entry_t new_entry; | |
2612 | vm_object_t object; | |
a108bf71 | 2613 | int count; |
984263bc MD |
2614 | |
2615 | vm_map_lock(old_map); | |
2616 | old_map->infork = 1; | |
2617 | ||
2618 | vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset); | |
2619 | bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, | |
2620 | (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); | |
2621 | new_map = &vm2->vm_map; /* XXX */ | |
2622 | new_map->timestamp = 1; | |
2623 | ||
a108bf71 | 2624 | count = 0; |
984263bc | 2625 | old_entry = old_map->header.next; |
a108bf71 MD |
2626 | while (old_entry != &old_map->header) { |
2627 | ++count; | |
2628 | old_entry = old_entry->next; | |
2629 | } | |
984263bc | 2630 | |
a108bf71 MD |
2631 | count = vm_map_entry_reserve(count + MAP_RESERVE_COUNT); |
2632 | ||
2633 | old_entry = old_map->header.next; | |
984263bc MD |
2634 | while (old_entry != &old_map->header) { |
2635 | if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) | |
2636 | panic("vm_map_fork: encountered a submap"); | |
2637 | ||
2638 | switch (old_entry->inheritance) { | |
2639 | case VM_INHERIT_NONE: | |
2640 | break; | |
2641 | ||
2642 | case VM_INHERIT_SHARE: | |
2643 | /* | |
2644 | * Clone the entry, creating the shared object if necessary. | |
2645 | */ | |
2646 | object = old_entry->object.vm_object; | |
2647 | if (object == NULL) { | |
2648 | object = vm_object_allocate(OBJT_DEFAULT, | |
2649 | atop(old_entry->end - old_entry->start)); | |
2650 | old_entry->object.vm_object = object; | |
2651 | old_entry->offset = (vm_offset_t) 0; | |
2652 | } | |
2653 | ||
2654 | /* | |
2655 | * Add the reference before calling vm_object_shadow | |
2656 | * to ensure that a shadow object is created. | |
2657 | */ | |
2658 | vm_object_reference(object); | |
2659 | if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { | |
2660 | vm_object_shadow(&old_entry->object.vm_object, | |
2661 | &old_entry->offset, | |
2662 | atop(old_entry->end - old_entry->start)); | |
2663 | old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; | |
2664 | /* Transfer the second reference too. */ | |
2665 | vm_object_reference( | |
2666 | old_entry->object.vm_object); | |
2667 | vm_object_deallocate(object); | |
2668 | object = old_entry->object.vm_object; | |
2669 | } | |
2670 | vm_object_clear_flag(object, OBJ_ONEMAPPING); | |
2671 | ||
2672 | /* | |
2673 | * Clone the entry, referencing the shared object. | |
2674 | */ | |
a108bf71 | 2675 | new_entry = vm_map_entry_create(new_map, &count); |
984263bc MD |
2676 | *new_entry = *old_entry; |
2677 | new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; | |
2678 | new_entry->wired_count = 0; | |
2679 | ||
2680 | /* | |
2681 | * Insert the entry into the new map -- we know we're | |
2682 | * inserting at the end of the new map. | |
2683 | */ | |
2684 | ||
2685 | vm_map_entry_link(new_map, new_map->header.prev, | |
2686 | new_entry); | |
2687 | ||
2688 | /* | |
2689 | * Update the physical map | |
2690 | */ | |
2691 | ||
2692 | pmap_copy(new_map->pmap, old_map->pmap, | |
2693 | new_entry->start, | |
2694 | (old_entry->end - old_entry->start), | |
2695 | old_entry->start); | |
2696 | break; | |
2697 | ||
2698 | case VM_INHERIT_COPY: | |
2699 | /* | |
2700 | * Clone the entry and link into the map. | |
2701 | */ | |
a108bf71 | 2702 | new_entry = vm_map_entry_create(new_map, &count); |
984263bc MD |
2703 | *new_entry = *old_entry; |
2704 | new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; | |
2705 | new_entry->wired_count = 0; | |
2706 | new_entry->object.vm_object = NULL; | |
2707 | vm_map_entry_link(new_map, new_map->header.prev, | |
2708 | new_entry); | |
2709 | vm_map_copy_entry(old_map, new_map, old_entry, | |
2710 | new_entry); | |
2711 | break; | |
2712 | } | |
2713 | old_entry = old_entry->next; | |
2714 | } | |
2715 | ||
2716 | new_map->size = old_map->size; | |
2717 | old_map->infork = 0; | |
2718 | vm_map_unlock(old_map); | |
a108bf71 | 2719 | vm_map_entry_release(count); |
984263bc MD |
2720 | |
2721 | return (vm2); | |
2722 | } | |
2723 | ||
2724 | int | |
2725 | vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, | |
2726 | vm_prot_t prot, vm_prot_t max, int cow) | |
2727 | { | |
2728 | vm_map_entry_t prev_entry; | |
2729 | vm_map_entry_t new_stack_entry; | |
2730 | vm_size_t init_ssize; | |
2731 | int rv; | |
a108bf71 | 2732 | int count; |
984263bc MD |
2733 | |
2734 | if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS) | |
2735 | return (KERN_NO_SPACE); | |
2736 | ||
2737 | if (max_ssize < sgrowsiz) | |
2738 | init_ssize = max_ssize; | |
2739 | else | |
2740 | init_ssize = sgrowsiz; | |
2741 | ||
a108bf71 | 2742 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc MD |
2743 | vm_map_lock(map); |
2744 | ||
2745 | /* If addr is already mapped, no go */ | |
2746 | if (vm_map_lookup_entry(map, addrbos, &prev_entry)) { | |
2747 | vm_map_unlock(map); | |
a108bf71 | 2748 | vm_map_entry_release(count); |
984263bc MD |
2749 | return (KERN_NO_SPACE); |
2750 | } | |
2751 | ||
2752 | /* If we would blow our VMEM resource limit, no go */ | |
2753 | if (map->size + init_ssize > | |
2754 | curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) { | |
2755 | vm_map_unlock(map); | |
a108bf71 | 2756 | vm_map_entry_release(count); |
984263bc MD |
2757 | return (KERN_NO_SPACE); |
2758 | } | |
2759 | ||
2760 | /* If we can't accommodate max_ssize in the current mapping, | |
2761 | * no go. However, we need to be aware that subsequent user | |
2762 | * mappings might map into the space we have reserved for | |
2763 | * stack, and currently this space is not protected. | |
2764 | * | |
2765 | * Hopefully we will at least detect this condition | |
2766 | * when we try to grow the stack. | |
2767 | */ | |
2768 | if ((prev_entry->next != &map->header) && | |
2769 | (prev_entry->next->start < addrbos + max_ssize)) { | |
2770 | vm_map_unlock(map); | |
a108bf71 | 2771 | vm_map_entry_release(count); |
984263bc MD |
2772 | return (KERN_NO_SPACE); |
2773 | } | |
2774 | ||
2775 | /* We initially map a stack of only init_ssize. We will | |
2776 | * grow as needed later. Since this is to be a grow | |
2777 | * down stack, we map at the top of the range. | |
2778 | * | |
2779 | * Note: we would normally expect prot and max to be | |
2780 | * VM_PROT_ALL, and cow to be 0. Possibly we should | |
2781 | * eliminate these as input parameters, and just | |
2782 | * pass these values here in the insert call. | |
2783 | */ | |
a108bf71 MD |
2784 | rv = vm_map_insert(map, &count, |
2785 | NULL, 0, addrbos + max_ssize - init_ssize, | |
984263bc MD |
2786 | addrbos + max_ssize, prot, max, cow); |
2787 | ||
2788 | /* Now set the avail_ssize amount */ | |
2789 | if (rv == KERN_SUCCESS){ | |
2790 | if (prev_entry != &map->header) | |
a108bf71 | 2791 | vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize, &count); |
984263bc MD |
2792 | new_stack_entry = prev_entry->next; |
2793 | if (new_stack_entry->end != addrbos + max_ssize || | |
2794 | new_stack_entry->start != addrbos + max_ssize - init_ssize) | |
2795 | panic ("Bad entry start/end for new stack entry"); | |
2796 | else | |
2797 | new_stack_entry->avail_ssize = max_ssize - init_ssize; | |
2798 | } | |
2799 | ||
2800 | vm_map_unlock(map); | |
a108bf71 | 2801 | vm_map_entry_release(count); |
984263bc MD |
2802 | return (rv); |
2803 | } | |
2804 | ||
2805 | /* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the | |
2806 | * desired address is already mapped, or if we successfully grow | |
2807 | * the stack. Also returns KERN_SUCCESS if addr is outside the | |
2808 | * stack range (this is strange, but preserves compatibility with | |
2809 | * the grow function in vm_machdep.c). | |
2810 | */ | |
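/*
 * Illustrative sketch (compiled out): a trap handler typically offers the
 * faulting address to vm_map_growstack() before running the normal fault
 * path.  The vm_fault()/VM_FAULT_NORMAL follow-up shown here is assumed
 * from the fault code and is not defined in this file.
 */
#if 0
static int
fault_grow_example(struct proc *p, vm_offset_t va, vm_prot_t ftype)
{
	if (vm_map_growstack(p, va) != KERN_SUCCESS)
		return (KERN_FAILURE);
	return (vm_fault(&p->p_vmspace->vm_map, trunc_page(va),
			 ftype, VM_FAULT_NORMAL));
}
#endif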
2811 | int | |
2812 | vm_map_growstack (struct proc *p, vm_offset_t addr) | |
2813 | { | |
2814 | vm_map_entry_t prev_entry; | |
2815 | vm_map_entry_t stack_entry; | |
2816 | vm_map_entry_t new_stack_entry; | |
2817 | struct vmspace *vm = p->p_vmspace; | |
2818 | vm_map_t map = &vm->vm_map; | |
2819 | vm_offset_t end; | |
a108bf71 MD |
2820 | int grow_amount; |
2821 | int rv = KERN_SUCCESS; | |
2822 | int is_procstack; | |
2823 | int use_read_lock = 1; | |
2824 | int count; | |
984263bc | 2825 | |
a108bf71 | 2826 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
984263bc MD |
2827 | Retry: |
2828 | if (use_read_lock) | |
2829 | vm_map_lock_read(map); | |
2830 | else | |
2831 | vm_map_lock(map); | |
2832 | ||
2833 | /* If addr is already in the entry range, no need to grow.*/ | |
2834 | if (vm_map_lookup_entry(map, addr, &prev_entry)) | |
2835 | goto done; | |
2836 | ||
2837 | if ((stack_entry = prev_entry->next) == &map->header) | |
2838 | goto done; | |
2839 | if (prev_entry == &map->header) | |
2840 | end = stack_entry->start - stack_entry->avail_ssize; | |
2841 | else | |
2842 | end = prev_entry->end; | |
2843 | ||
2844 | /* This next test mimics the old grow function in vm_machdep.c. | |
2845 | * It really doesn't quite make sense, but we do it anyway | |
2846 | * for compatibility. | |
2847 | * | |
2848 | * If the stack is not growable, return success. This signals the | |
2849 | * caller to proceed as it normally would with normal vm. | |
2850 | */ | |
2851 | if (stack_entry->avail_ssize < 1 || | |
2852 | addr >= stack_entry->start || | |
2853 | addr < stack_entry->start - stack_entry->avail_ssize) { | |
2854 | goto done; | |
2855 | } | |
2856 | ||
2857 | /* Find the minimum grow amount */ | |
2858 | grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE); | |
2859 | if (grow_amount > stack_entry->avail_ssize) { | |
2860 | rv = KERN_NO_SPACE; | |
2861 | goto done; | |
2862 | } | |
2863 | ||
2864 | /* If there is no longer enough space between the entries, | |
2865 | * no go; adjust the available space. Note: this | |
2866 | * should only happen if the user has mapped into the | |
2867 | * stack area after the stack was created, and is | |
2868 | * probably an error. | |
2869 | * | |
2870 | * This also effectively destroys any guard page the user | |
2871 | * might have intended by limiting the stack size. | |
2872 | */ | |
2873 | if (grow_amount > stack_entry->start - end) { | |
2874 | if (use_read_lock && vm_map_lock_upgrade(map)) { | |
2875 | use_read_lock = 0; | |
2876 | goto Retry; | |
2877 | } | |
2878 | use_read_lock = 0; | |
2879 | stack_entry->avail_ssize = stack_entry->start - end; | |
2880 | rv = KERN_NO_SPACE; | |
2881 | goto done; | |
2882 | } | |
2883 | ||
2884 | is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr; | |
2885 | ||
2886 | /* If this is the main process stack, see if we're over the | |
2887 | * stack limit. | |
2888 | */ | |
2889 | if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > | |
2890 | p->p_rlimit[RLIMIT_STACK].rlim_cur)) { | |
2891 | rv = KERN_NO_SPACE; | |
2892 | goto done; | |
2893 | } | |
2894 | ||
2895 | /* Round up the grow amount modulo SGROWSIZ */ | |
2896 | grow_amount = roundup (grow_amount, sgrowsiz); | |
2897 | if (grow_amount > stack_entry->avail_ssize) { | |
2898 | grow_amount = stack_entry->avail_ssize; | |
2899 | } | |
2900 | if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > | |
2901 | p->p_rlimit[RLIMIT_STACK].rlim_cur)) { | |
2902 | grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur - | |
2903 | ctob(vm->vm_ssize); | |
2904 | } | |
2905 | ||
2906 | /* If we would blow our VMEM resource limit, no go */ | |
be77b5f9 | 2907 | if (map->size + grow_amount > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { |
984263bc MD |
2908 | rv = KERN_NO_SPACE; |
2909 | goto done; | |
2910 | } | |
2911 | ||
2912 | if (use_read_lock && vm_map_lock_upgrade(map)) { | |
2913 | use_read_lock = 0; | |
2914 | goto Retry; | |
2915 | } | |
2916 | use_read_lock = 0; | |
2917 | ||
2918 | /* Get the preliminary new entry start value */ | |
2919 | addr = stack_entry->start - grow_amount; | |
2920 | ||
2921 | /* If this puts us into the previous entry, cut back our growth | |
2922 | * to the available space. Also, see the note above. | |
2923 | */ | |
2924 | if (addr < end) { | |
2925 | stack_entry->avail_ssize = stack_entry->start - end; | |
2926 | addr = end; | |
2927 | } | |
2928 | ||
a108bf71 MD |
2929 | rv = vm_map_insert(map, &count, |
2930 | NULL, 0, addr, stack_entry->start, | |
984263bc MD |
2931 | VM_PROT_ALL, |
2932 | VM_PROT_ALL, | |
2933 | 0); | |
2934 | ||
2935 | /* Adjust the available stack space by the amount we grew. */ | |
2936 | if (rv == KERN_SUCCESS) { | |
2937 | if (prev_entry != &map->header) | |
a108bf71 | 2938 | vm_map_clip_end(map, prev_entry, addr, &count); |
984263bc MD |
2939 | new_stack_entry = prev_entry->next; |
2940 | if (new_stack_entry->end != stack_entry->start || | |
2941 | new_stack_entry->start != addr) | |
2942 | panic ("Bad stack grow start/end in new stack entry"); | |
2943 | else { | |
2944 | new_stack_entry->avail_ssize = stack_entry->avail_ssize - | |
2945 | (new_stack_entry->end - | |
2946 | new_stack_entry->start); | |
2947 | if (is_procstack) | |
2948 | vm->vm_ssize += btoc(new_stack_entry->end - | |
2949 | new_stack_entry->start); | |
2950 | } | |
2951 | } | |
2952 | ||
2953 | done: | |
2954 | if (use_read_lock) | |
2955 | vm_map_unlock_read(map); | |
2956 | else | |
2957 | vm_map_unlock(map); | |
a108bf71 | 2958 | vm_map_entry_release(count); |
984263bc MD |
2959 | return (rv); |
2960 | } | |
2961 | ||
2962 | /* | |
2963 | * Unshare the specified VM space for exec. If other processes are | |
2964 | * mapped to it, then create a new one. The new vmspace is null. | |
2965 | */ | |
2966 | ||
2967 | void | |
a108bf71 MD |
2968 | vmspace_exec(struct proc *p) |
2969 | { | |
984263bc MD |
2970 | struct vmspace *oldvmspace = p->p_vmspace; |
2971 | struct vmspace *newvmspace; | |
2972 | vm_map_t map = &p->p_vmspace->vm_map; | |
2973 | ||
2974 | newvmspace = vmspace_alloc(map->min_offset, map->max_offset); | |
2975 | bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy, | |
2976 | (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy); | |
2977 | /* | |
2978 | * This code is written like this for prototype purposes. The | |
2979 | * goal is to avoid running down the vmspace here, but let the | |
2980 | * other processes that are still using the vmspace finally | |
2981 | * run it down. Even though there is little or no chance of blocking | |
2982 | * here, it is a good idea to keep this form for future mods. | |
2983 | */ | |
2984 | vmspace_free(oldvmspace); | |
2985 | p->p_vmspace = newvmspace; | |
2986 | pmap_pinit2(vmspace_pmap(newvmspace)); | |
2987 | if (p == curproc) | |
2988 | pmap_activate(p); | |
2989 | } | |
2990 | ||
2991 | /* | |
2992 | * Unshare the specified VM space for forcing COW. This | |
2993 | * is called by rfork, for the (RFMEM|RFPROC) == 0 case. | |
2994 | */ | |
2995 | ||
2996 | void | |
a108bf71 MD |
2997 | vmspace_unshare(struct proc *p) |
2998 | { | |
984263bc MD |
2999 | struct vmspace *oldvmspace = p->p_vmspace; |
3000 | struct vmspace *newvmspace; | |
3001 | ||
3002 | if (oldvmspace->vm_refcnt == 1) | |
3003 | return; | |
3004 | newvmspace = vmspace_fork(oldvmspace); | |
3005 | vmspace_free(oldvmspace); | |
3006 | p->p_vmspace = newvmspace; | |
3007 | pmap_pinit2(vmspace_pmap(newvmspace)); | |
3008 | if (p == curproc) | |
3009 | pmap_activate(p); | |
3010 | } | |
984263bc MD |
3011 | |
3012 | /* | |
3013 | * vm_map_lookup: | |
3014 | * | |
3015 | * Finds the VM object, offset, and | |
3016 | * protection for a given virtual address in the | |
3017 | * specified map, assuming a page fault of the | |
3018 | * type specified. | |
3019 | * | |
3020 | * Leaves the map in question locked for read; return | |
3021 | * values are guaranteed until a vm_map_lookup_done | |
3022 | * call is performed. Note that the map argument | |
3023 | * is in/out; the returned map must be used in | |
3024 | * the call to vm_map_lookup_done. | |
3025 | * | |
3026 | * A handle (out_entry) is returned for use in | |
3027 | * vm_map_lookup_done, to make that fast. | |
3028 | * | |
3029 | * If a lookup is requested with "write protection" | |
3030 | * specified, the map may be changed to perform virtual | |
3031 | * copying operations, although the data referenced will | |
3032 | * remain the same. | |
3033 | */ | |
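/*
 * Illustrative sketch (compiled out): every successful vm_map_lookup()
 * is paired with vm_map_lookup_done() once the caller is finished with
 * the returned object/entry; vm_map_lookup_done() is assumed to take the
 * (possibly changed) map and the entry handle returned here.
 */
#if 0
static int
lookup_example(vm_map_t map, vm_offset_t va)
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	int rv;

	rv = vm_map_lookup(&map, va, VM_PROT_READ, &entry, &object,
			   &pindex, &prot, &wired);
	if (rv != KERN_SUCCESS)
		return (rv);
	/* ... use object + pindex while the map stays read-locked ... */
	vm_map_lookup_done(map, entry);
	return (KERN_SUCCESS);
}
#endif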
3034 | int | |
3035 | vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ | |
3036 | vm_offset_t vaddr, | |
3037 | vm_prot_t fault_typea, | |
3038 | vm_map_entry_t *out_entry, /* OUT */ | |
3039 | vm_object_t *object, /* OUT */ | |
3040 | vm_pindex_t *pindex, /* OUT */ | |
3041 | vm_prot_t *out_prot, /* OUT */ | |
3042 | boolean_t *wired) /* OUT */ | |
3043 | { | |
3044 | vm_map_entry_t entry; | |
3045 | vm_map_t map = *var_map; | |
3046 | vm_prot_t prot; | |
3047 | vm_prot_t fault_type = fault_typea; | |
3048 | int use_read_lock = 1; | |
3049 | int rv = KERN_SUCCESS; | |
3050 | ||
3051 | RetryLookup: | |
3052 | if (use_read_lock) | |
3053 | vm_map_lock_read(map); | |
3054 | else | |
3055 | vm_map_lock(map); | |
3056 | ||
3057 | /* | |
3058 | * If the map has an interesting hint, try it before calling full | |
3059 | * blown lookup routine. | |
3060 | */ | |
3061 | entry = map->hint; | |
3062 | *out_entry = entry; | |
3063 | ||
3064 | if ((entry == &map->header) || | |
3065 | (vaddr < entry->start) || (vaddr >= entry->end)) { | |
3066 | vm_map_entry_t tmp_entry; | |
3067 | ||
3068 | /* | |
3069 | * Entry was either not a valid hint, or the vaddr was not | |
3070 | * contained in the entry, so do a full lookup. | |
3071 | */ | |
3072 | if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) { | |
3073 | rv = KERN_INVALID_ADDRESS; | |
3074 | goto done; | |
3075 | } | |
3076 | ||
3077 | entry = tmp_entry; | |
3078 | *out_entry = entry; | |
3079 | } | |
3080 | ||
3081 | /* | |
3082 | * Handle submaps. | |
3083 | */ | |
3084 | ||
3085 | if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { | |
3086 | vm_map_t old_map = map; | |
3087 | ||
3088 | *var_map = map = entry->object.sub_map; | |
3089 | if (use_read_lock) | |
3090 | vm_map_unlock_read(old_map); | |
3091 | else | |
3092 | vm_map_unlock(old_map); | |
3093 | use_read_lock = 1; | |
3094 | goto RetryLookup; | |
3095 | } | |
3096 | ||
3097 | /* | |
3098 | * Check whether this task is allowed to have this page. | |
3099 | * Note the special case for MAP_ENTRY_COW | |
3100 | * pages with an override. This is to implement a forced | |
3101 | * COW for debuggers. | |
3102 | */ | |
3103 | ||
3104 | if (fault_type & VM_PROT_OVERRIDE_WRITE) | |
3105 | prot = entry->max_protection; | |
3106 | else | |
3107 | prot = entry->protection; | |
3108 | ||
3109 | fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); | |
3110 | if ((fault_type & prot) != fault_type) { | |
3111 | rv = KERN_PROTECTION_FAILURE; | |
3112 | goto done; | |
3113 | } | |
3114 | ||
3115 | if ((entry->eflags & MAP_ENTRY_USER_WIRED) && | |
3116 | (entry->eflags & MAP_ENTRY_COW) && | |
3117 | (fault_type & VM_PROT_WRITE) && | |
3118 | (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) { | |
3119 | rv = KERN_PROTECTION_FAILURE; | |
3120 | goto done; | |
3121 | } | |
3122 | ||
3123 | /* | |
3124 | * If this page is not pageable, we have to get it for all possible | |
3125 | * accesses. | |
3126 | */ | |
3127 | ||
3128 | *wired = (entry->wired_count != 0); | |
3129 | if (*wired) | |
3130 | prot = fault_type = entry->protection; | |
3131 | ||
3132 | /* | |
3133 | * If the entry was copy-on-write, we either resolve the copy now or demote access. | |
3134 | */ | |
3135 | ||
3136 | if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { | |
3137 | /* | |
3138 | * If we want to write the page, we may as well handle that | |
3139 | * now since we've got the map locked. | |
3140 | * | |
3141 | * If we don't need to write the page, we just demote the | |
3142 | * permissions allowed. | |
3143 | */ | |
3144 | ||
3145 | if (fault_type & VM_PROT_WRITE) { | |
3146 | /* | |
3147 | * Make a new object, and place it in the object | |
3148 | * chain. Note that no new references have appeared | |
3149 | * -- one just moved from the map to the new | |
3150 | * object. | |
3151 | */ | |
3152 | ||
3153 | if (use_read_lock && vm_map_lock_upgrade(map)) { | |
3154 | use_read_lock = 0; | |
3155 | goto RetryLookup; | |
3156 | } | |
3157 | use_read_lock = 0; | |
3158 | ||
3159 | vm_object_shadow( | |
3160 | &entry->object.vm_object, | |
3161 | &entry->offset, | |
3162 | atop(entry->end - entry->start)); | |
3163 | ||
3164 | entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; | |
3165 | } else { | |
3166 | /* | |
3167 | * We're attempting to read a copy-on-write page -- | |
3168 | * don't allow writes. | |
3169 | */ | |
3170 | ||
3171 | prot &= ~VM_PROT_WRITE; | |
3172 | } | |
3173 | } | |
3174 | ||
3175 | /* | |
3176 | * Create an object if necessary. | |
3177 | */ | |
3178 | if (entry->object.vm_object == NULL && | |
3179 | !map->system_map) { | |
3180 | if (use_read_lock && vm_map_lock_upgrade(map)) { | |
3181 | use_read_lock = 0; | |
3182 | goto RetryLookup; | |
3183 | } | |
3184 | use_read_lock = 0; | |
3185 | entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, | |
3186 | atop(entry->end - entry->start)); | |
3187 | entry->offset = 0; | |
3188 | } | |
3189 | ||
3190 | /* | |
3191 | * Return the object/offset from this entry. If the entry was | |
3192 | * copy-on-write or empty, it has been fixed up. | |
3193 | */ | |
3194 | ||
3195 | *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); | |
3196 | *object = entry->object.vm_object; | |
3197 | ||
3198 | /* | |
3199 | * Return the computed protection for this access.  On success we | |
3200 | * return with a read lock held on the map.  On failure we return | |
3201 | * with the map unlocked. | |
3202 | */ | |
3203 | *out_prot = prot; | |
3204 | done: | |
3205 | if (rv == KERN_SUCCESS) { | |
3206 | if (use_read_lock == 0) | |
3207 | vm_map_lock_downgrade(map); | |
3208 | } else if (use_read_lock) { | |
3209 | vm_map_unlock_read(map); | |
3210 | } else { | |
3211 | vm_map_unlock(map); | |
3212 | } | |
3213 | return (rv); | |
3214 | } | |
3215 | ||
3216 | /* | |
3217 | * vm_map_lookup_done: | |
3218 | * | |
3219 | * Releases locks acquired by a vm_map_lookup (according to the handle | |
3220 | * returned by that lookup) and any map entry reservation given by count. | |
3221 | */ | |
3222 | ||
3223 | void | |
a108bf71 | 3224 | vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry, int count) |
984263bc MD |
3225 | { |
3226 | /* | |
3227 | * Unlock the main-level map | |
3228 | */ | |
984263bc | 3229 | vm_map_unlock_read(map); |
a108bf71 MD |
3230 | if (count) |
3231 | vm_map_entry_release(count); | |
984263bc MD |
3232 | } |
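/*
 * Illustrative sketch (not part of the build): the intended pairing of
 * vm_map_lookup() and vm_map_lookup_done() described in the comments
 * above.  The wrapper below is hypothetical; it uses only the two
 * signatures defined in this file and passes a zero entry reservation
 * count to vm_map_lookup_done().
 */
#if 0
static int
example_lookup_usage(vm_map_t map, vm_offset_t vaddr)
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	int rv;

	rv = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry,
			   &object, &pindex, &prot, &wired);
	if (rv != KERN_SUCCESS)
		return (rv);

	/*
	 * ... operate on (object, pindex) while the map returned in
	 * 'map' remains read-locked ...
	 */

	/*
	 * Release the read lock using the same (possibly updated) map
	 * and the entry handle returned by the lookup.
	 */
	vm_map_lookup_done(map, entry, 0);
	return (KERN_SUCCESS);
}
#endif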
3233 | ||
3234 | /* | |
3235 | * Implement uiomove with VM operations.  This routine (and its collateral | |
3236 | * changes) supports every combination of source object modification and | |
3237 | * COW type operations. | |
3238 | */ | |
3239 | int | |
3240 | vm_uiomove(vm_map_t mapa, vm_object_t srcobject, off_t cp, int cnta, | |
3241 | vm_offset_t uaddra, int *npages) | |
3247 | { | |
3248 | vm_map_t map; | |
3249 | vm_object_t first_object, oldobject, object; | |
3250 | vm_map_entry_t entry; | |
3251 | vm_prot_t prot; | |
3252 | boolean_t wired; | |
3253 | int tcnt, rv; | |
3254 | vm_offset_t uaddr, start, end, tend; | |
3255 | vm_pindex_t first_pindex, osize, oindex; | |
3256 | off_t ooffset; | |
3257 | int cnt; | |
a108bf71 | 3258 | int count; |
984263bc MD |
3259 | |
3260 | if (npages) | |
3261 | *npages = 0; | |
3262 | ||
3263 | cnt = cnta; | |
3264 | uaddr = uaddra; | |
3265 | ||
3266 | while (cnt > 0) { | |
3267 | map = mapa; | |
3268 | ||
a108bf71 MD |
3269 | count = vm_map_entry_reserve(MAP_RESERVE_COUNT); |
3270 | ||
984263bc MD |
3271 | if ((vm_map_lookup(&map, uaddr, |
3272 | VM_PROT_READ, &entry, &first_object, | |
3273 | &first_pindex, &prot, &wired)) != KERN_SUCCESS) { | |
3274 | return EFAULT; | |
3275 | } | |
3276 | ||
a108bf71 | 3277 | vm_map_clip_start(map, entry, uaddr, &count); |
984263bc MD |
3278 | |
3279 | tcnt = cnt; | |
3280 | tend = uaddr + tcnt; | |
3281 | if (tend > entry->end) { | |
3282 | tcnt = entry->end - uaddr; | |
3283 | tend = entry->end; | |
3284 | } | |
3285 | ||
a108bf71 | 3286 | vm_map_clip_end(map, entry, tend, &count); |
984263bc MD |
3287 | |
3288 | start = entry->start; | |
3289 | end = entry->end; | |
3290 | ||
3291 | osize = atop(tcnt); | |
3292 | ||
3293 | oindex = OFF_TO_IDX(cp); | |
3294 | if (npages) { | |
3295 | vm_pindex_t idx; | |
3296 | for (idx = 0; idx < osize; idx++) { | |
3297 | vm_page_t m; | |
3298 | if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) { | |
a108bf71 | 3299 | vm_map_lookup_done(map, entry, count); |
984263bc MD |
3300 | return 0; |
3301 | } | |
3302 | /* | |
3303 | * disallow busy or invalid pages, but allow | |
3304 | * m->busy pages if they are entirely valid. | |
3305 | */ | |
3306 | if ((m->flags & PG_BUSY) || | |
3307 | ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) { | |
a108bf71 | 3308 | vm_map_lookup_done(map, entry, count); |
984263bc MD |
3309 | return 0; |
3310 | } | |
3311 | } | |
3312 | } | |
3313 | ||
3314 | /* | |
3315 | * If we are changing an existing map entry, just redirect | |
3316 | * the object, and change mappings. | |
3317 | */ | |
3318 | if ((first_object->type == OBJT_VNODE) && | |
3319 | ((oldobject = entry->object.vm_object) == first_object)) { | |
3320 | ||
3321 | if ((entry->offset != cp) || (oldobject != srcobject)) { | |
3322 | /* | |
3323 | * Remove old window into the file | |
3324 | */ | |
3325 | pmap_remove (map->pmap, uaddr, tend); | |
3326 | ||
3327 | /* | |
3328 | * Force copy on write for mmaped regions | |
3329 | */ | |
3330 | vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); | |
3331 | ||
3332 | /* | |
3333 | * Point the object appropriately | |
3334 | */ | |
3335 | if (oldobject != srcobject) { | |
3336 | ||
3337 | /* | |
3338 | * Set the object optimization hint flag | |
3339 | */ | |
3340 | vm_object_set_flag(srcobject, OBJ_OPT); | |
3341 | vm_object_reference(srcobject); | |
3342 | entry->object.vm_object = srcobject; | |
3343 | ||
3344 | if (oldobject) { | |
3345 | vm_object_deallocate(oldobject); | |
3346 | } | |
3347 | } | |
3348 | ||
3349 | entry->offset = cp; | |
3350 | map->timestamp++; | |
3351 | } else { | |
3352 | pmap_remove (map->pmap, uaddr, tend); | |
3353 | } | |
3354 | ||
3355 | } else if ((first_object->ref_count == 1) && | |
3356 | (first_object->size == osize) && | |
3357 | ((first_object->type == OBJT_DEFAULT) || | |
3358 | (first_object->type == OBJT_SWAP)) ) { | |
3359 | ||
3360 | oldobject = first_object->backing_object; | |
3361 | ||
3362 | if ((first_object->backing_object_offset != cp) || | |
3363 | (oldobject != srcobject)) { | |
3364 | /* | |
3365 | * Remove old window into the file | |
3366 | */ | |
3367 | pmap_remove (map->pmap, uaddr, tend); | |
3368 | ||
3369 | /* | |
3370 | * Remove unneeded old pages | |
3371 | */ | |
3372 | vm_object_page_remove(first_object, 0, 0, 0); | |
3373 | ||
3374 | /* | |
3375 | * Invalidate swap space | |
3376 | */ | |
3377 | if (first_object->type == OBJT_SWAP) { | |
3378 | swap_pager_freespace(first_object, | |
3379 | 0, | |
3380 | first_object->size); | |
3381 | } | |
3382 | ||
3383 | /* | |
3384 | * Force copy on write for mmaped regions | |
3385 | */ | |
3386 | vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); | |
3387 | ||
3388 | /* | |
3389 | * Point the object appropriately | |
3390 | */ | |
3391 | if (oldobject != srcobject) { | |
3392 | ||
3393 | /* | |
3394 | * Set the object optimization hint flag | |
3395 | */ | |
3396 | vm_object_set_flag(srcobject, OBJ_OPT); | |
3397 | vm_object_reference(srcobject); | |
3398 | ||
3399 | if (oldobject) { | |
3400 | LIST_REMOVE( | |
3401 | first_object, shadow_list); | |
3402 | oldobject->shadow_count--; | |
3403 | /* XXX bump generation? */ | |
3404 | vm_object_deallocate(oldobject); | |
3405 | } | |
3406 | ||
3407 | LIST_INSERT_HEAD(&srcobject->shadow_head, | |
3408 | first_object, shadow_list); | |
3409 | srcobject->shadow_count++; | |
3410 | /* XXX bump generation? */ | |
3411 | ||
3412 | first_object->backing_object = srcobject; | |
3413 | } | |
3414 | first_object->backing_object_offset = cp; | |
3415 | map->timestamp++; | |
3416 | } else { | |
3417 | pmap_remove (map->pmap, uaddr, tend); | |
3418 | } | |
3419 | /* | |
3420 | * Otherwise, we have to do a logical mmap. | |
3421 | */ | |
3422 | } else { | |
3423 | ||
3424 | vm_object_set_flag(srcobject, OBJ_OPT); | |
3425 | vm_object_reference(srcobject); | |
3426 | ||
3427 | pmap_remove (map->pmap, uaddr, tend); | |
3428 | ||
3429 | vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); | |
3430 | vm_map_lock_upgrade(map); | |
3431 | ||
3432 | if (entry == &map->header) { | |
3433 | map->first_free = &map->header; | |
3434 | } else if (map->first_free->start >= start) { | |
3435 | map->first_free = entry->prev; | |
3436 | } | |
3437 | ||
3438 | SAVE_HINT(map, entry->prev); | |
a108bf71 | 3439 | vm_map_entry_delete(map, entry, &count); |
984263bc MD |
3440 | |
3441 | object = srcobject; | |
3442 | ooffset = cp; | |
3443 | ||
a108bf71 MD |
3444 | rv = vm_map_insert(map, &count, |
3445 | object, ooffset, start, tend, | |
984263bc MD |
3446 | VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE); |
3447 | ||
3448 | if (rv != KERN_SUCCESS) | |
3449 | panic("vm_uiomove: could not insert new entry: %d", rv); | |
3450 | } | |
3451 | ||
3452 | /* | |
3453 | * Map the window directly, if it is already in memory | |
3454 | */ | |
3455 | pmap_object_init_pt(map->pmap, uaddr, | |
3456 | srcobject, oindex, tcnt, 0); | |
3457 | ||
3458 | map->timestamp++; | |
3459 | vm_map_unlock(map); | |
a108bf71 | 3460 | vm_map_entry_release(count); |
984263bc MD |
3461 | |
3462 | cnt -= tcnt; | |
3463 | uaddr += tcnt; | |
3464 | cp += tcnt; | |
3465 | if (npages) | |
3466 | *npages += osize; | |
3467 | } | |
3468 | return 0; | |
3469 | } | |
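/*
 * Illustrative sketch (not part of the build): a hypothetical caller
 * mapping 'len' bytes of a source VM object at byte offset 'foff' into
 * the user address 'uva' of the current process, as described in the
 * comment above vm_uiomove().  The wrapper name and its parameters are
 * assumptions for illustration only.
 */
#if 0
static int
example_vm_uiomove(vm_object_t srcobject, off_t foff, int len,
		   vm_offset_t uva)
{
	int npages = 0;
	int error;

	/* EFAULT on lookup failure, 0 on success, per vm_uiomove(). */
	error = vm_uiomove(&curproc->p_vmspace->vm_map, srcobject,
			   foff, len, uva, &npages);
	return (error);
}
#endif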
3470 | ||
3471 | /* | |
3472 | * Performs the copy_on_write operations necessary to allow the virtual copies | |
3473 | * into user space to work. This has to be called for write(2) system calls | |
3474 | * from other processes, file unlinking, and file size shrinkage. | |
3475 | */ | |
3476 | void | |
3477 | vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa) | |
3480 | { | |
3481 | int rv; | |
3482 | vm_object_t robject; | |
3483 | vm_pindex_t idx; | |
3484 | ||
3485 | if ((object == NULL) || | |
3486 | ((object->flags & OBJ_OPT) == 0)) | |
3487 | return; | |
3488 | ||
3489 | if (object->shadow_count > object->ref_count) | |
3490 | panic("vm_freeze_copyopts: sc > rc"); | |
3491 | ||
3492 | while((robject = LIST_FIRST(&object->shadow_head)) != NULL) { | |
3493 | vm_pindex_t bo_pindex; | |
3494 | vm_page_t m_in, m_out; | |
3495 | ||
3496 | bo_pindex = OFF_TO_IDX(robject->backing_object_offset); | |
3497 | ||
3498 | vm_object_reference(robject); | |
3499 | ||
3500 | vm_object_pip_wait(robject, "objfrz"); | |
3501 | ||
3502 | if (robject->ref_count == 1) { | |
3503 | vm_object_deallocate(robject); | |
3504 | continue; | |
3505 | } | |
3506 | ||
3507 | vm_object_pip_add(robject, 1); | |
3508 | ||
3509 | for (idx = 0; idx < robject->size; idx++) { | |
3510 | ||
3511 | m_out = vm_page_grab(robject, idx, | |
3512 | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); | |
3513 | ||
3514 | if (m_out->valid == 0) { | |
3515 | m_in = vm_page_grab(object, bo_pindex + idx, | |
3516 | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); | |
3517 | if (m_in->valid == 0) { | |
3518 | rv = vm_pager_get_pages(object, &m_in, 1, 0); | |
3519 | if (rv != VM_PAGER_OK) { | |
3520 | printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex); | |
3521 | continue; | |
3522 | } | |
3523 | vm_page_deactivate(m_in); | |
3524 | } | |
3525 | ||
3526 | vm_page_protect(m_in, VM_PROT_NONE); | |
3527 | pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out)); | |
3528 | m_out->valid = m_in->valid; | |
3529 | vm_page_dirty(m_out); | |
3530 | vm_page_activate(m_out); | |
3531 | vm_page_wakeup(m_in); | |
3532 | } | |
3533 | vm_page_wakeup(m_out); | |
3534 | } | |
3535 | ||
3536 | object->shadow_count--; | |
3537 | object->ref_count--; | |
3538 | LIST_REMOVE(robject, shadow_list); | |
3539 | robject->backing_object = NULL; | |
3540 | robject->backing_object_offset = 0; | |
3541 | ||
3542 | vm_object_pip_wakeup(robject); | |
3543 | vm_object_deallocate(robject); | |
3544 | } | |
3545 | ||
3546 | vm_object_clear_flag(object, OBJ_OPT); | |
3547 | } | |
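/*
 * Illustrative sketch (not part of the build): a hypothetical
 * filesystem-side caller freezing copy-on-write optimizations against
 * a file's VM object before the underlying file is modified, per the
 * comment above vm_freeze_copyopts().  The wrapper name and the
 * byte-to-page conversion via OFF_TO_IDX() are assumptions for
 * illustration.
 */
#if 0
static void
example_freeze_before_modify(vm_object_t object, off_t offset, off_t len)
{
	/* vm_freeze_copyopts() also checks OBJ_OPT; this just avoids the call. */
	if (object != NULL && (object->flags & OBJ_OPT))
		vm_freeze_copyopts(object, OFF_TO_IDX(offset),
				   OFF_TO_IDX(offset + len));
}
#endif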
3548 | ||
3549 | #include "opt_ddb.h" | |
3550 | #ifdef DDB | |
3551 | #include <sys/kernel.h> | |
3552 | ||
3553 | #include <ddb/ddb.h> | |
3554 | ||
3555 | /* | |
3556 | * vm_map_print: [ debug ] | |
3557 | */ | |
3558 | DB_SHOW_COMMAND(map, vm_map_print) | |
3559 | { | |
3560 | static int nlines; | |
3561 | /* XXX convert args. */ | |
3562 | vm_map_t map = (vm_map_t)addr; | |
3563 | boolean_t full = have_addr; | |
3564 | ||
3565 | vm_map_entry_t entry; | |
3566 | ||
3567 | db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", | |
3568 | (void *)map, | |
3569 | (void *)map->pmap, map->nentries, map->timestamp); | |
3570 | nlines++; | |
3571 | ||
3572 | if (!full && db_indent) | |
3573 | return; | |
3574 | ||
3575 | db_indent += 2; | |
3576 | for (entry = map->header.next; entry != &map->header; | |
3577 | entry = entry->next) { | |
3578 | db_iprintf("map entry %p: start=%p, end=%p\n", | |
3579 | (void *)entry, (void *)entry->start, (void *)entry->end); | |
3580 | nlines++; | |
3581 | { | |
3582 | static char *inheritance_name[4] = | |
3583 | {"share", "copy", "none", "donate_copy"}; | |
3584 | ||
3585 | db_iprintf(" prot=%x/%x/%s", | |
3586 | entry->protection, | |
3587 | entry->max_protection, | |
3588 | inheritance_name[(int)(unsigned char)entry->inheritance]); | |
3589 | if (entry->wired_count != 0) | |
3590 | db_printf(", wired"); | |
3591 | } | |
3592 | if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { | |
3593 | /* XXX no %qd in kernel. Truncate entry->offset. */ | |
3594 | db_printf(", share=%p, offset=0x%lx\n", | |
3595 | (void *)entry->object.sub_map, | |
3596 | (long)entry->offset); | |
3597 | nlines++; | |
3598 | if ((entry->prev == &map->header) || | |
3599 | (entry->prev->object.sub_map != | |
3600 | entry->object.sub_map)) { | |
3601 | db_indent += 2; | |
3602 | vm_map_print((db_expr_t)(intptr_t) | |
3603 | entry->object.sub_map, | |
3604 | full, 0, (char *)0); | |
3605 | db_indent -= 2; | |
3606 | } | |
3607 | } else { | |
3608 | /* XXX no %qd in kernel. Truncate entry->offset. */ | |
3609 | db_printf(", object=%p, offset=0x%lx", | |
3610 | (void *)entry->object.vm_object, | |
3611 | (long)entry->offset); | |
3612 | if (entry->eflags & MAP_ENTRY_COW) | |
3613 | db_printf(", copy (%s)", | |
3614 | (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); | |
3615 | db_printf("\n"); | |
3616 | nlines++; | |
3617 | ||
3618 | if ((entry->prev == &map->header) || | |
3619 | (entry->prev->object.vm_object != | |
3620 | entry->object.vm_object)) { | |
3621 | db_indent += 2; | |
3622 | vm_object_print((db_expr_t)(intptr_t) | |
3623 | entry->object.vm_object, | |
3624 | full, 0, (char *)0); | |
3625 | nlines += 4; | |
3626 | db_indent -= 2; | |
3627 | } | |
3628 | } | |
3629 | } | |
3630 | db_indent -= 2; | |
3631 | if (db_indent == 0) | |
3632 | nlines = 0; | |
3633 | } | |
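/*
 * Note: from the ddb prompt the command defined above is typically
 * invoked as "show map <address>" (and "show procvm [<address>]" for
 * the command below).  The exact ddb syntax is an assumption based on
 * the usual DB_SHOW_COMMAND() convention, not something stated in this
 * file.
 */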
3634 | ||
3635 | ||
3636 | DB_SHOW_COMMAND(procvm, procvm) | |
3637 | { | |
3638 | struct proc *p; | |
3639 | ||
3640 | if (have_addr) { | |
3641 | p = (struct proc *) addr; | |
3642 | } else { | |
3643 | p = curproc; | |
3644 | } | |
3645 | ||
3646 | db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", | |
3647 | (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, | |
3648 | (void *)vmspace_pmap(p->p_vmspace)); | |
3649 | ||
3650 | vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL); | |
3651 | } | |
3652 | ||
3653 | #endif /* DDB */ |