Commit | Line | Data |
---|---|---|
133aabc4 | 1 | /* |
af2b4857 MD |
2 | * (MPSAFE) |
3 | * | |
133aabc4 MD |
4 | * Copyright (c) 2006 The DragonFly Project. All rights reserved. |
5 | * | |
6 | * This code is derived from software contributed to The DragonFly Project | |
7 | * by Matthew Dillon <dillon@backplane.com> | |
8 | * | |
9 | * Redistribution and use in source and binary forms, with or without | |
10 | * modification, are permitted provided that the following conditions | |
11 | * are met: | |
12 | * | |
13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in | |
17 | * the documentation and/or other materials provided with the | |
18 | * distribution. | |
19 | * 3. Neither the name of The DragonFly Project nor the names of its | |
20 | * contributors may be used to endorse or promote products derived | |
21 | * from this software without specific, prior written permission. | |
22 | * | |
23 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
24 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
25 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
26 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
27 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
28 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
29 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
30 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
31 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
32 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
33 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
133aabc4 MD |
35 | */ |
36 | ||
37 | #include <sys/param.h> | |
38 | #include <sys/kernel.h> | |
39 | #include <sys/systm.h> | |
80d831e1 | 40 | #include <sys/sysmsg.h> |
021a4ed4 | 41 | #include <sys/kern_syscall.h> |
d3313941 | 42 | #include <sys/mman.h> |
af2b4857 | 43 | #include <sys/thread.h> |
d3313941 MD |
44 | #include <sys/proc.h> |
45 | #include <sys/malloc.h> | |
46 | #include <sys/sysctl.h> | |
47 | #include <sys/vkernel.h> | |
4a22e893 | 48 | #include <sys/vmspace.h> |
d3313941 MD |
49 | |
50 | #include <vm/vm_extern.h> | |
51 | #include <vm/pmap.h> | |
52 | ||
c439ad8f MD |
53 | #include <machine/vmparam.h> |
54 | ||
39005e16 | 55 | static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp, |
d95d5e03 | 56 | void *id, int havetoken); |
76f1911e MD |
57 | static int vmspace_entry_delete(struct vmspace_entry *ve, |
58 | struct vkernel_proc *vkp, int refs); | |
59 | static void vmspace_entry_cache_ref(struct vmspace_entry *ve); | |
60 | static void vmspace_entry_cache_drop(struct vmspace_entry *ve); | |
d95d5e03 | 61 | static void vmspace_entry_drop(struct vmspace_entry *ve); |
d3313941 MD |
62 | |
63 | static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures"); | |
133aabc4 MD |
64 | |
/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap().
 *
 * No requirements.
 */
int
sys_vmspace_create(struct sysmsg *sysmsg,
		   const struct vmspace_create_args *uap)
{
	struct vmspace_entry *ve;
	struct vkernel_proc *vkp;
	struct proc *p = curproc;
	int error;

	if (vkernel_enable == 0)
		return (EOPNOTSUPP);

	/*
	 * Create a virtual kernel side-structure for the process if one
	 * does not exist.
	 *
	 * Implement a simple resolution for SMP races: allocate first,
	 * then re-check p->p_vkernel under p->p_token and discard our
	 * allocation if another thread beat us to the install.
	 */
	if ((vkp = p->p_vkernel) == NULL) {
		vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
		lwkt_gettoken(&p->p_token);
		if (p->p_vkernel == NULL) {
			vkp->refs = 1;
			lwkt_token_init(&vkp->token, "vkernel");
			RB_INIT(&vkp->root);
			p->p_vkernel = vkp;
		} else {
			/* lost the race, use the winner's structure */
			kfree(vkp, M_VKERNEL);
			vkp = p->p_vkernel;
		}
		lwkt_reltoken(&p->p_token);
	}

	/*
	 * Create a new VMSPACE, disallow conflicting ids
	 */
	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	ve->id = uap->id;
	ve->refs = 0;		/* active refs (none) */
	ve->cache_refs = 1;	/* on-tree, not deleted (prevent kfree) */
	pmap_pinit2(vmspace_pmap(ve->vmspace));

	/* vkp->token interlocks RB tree insertion against lookups/deletes */
	lwkt_gettoken(&vkp->token);
	if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
		/* duplicate id: undo the allocation */
		vmspace_rel(ve->vmspace);
		ve->vmspace = NULL;	/* safety */
		kfree(ve, M_VKERNEL);
		error = EEXIST;
	} else {
		error = 0;
	}
	lwkt_reltoken(&vkp->token);

	return (error);
}
133 | ||
/*
 * vmspace_destroy (void *id)
 *
 * Destroy a VMSPACE given its identifier.  Fails with EBUSY if the
 * vmspace still has active references other than the caller's, and
 * with ENOENT if the id does not resolve.
 *
 * No requirements.
 */
int
sys_vmspace_destroy(struct sysmsg *sysmsg,
		    const struct vmspace_destroy_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL)
		return EINVAL;

	/*
	 * vkp->token protects the deletion against a new RB tree search.
	 */
	lwkt_gettoken(&vkp->token);
	error = ENOENT;
	if ((ve = vkernel_find_vmspace(vkp, uap->id, 1)) != NULL) {
		/*
		 * The find bumped ve->refs (passed as refs=1 below); on
		 * successful deletion we also drop the tree's cache ref.
		 */
		error = vmspace_entry_delete(ve, vkp, 1);
		if (error == 0)
			vmspace_entry_cache_drop(ve);
	}
	lwkt_reltoken(&vkp->token);

	return(error);
}
164 | ||
/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *		struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * No requirements.
 */
int
sys_vmspace_ctl(struct sysmsg *sysmsg,
		const struct vmspace_ctl_args *uap)
{
	struct vmspace_ctl_args ua = *uap;
	struct vkernel_proc *vkp;
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve = NULL;
	struct lwp *lp;
	struct proc *p;
	int framesz;
	int error;

	lp = curthread->td_lwp;
	p = lp->lwp_proc;

	if ((vkp = p->p_vkernel) == NULL)
		return (EINVAL);

	/*
	 * NOTE: We have to copy *uap into ua because uap is an aliased
	 *	 pointer into the sysframe, which we are replacing.
	 */
	if ((ve = vkernel_find_vmspace(vkp, ua.id, 0)) == NULL) {
		error = ENOENT;
		goto done;
	}

	switch(ua.cmd) {
	case VMSPACE_CTL_RUN:
		/*
		 * Save the caller's register context, swap VM spaces, and
		 * install the passed register context.  Return with
		 * EJUSTRETURN so the syscall code doesn't adjust the context.
		 */
		framesz = sizeof(struct trapframe);
		if ((vklp = lp->lwp_vkernel) == NULL) {
			/* first vmspace_ctl by this lwp, create side struct */
			vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
				       M_WAITOK|M_ZERO);
			lp->lwp_vkernel = vklp;
		}
		if (ve && vklp->ve_cache != ve) {
			/*
			 * Update the per-lwp lookup cache.  Take the new
			 * cache ref before dropping the old one.
			 */
			vmspace_entry_cache_ref(ve);
			if (vklp->ve_cache)
				vmspace_entry_cache_drop(vklp->ve_cache);
			vklp->ve_cache = ve;
		}
		vklp->user_trapframe = ua.tframe;
		vklp->user_vextframe = ua.vframe;
		/* save vkernel context so it can be restored on trap/exit */
		bcopy(sysmsg->sysmsg_frame, &vklp->save_trapframe, framesz);
		bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
		      sizeof(vklp->save_vextframe.vx_tls));
		/* install the guest's register and TLS context */
		error = copyin(ua.tframe, sysmsg->sysmsg_frame, framesz);
		if (error == 0) {
			error = copyin(&ua.vframe->vx_tls,
				       &curthread->td_tls,
				       sizeof(struct savetls));
		}
		if (error == 0)
			error = cpu_sanitize_frame(sysmsg->sysmsg_frame);
		if (error == 0)
			error = cpu_sanitize_tls(&curthread->td_tls);
		if (error) {
			/* restore the saved vkernel context on failure */
			bcopy(&vklp->save_trapframe, sysmsg->sysmsg_frame,
			      framesz);
			bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
			      sizeof(vklp->save_vextframe.vx_tls));
			set_user_TLS();
		} else {
			/*
			 * vklp->ve holds an extra active ref while the lwp
			 * runs in the guest vmspace (dropped in
			 * vkernel_trap() or vkernel_lwp_exit()).
			 */
			vklp->ve = ve;
			atomic_add_int(&ve->refs, 1);
			pmap_setlwpvm(lp, ve->vmspace);
			set_user_TLS();
			set_vkernel_fp(sysmsg->sysmsg_frame);
			error = EJUSTRETURN;
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
done:
	if (ve)
		vmspace_entry_drop(ve);

	return(error);
}
262 | ||
/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.
 *
 * No requirements.
 */
int
sys_vmspace_mmap(struct sysmsg *sysmsg,
		 const struct vmspace_mmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done2;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	/*
	 * The active ref taken by the find protects ve across the
	 * potentially-blocking kern_mmap() call.
	 */
	error = kern_mmap(ve->vmspace, uap->addr, uap->len,
			  uap->prot, uap->flags,
			  uap->fd, uap->offset, &sysmsg->sysmsg_resultp);

	vmspace_entry_drop(ve);
done2:
	return (error);
}
297 | ||
/*
 * vmspace_munmap(id, addr, len)
 *
 * unmap memory within a VMSPACE.
 *
 * No requirements.
 */
int
sys_vmspace_munmap(struct sysmsg *sysmsg,
		   const struct vmspace_munmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done2;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	/*
	 * NOTE: The removal can block, so the active ref acquired by
	 *	 the find above must be held until done1.
	 */

	/*
	 * Copied from sys_munmap()
	 */
	addr = (vm_offset_t)uap->addr;
	size = uap->len;

	/* page-align the request */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t)round_page(size);
	if (size < uap->len) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	if (size == 0) {
		error = 0;
		goto done1;
	}

	/* range must lie entirely within the user address space */
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	map = &ve->vmspace->vm_map;
	if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
		error = EINVAL;
		goto done1;
	}
	vm_map_remove(map, addr, addr + size);
	error = 0;
done1:
	vmspace_entry_drop(ve);
done2:
	return (error);
}
376 | ||
377 | /* | |
378 | * vmspace_pread(id, buf, nbyte, flags, offset) | |
379 | * | |
380 | * Read data from a vmspace. The number of bytes read is returned or | |
381 | * -1 if an unrecoverable error occured. If the number of bytes read is | |
382 | * less then the request size, a page fault occured in the VMSPACE which | |
383 | * the caller must resolve in order to proceed. | |
3919ced0 MD |
384 | * |
385 | * (not implemented yet) | |
af2b4857 | 386 | * No requirements. |
021a4ed4 MD |
387 | */ |
388 | int | |
80d831e1 MD |
389 | sys_vmspace_pread(struct sysmsg *sysmsg, |
390 | const struct vmspace_pread_args *uap) | |
021a4ed4 | 391 | { |
39005e16 | 392 | struct vkernel_proc *vkp; |
021a4ed4 | 393 | struct vmspace_entry *ve; |
3919ced0 | 394 | int error; |
021a4ed4 | 395 | |
3919ced0 MD |
396 | if ((vkp = curproc->p_vkernel) == NULL) { |
397 | error = EINVAL; | |
af2b4857 | 398 | goto done3; |
3919ced0 | 399 | } |
d95d5e03 MD |
400 | |
401 | if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) { | |
3919ced0 | 402 | error = ENOENT; |
d95d5e03 | 403 | goto done3; |
3919ced0 | 404 | } |
d95d5e03 | 405 | vmspace_entry_drop(ve); |
3919ced0 | 406 | error = EINVAL; |
af2b4857 | 407 | done3: |
3919ced0 | 408 | return (error); |
021a4ed4 MD |
409 | } |
410 | ||
411 | /* | |
412 | * vmspace_pwrite(id, buf, nbyte, flags, offset) | |
413 | * | |
414 | * Write data to a vmspace. The number of bytes written is returned or | |
415 | * -1 if an unrecoverable error occured. If the number of bytes written is | |
416 | * less then the request size, a page fault occured in the VMSPACE which | |
417 | * the caller must resolve in order to proceed. | |
3919ced0 MD |
418 | * |
419 | * (not implemented yet) | |
af2b4857 | 420 | * No requirements. |
021a4ed4 MD |
421 | */ |
422 | int | |
80d831e1 MD |
423 | sys_vmspace_pwrite(struct sysmsg *sysmsg, |
424 | const struct vmspace_pwrite_args *uap) | |
021a4ed4 | 425 | { |
39005e16 | 426 | struct vkernel_proc *vkp; |
021a4ed4 | 427 | struct vmspace_entry *ve; |
3919ced0 | 428 | int error; |
021a4ed4 | 429 | |
3919ced0 MD |
430 | if ((vkp = curproc->p_vkernel) == NULL) { |
431 | error = EINVAL; | |
af2b4857 | 432 | goto done3; |
3919ced0 | 433 | } |
d95d5e03 | 434 | if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) { |
3919ced0 | 435 | error = ENOENT; |
d95d5e03 | 436 | goto done3; |
3919ced0 | 437 | } |
d95d5e03 | 438 | vmspace_entry_drop(ve); |
3919ced0 | 439 | error = EINVAL; |
af2b4857 | 440 | done3: |
3919ced0 | 441 | return (error); |
133aabc4 MD |
442 | } |
443 | ||
/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * No requirements.
 */
int
sys_vmspace_mcontrol(struct sysmsg *sysmsg,
		     const struct vmspace_mcontrol_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	struct lwp *lp;
	vm_offset_t start, end;
	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
	int error;

	lp = curthread->td_lwp;
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}

	/*
	 * This code is basically copied from sys_mcontrol()
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
		error = EINVAL;
		goto done1;
	}

	/* reject wrap-around and out-of-range requests */
	if (tmpaddr < (vm_offset_t)uap->addr) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}

	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page(tmpaddr);

	error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
			       uap->behav, uap->value);
done1:
	vmspace_entry_drop(ve);
done3:
	return (error);
}
504 | ||
505 | /* | |
506 | * Red black tree functions | |
507 | */ | |
508 | static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *); | |
509 | RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare); | |
510 | ||
af2b4857 MD |
511 | /* |
512 | * a->start is address, and the only field has to be initialized. | |
513 | * The caller must hold vkp->token. | |
514 | * | |
515 | * The caller must hold vkp->token. | |
516 | */ | |
d3313941 MD |
517 | static int |
518 | rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b) | |
519 | { | |
520 | if ((char *)a->id < (char *)b->id) | |
521 | return(-1); | |
522 | else if ((char *)a->id > (char *)b->id) | |
523 | return(1); | |
524 | return(0); | |
525 | } | |
526 | ||
/*
 * RB_SCAN callback used by vkernel_exit() to destroy every remaining
 * vmspace entry.  At this point no lwp can hold active refs, so the
 * delete must succeed; anything else indicates a refcount leak.
 *
 * The caller must hold vkp->token.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
	struct vkernel_proc *vkp = data;

	if (vmspace_entry_delete(ve, vkp, 0) == 0)
		vmspace_entry_cache_drop(ve);
	else
		panic("rb_vmspace_delete: invalid refs %d", ve->refs);
	return(0);
}
542 | ||
/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.  We gain control of
 * the associated cache_refs ref, which the caller will drop for us.
 *
 * The ve must not have any active references other than those from the
 * caller.  If it does, EBUSY is returned.  The ve may still maintain
 * any number of cache references which will drop as the related LWPs
 * execute vmspace operations or exit.
 *
 * 0 is returned on success, EBUSY on failure.  On success the caller must
 * drop the last cache_refs.  We have dropped the callers active refs.
 *
 * The caller must hold vkp->token.
 */
static
int
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp,
		     int refs)
{
	/*
	 * Interlocked by vkp->token.
	 *
	 * Drop the callers refs and set VKE_REF_DELETED atomically, if
	 * the remaining refs match exactly.  Dropping refs and setting
	 * the DELETED flag atomically protects other threads from trying
	 * to use the ve.
	 *
	 * The caller now owns the final cache_ref that was previously
	 * associated with the live state of the ve.
	 */
	if (atomic_cmpset_int(&ve->refs, refs, VKE_REF_DELETED) == 0) {
		/* other active refs exist, cannot delete */
		KKASSERT(ve->refs >= refs);
		return EBUSY;
	}
	RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

	/* tear down pmap and map before releasing the vmspace itself */
	pmap_remove_pages(vmspace_pmap(ve->vmspace),
			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vm_map_remove(&ve->vmspace->vm_map,
		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vmspace_rel(ve->vmspace);
	ve->vmspace = NULL; /* safety */

	return 0;
}
589 | ||
590 | /* | |
591 | * Ref a ve for cache purposes | |
592 | */ | |
593 | static | |
4a22e893 | 594 | void |
76f1911e | 595 | vmspace_entry_cache_ref(struct vmspace_entry *ve) |
4a22e893 | 596 | { |
76f1911e MD |
597 | atomic_add_int(&ve->cache_refs, 1); |
598 | } | |
599 | ||
/*
 * The ve cache_drop is the final word for a ve.  It gains an extra ref
 * representing it being on the RB tree and not being in a deleted state.
 * Removal from the RB tree and deletion manipulate this ref.  The last
 * drop will thus include full deletion of the ve in addition to the last
 * cached user going away.
 */
static
void
vmspace_entry_cache_drop(struct vmspace_entry *ve)
{
	if (atomic_fetchadd_int(&ve->cache_refs, -1) == 1) {
		/* last cache ref; entry must already be marked deleted */
		KKASSERT(ve->refs & VKE_REF_DELETED);
		kfree(ve, M_VKERNEL);
	}
}
616 | ||
/*
 * Drop primary reference.  The ve cannot be freed on the 1->0 transition.
 * Instead, ve deletion interlocks the final kfree() via cache_refs.
 */
static
void
vmspace_entry_drop(struct vmspace_entry *ve)
{
	atomic_fetchadd_int(&ve->refs, -1);
}
627 | ||
/*
 * Locate the ve for (id), return the ve or NULL.  If found this function
 * will bump ve->refs which prevents the ve from being immediately destroyed
 * (but it can still be removed).
 *
 * The cache can potentially contain a stale ve, check by testing ve->vmspace.
 *
 * The caller must hold vkp->token if excl is non-zero.
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id, int excl)
{
	struct vmspace_entry *ve;
	struct vmspace_entry key;
	struct vkernel_lwp *vklp;
	struct lwp *lp = curthread->td_lwp;

	/*
	 * Cache check.  Since we already hold a ref on the cache entry
	 * the ve cannot be ripped out from under us while we cycle
	 * ve->refs.
	 */
	if ((vklp = lp->lwp_vkernel) != NULL) {
		ve = vklp->ve_cache;
		if (ve && ve->id == id) {
			uint32_t n;

			/*
			 * Bump active refs, check to see if the cache
			 * entry is stale.  If not, we are good.
			 */
			n = atomic_fetchadd_int(&ve->refs, 1);
			if ((n & VKE_REF_DELETED) == 0) {
				KKASSERT(ve->vmspace);
				return ve;
			}

			/*
			 * Cache is stale, clean it out and fall through
			 * to a normal search.  Drop both the active ref
			 * we just took and our cache ref.
			 */
			vklp->ve_cache = NULL;
			vmspace_entry_drop(ve);
			vmspace_entry_cache_drop(ve);
		}
	}

	/*
	 * Normal search protected by vkp->token.  No new ve's can be marked
	 * DELETED while we hold the token so we are safe.
	 */
	if (excl == 0)
		lwkt_gettoken_shared(&vkp->token);
	key.id = id;
	ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
	if (ve) {
		if (atomic_fetchadd_int(&ve->refs, 1) & VKE_REF_DELETED) {
			/* raced a deletion; undo our ref and report miss */
			vmspace_entry_drop(ve);
			ve = NULL;
		}
	}
	if (excl == 0)
		lwkt_reltoken(&vkp->token);
	return (ve);
}
694 | ||
695 | /* | |
696 | * Manage vkernel refs, used by the kernel when fork()ing or exit()ing | |
697 | * a vkernel process. | |
af2b4857 MD |
698 | * |
699 | * No requirements. | |
d3313941 MD |
700 | */ |
701 | void | |
4a22e893 | 702 | vkernel_inherit(struct proc *p1, struct proc *p2) |
d3313941 | 703 | { |
39005e16 MD |
704 | struct vkernel_proc *vkp; |
705 | ||
706 | vkp = p1->p_vkernel; | |
707 | KKASSERT(vkp->refs > 0); | |
708 | atomic_add_int(&vkp->refs, 1); | |
709 | p2->p_vkernel = vkp; | |
d3313941 MD |
710 | } |
711 | ||
/*
 * Process teardown: release this process's hold on the shared
 * vkernel_proc structure, destroying it (and all remaining vmspace
 * entries) on the last reference.
 *
 * No requirements.
 */
void
vkernel_exit(struct proc *p)
{
	struct vkernel_proc *vkp;
	struct lwp *lp;

	vkp = p->p_vkernel;

	/*
	 * Restore the original VM context if we are killed while running
	 * a different one.
	 *
	 * This isn't supposed to happen.  What is supposed to happen is
	 * that the process should enter vkernel_trap() before the handling
	 * the signal.
	 */
	RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
		vkernel_lwp_exit(lp);
	}

	/*
	 * Dereference the common area
	 */
	p->p_vkernel = NULL;
	KKASSERT(vkp->refs > 0);

	if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
		/* last process using vkp: destroy all vmspace entries */
		lwkt_gettoken(&vkp->token);
		RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
			rb_vmspace_delete, vkp);
		lwkt_reltoken(&vkp->token);
		kfree(vkp, M_VKERNEL);
	}
}
749 | ||
/*
 * Per-lwp teardown: restore the lwp's native vmspace if it died while
 * running a guest VM context, then release its cached vmspace entry
 * and the vkernel_lwp side structure.
 *
 * No requirements.
 */
void
vkernel_lwp_exit(struct lwp *lp)
{
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve;

	if ((vklp = lp->lwp_vkernel) != NULL) {
		/*
		 * vkernel thread
		 */
		if ((ve = vklp->ve) != NULL) {
			/* died while running the guest VM context */
			kprintf("Warning, pid %d killed with "
				"active VC!\n", lp->lwp_proc->p_pid);
			pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
			vklp->ve = NULL;
			KKASSERT(ve->refs > 0);
			vmspace_entry_drop(ve);
		}
		if ((ve = vklp->ve_cache) != NULL) {
			/* release the per-lwp lookup cache */
			vklp->ve_cache = NULL;
			vmspace_entry_cache_drop(ve);
		}

		lp->lwp_vkernel = NULL;
		kfree(vklp, M_VKERNEL);
	}
}
780 | ||
/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 *
 * No requirements.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;
	struct vmspace_entry *ve;
	struct vkernel_lwp *vklp;
	int error;

	/*
	 * Which vmspace entry was running?
	 */
	vklp = lp->lwp_vkernel;
	KKASSERT(vklp);

	ve = vklp->ve;
	KKASSERT(ve != NULL);

	/*
	 * Switch the LWP vmspace back to the virtual kernel's VM space.
	 */
	vklp->ve = NULL;
	pmap_setlwpvm(lp, p->p_vmspace);
	KKASSERT(ve->refs > 0);
	vmspace_entry_drop(ve);
	/* ve is invalid once we kill our ref */

	/*
	 * Copy the emulated process frame to the virtual kernel process.
	 * The emulated process cannot change TLS descriptors so don't
	 * bother saving them, we already have a copy.
	 *
	 * Restore the virtual kernel's saved context so the virtual kernel
	 * process can resume.
	 */
	error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
	bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
	bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
	      sizeof(vklp->save_vextframe.vx_tls));
	set_user_TLS();
	/* copyout failure is reported to the vkernel via cpu_vkernel_trap */
	cpu_vkernel_trap(frame, error);
}