Commit | Line | Data |
---|---|---|
984263bc MD |
1 | /* |
2 | * Copyright (c) 1994 Adam Glass and Charles Hannum. All rights reserved. | |
3 | * | |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions | |
6 | * are met: | |
7 | * 1. Redistributions of source code must retain the above copyright | |
8 | * notice, this list of conditions and the following disclaimer. | |
9 | * 2. Redistributions in binary form must reproduce the above copyright | |
10 | * notice, this list of conditions and the following disclaimer in the | |
11 | * documentation and/or other materials provided with the distribution. | |
12 | * 3. All advertising materials mentioning features or use of this software | |
13 | * must display the following acknowledgement: | |
14 | * This product includes software developed by Adam Glass and Charles | |
15 | * Hannum. | |
16 | * 4. The names of the authors may not be used to endorse or promote products | |
17 | * derived from this software without specific prior written permission. | |
18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR | |
20 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
21 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
22 | * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
23 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
24 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
28 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | */ | |
30 | ||
31 | #include "opt_compat.h" | |
32 | #include "opt_sysvipc.h" | |
33 | ||
34 | #include <sys/param.h> | |
35 | #include <sys/systm.h> | |
36 | #include <sys/sysproto.h> | |
37 | #include <sys/kernel.h> | |
38 | #include <sys/sysctl.h> | |
39 | #include <sys/shm.h> | |
40 | #include <sys/proc.h> | |
41 | #include <sys/malloc.h> | |
42 | #include <sys/mman.h> | |
43 | #include <sys/stat.h> | |
44 | #include <sys/sysent.h> | |
45 | #include <sys/jail.h> | |
46 | ||
684a93c4 MD |
47 | #include <sys/mplock2.h> |
48 | ||
984263bc MD |
49 | #include <vm/vm.h> |
50 | #include <vm/vm_param.h> | |
51 | #include <sys/lock.h> | |
52 | #include <vm/pmap.h> | |
53 | #include <vm/vm_object.h> | |
54 | #include <vm/vm_map.h> | |
55 | #include <vm/vm_page.h> | |
56 | #include <vm/vm_pager.h> | |
57 | ||
58 | static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments"); | |
59 | ||
60 | struct oshmctl_args; | |
753fd850 | 61 | static int sys_oshmctl (struct proc *p, struct oshmctl_args *uap); |
984263bc | 62 | |
402ed7e1 RG |
63 | static int shmget_allocate_segment (struct proc *p, struct shmget_args *uap, int mode); |
64 | static int shmget_existing (struct proc *p, struct shmget_args *uap, int mode, int segnum); | |
984263bc MD |
65 | |
66 | /* XXX casting to (sy_call_t *) is bogus, as usual. */ | |
67 | static sy_call_t *shmcalls[] = { | |
753fd850 MD |
68 | (sy_call_t *)sys_shmat, (sy_call_t *)sys_oshmctl, |
69 | (sy_call_t *)sys_shmdt, (sy_call_t *)sys_shmget, | |
70 | (sy_call_t *)sys_shmctl | |
984263bc MD |
71 | }; |
72 | ||
73 | #define SHMSEG_FREE 0x0200 | |
74 | #define SHMSEG_REMOVED 0x0400 | |
75 | #define SHMSEG_ALLOCATED 0x0800 | |
76 | #define SHMSEG_WANTED 0x1000 | |
77 | ||
8ba5f7ef AH |
78 | static int shm_last_free, shm_committed, shmalloced; |
79 | int shm_nused; | |
984263bc MD |
80 | static struct shmid_ds *shmsegs; |
81 | ||
82 | struct shm_handle { | |
83 | /* vm_offset_t kva; */ | |
84 | vm_object_t shm_object; | |
85 | }; | |
86 | ||
87 | struct shmmap_state { | |
88 | vm_offset_t va; | |
89 | int shmid; | |
90 | }; | |
91 | ||
402ed7e1 RG |
92 | static void shm_deallocate_segment (struct shmid_ds *); |
93 | static int shm_find_segment_by_key (key_t); | |
94 | static struct shmid_ds *shm_find_segment_by_shmid (int); | |
95 | static int shm_delete_mapping (struct vmspace *vm, struct shmmap_state *); | |
96 | static void shmrealloc (void); | |
97 | static void shminit (void *); | |
984263bc MD |
98 | |
99 | /* | |
100 | * Tuneable values | |
101 | */ | |
984263bc MD |
102 | #ifndef SHMMIN |
103 | #define SHMMIN 1 | |
104 | #endif | |
105 | #ifndef SHMMNI | |
6e2ab4bf | 106 | #define SHMMNI 512 |
984263bc MD |
107 | #endif |
108 | #ifndef SHMSEG | |
6e2ab4bf | 109 | #define SHMSEG 1024 |
984263bc MD |
110 | #endif |
111 | ||
112 | struct shminfo shminfo = { | |
6e2ab4bf | 113 | 0, |
984263bc MD |
114 | SHMMIN, |
115 | SHMMNI, | |
116 | SHMSEG, | |
6e2ab4bf | 117 | 0 |
984263bc MD |
118 | }; |
119 | ||
4f1a1603 | 120 | static int shm_use_phys = 1; |
984263bc | 121 | |
7849af9f MD |
122 | TUNABLE_LONG("kern.ipc.shmmin", &shminfo.shmmin); |
123 | TUNABLE_LONG("kern.ipc.shmmni", &shminfo.shmmni); | |
124 | TUNABLE_LONG("kern.ipc.shmseg", &shminfo.shmseg); | |
125 | TUNABLE_LONG("kern.ipc.shmmaxpgs", &shminfo.shmall); | |
984263bc MD |
126 | TUNABLE_INT("kern.ipc.shm_use_phys", &shm_use_phys); |
127 | ||
7849af9f | 128 | SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, |
093e85dc | 129 | "Max shared memory segment size"); |
7849af9f | 130 | SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, |
093e85dc | 131 | "Min shared memory segment size"); |
7849af9f | 132 | SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0, |
093e85dc | 133 | "Max number of shared memory identifiers"); |
7849af9f | 134 | SYSCTL_LONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0, |
093e85dc | 135 | "Max shared memory segments per process"); |
7849af9f | 136 | SYSCTL_LONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, |
093e85dc SG |
137 | "Max pages of shared memory"); |
138 | SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0, | |
139 | "Use phys pager allocation instead of swap pager allocation"); | |
984263bc MD |
140 | |
141 | static int | |
c972a82f | 142 | shm_find_segment_by_key(key_t key) |
984263bc MD |
143 | { |
144 | int i; | |
145 | ||
6e2ab4bf | 146 | for (i = 0; i < shmalloced; i++) { |
984263bc MD |
147 | if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) && |
148 | shmsegs[i].shm_perm.key == key) | |
149 | return i; | |
6e2ab4bf | 150 | } |
984263bc MD |
151 | return -1; |
152 | } | |
153 | ||
154 | static struct shmid_ds * | |
c972a82f | 155 | shm_find_segment_by_shmid(int shmid) |
984263bc MD |
156 | { |
157 | int segnum; | |
158 | struct shmid_ds *shmseg; | |
159 | ||
160 | segnum = IPCID_TO_IX(shmid); | |
161 | if (segnum < 0 || segnum >= shmalloced) | |
162 | return NULL; | |
163 | shmseg = &shmsegs[segnum]; | |
164 | if ((shmseg->shm_perm.mode & (SHMSEG_ALLOCATED | SHMSEG_REMOVED)) | |
165 | != SHMSEG_ALLOCATED || | |
6e2ab4bf | 166 | shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid)) { |
984263bc | 167 | return NULL; |
6e2ab4bf | 168 | } |
984263bc MD |
169 | return shmseg; |
170 | } | |
171 | ||
172 | static void | |
c972a82f | 173 | shm_deallocate_segment(struct shmid_ds *shmseg) |
984263bc MD |
174 | { |
175 | struct shm_handle *shm_handle; | |
176 | size_t size; | |
177 | ||
178 | shm_handle = shmseg->shm_internal; | |
179 | vm_object_deallocate(shm_handle->shm_object); | |
efda3bd0 | 180 | kfree((caddr_t)shm_handle, M_SHM); |
984263bc MD |
181 | shmseg->shm_internal = NULL; |
182 | size = round_page(shmseg->shm_segsz); | |
183 | shm_committed -= btoc(size); | |
184 | shm_nused--; | |
185 | shmseg->shm_perm.mode = SHMSEG_FREE; | |
186 | } | |
187 | ||
188 | static int | |
fef0fdf2 | 189 | shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s) |
984263bc MD |
190 | { |
191 | struct shmid_ds *shmseg; | |
192 | int segnum, result; | |
193 | size_t size; | |
194 | ||
195 | segnum = IPCID_TO_IX(shmmap_s->shmid); | |
196 | shmseg = &shmsegs[segnum]; | |
197 | size = round_page(shmseg->shm_segsz); | |
fef0fdf2 | 198 | result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size); |
984263bc MD |
199 | if (result != KERN_SUCCESS) |
200 | return EINVAL; | |
201 | shmmap_s->shmid = -1; | |
202 | shmseg->shm_dtime = time_second; | |
203 | if ((--shmseg->shm_nattch <= 0) && | |
204 | (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { | |
205 | shm_deallocate_segment(shmseg); | |
206 | shm_last_free = segnum; | |
207 | } | |
208 | return 0; | |
209 | } | |
210 | ||
3919ced0 MD |
211 | /* |
212 | * MPALMOSTSAFE | |
213 | */ | |
984263bc | 214 | int |
753fd850 | 215 | sys_shmdt(struct shmdt_args *uap) |
984263bc | 216 | { |
9910d07b MD |
217 | struct thread *td = curthread; |
218 | struct proc *p = td->td_proc; | |
984263bc | 219 | struct shmmap_state *shmmap_s; |
7849af9f | 220 | long i; |
3919ced0 | 221 | int error; |
984263bc | 222 | |
9910d07b | 223 | if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) |
984263bc MD |
224 | return (ENOSYS); |
225 | ||
3919ced0 | 226 | get_mplock(); |
984263bc | 227 | shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; |
3919ced0 MD |
228 | if (shmmap_s == NULL) { |
229 | error = EINVAL; | |
230 | goto done; | |
231 | } | |
232 | for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) { | |
984263bc MD |
233 | if (shmmap_s->shmid != -1 && |
234 | shmmap_s->va == (vm_offset_t)uap->shmaddr) | |
235 | break; | |
3919ced0 | 236 | } |
984263bc | 237 | if (i == shminfo.shmseg) |
3919ced0 MD |
238 | error = EINVAL; |
239 | else | |
240 | error = shm_delete_mapping(p->p_vmspace, shmmap_s); | |
241 | done: | |
242 | rel_mplock(); | |
243 | return (error); | |
984263bc MD |
244 | } |
245 | ||
3919ced0 MD |
246 | /* |
247 | * MPALMOSTSAFE | |
248 | */ | |
984263bc | 249 | int |
753fd850 | 250 | sys_shmat(struct shmat_args *uap) |
984263bc | 251 | { |
9910d07b MD |
252 | struct thread *td = curthread; |
253 | struct proc *p = td->td_proc; | |
7849af9f MD |
254 | int error, flags; |
255 | long i; | |
984263bc MD |
256 | struct shmid_ds *shmseg; |
257 | struct shmmap_state *shmmap_s = NULL; | |
258 | struct shm_handle *shm_handle; | |
259 | vm_offset_t attach_va; | |
260 | vm_prot_t prot; | |
261 | vm_size_t size; | |
262 | int rv; | |
263 | ||
9910d07b | 264 | if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) |
984263bc MD |
265 | return (ENOSYS); |
266 | ||
3919ced0 MD |
267 | get_mplock(); |
268 | again: | |
984263bc MD |
269 | shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; |
270 | if (shmmap_s == NULL) { | |
271 | size = shminfo.shmseg * sizeof(struct shmmap_state); | |
efda3bd0 | 272 | shmmap_s = kmalloc(size, M_SHM, M_WAITOK); |
984263bc MD |
273 | for (i = 0; i < shminfo.shmseg; i++) |
274 | shmmap_s[i].shmid = -1; | |
3919ced0 MD |
275 | if (p->p_vmspace->vm_shm != NULL) { |
276 | kfree(shmmap_s, M_SHM); | |
277 | goto again; | |
278 | } | |
984263bc MD |
279 | p->p_vmspace->vm_shm = (caddr_t)shmmap_s; |
280 | } | |
281 | shmseg = shm_find_segment_by_shmid(uap->shmid); | |
3919ced0 MD |
282 | if (shmseg == NULL) { |
283 | error = EINVAL; | |
284 | goto done; | |
285 | } | |
984263bc | 286 | error = ipcperm(p, &shmseg->shm_perm, |
3919ced0 | 287 | (uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); |
984263bc | 288 | if (error) |
3919ced0 | 289 | goto done; |
984263bc MD |
290 | for (i = 0; i < shminfo.shmseg; i++) { |
291 | if (shmmap_s->shmid == -1) | |
292 | break; | |
293 | shmmap_s++; | |
294 | } | |
3919ced0 MD |
295 | if (i >= shminfo.shmseg) { |
296 | error = EMFILE; | |
297 | goto done; | |
298 | } | |
984263bc MD |
299 | size = round_page(shmseg->shm_segsz); |
300 | #ifdef VM_PROT_READ_IS_EXEC | |
301 | prot = VM_PROT_READ | VM_PROT_EXECUTE; | |
302 | #else | |
303 | prot = VM_PROT_READ; | |
304 | #endif | |
305 | if ((uap->shmflg & SHM_RDONLY) == 0) | |
306 | prot |= VM_PROT_WRITE; | |
307 | flags = MAP_ANON | MAP_SHARED; | |
308 | if (uap->shmaddr) { | |
309 | flags |= MAP_FIXED; | |
3919ced0 | 310 | if (uap->shmflg & SHM_RND) { |
984263bc | 311 | attach_va = (vm_offset_t)uap->shmaddr & ~(SHMLBA-1); |
3919ced0 | 312 | } else if (((vm_offset_t)uap->shmaddr & (SHMLBA-1)) == 0) { |
984263bc | 313 | attach_va = (vm_offset_t)uap->shmaddr; |
3919ced0 MD |
314 | } else { |
315 | error = EINVAL; | |
316 | goto done; | |
317 | } | |
984263bc | 318 | } else { |
3919ced0 MD |
319 | /* |
320 | * This is just a hint to vm_map_find() about where to put it. | |
321 | */ | |
984263bc MD |
322 | attach_va = round_page((vm_offset_t)p->p_vmspace->vm_taddr + maxtsiz + maxdsiz); |
323 | } | |
324 | ||
325 | shm_handle = shmseg->shm_internal; | |
b12defdc | 326 | vm_object_hold(shm_handle->shm_object); |
7050f2f3 | 327 | vm_object_chain_wait(shm_handle->shm_object); |
b12defdc | 328 | vm_object_reference_locked(shm_handle->shm_object); |
1b874851 MD |
329 | rv = vm_map_find(&p->p_vmspace->vm_map, |
330 | shm_handle->shm_object, 0, | |
9388fcaa MD |
331 | &attach_va, |
332 | size, PAGE_SIZE, | |
1b874851 MD |
333 | ((flags & MAP_FIXED) ? 0 : 1), |
334 | VM_MAPTYPE_NORMAL, | |
335 | prot, prot, | |
336 | 0); | |
b12defdc | 337 | vm_object_drop(shm_handle->shm_object); |
984263bc | 338 | if (rv != KERN_SUCCESS) { |
f23f0be6 | 339 | vm_object_deallocate(shm_handle->shm_object); |
3919ced0 MD |
340 | error = ENOMEM; |
341 | goto done; | |
984263bc MD |
342 | } |
343 | vm_map_inherit(&p->p_vmspace->vm_map, | |
3919ced0 | 344 | attach_va, attach_va + size, VM_INHERIT_SHARE); |
984263bc | 345 | |
3919ced0 | 346 | KKASSERT(shmmap_s->shmid == -1); |
984263bc MD |
347 | shmmap_s->va = attach_va; |
348 | shmmap_s->shmid = uap->shmid; | |
349 | shmseg->shm_lpid = p->p_pid; | |
350 | shmseg->shm_atime = time_second; | |
351 | shmseg->shm_nattch++; | |
65cac0a7 | 352 | uap->sysmsg_resultp = (void *)attach_va; |
3919ced0 MD |
353 | error = 0; |
354 | done: | |
355 | rel_mplock(); | |
356 | return error; | |
984263bc MD |
357 | } |
358 | ||
359 | struct oshmid_ds { | |
360 | struct ipc_perm shm_perm; /* operation perms */ | |
361 | int shm_segsz; /* size of segment (bytes) */ | |
362 | ushort shm_cpid; /* pid, creator */ | |
363 | ushort shm_lpid; /* pid, last operation */ | |
364 | short shm_nattch; /* no. of current attaches */ | |
365 | time_t shm_atime; /* last attach time */ | |
366 | time_t shm_dtime; /* last detach time */ | |
367 | time_t shm_ctime; /* last change time */ | |
368 | void *shm_handle; /* internal handle for shm segment */ | |
369 | }; | |
370 | ||
371 | struct oshmctl_args { | |
df2244e3 | 372 | struct sysmsg sysmsg; |
984263bc MD |
373 | int shmid; |
374 | int cmd; | |
375 | struct oshmid_ds *ubuf; | |
376 | }; | |
377 | ||
3919ced0 MD |
378 | /* |
379 | * MPALMOSTSAFE | |
380 | */ | |
984263bc | 381 | static int |
c972a82f | 382 | sys_oshmctl(struct proc *p, struct oshmctl_args *uap) |
984263bc MD |
383 | { |
384 | #ifdef COMPAT_43 | |
9910d07b | 385 | struct thread *td = curthread; |
984263bc MD |
386 | struct shmid_ds *shmseg; |
387 | struct oshmid_ds outbuf; | |
9910d07b | 388 | int error; |
984263bc | 389 | |
9910d07b | 390 | if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) |
984263bc MD |
391 | return (ENOSYS); |
392 | ||
3919ced0 | 393 | get_mplock(); |
984263bc | 394 | shmseg = shm_find_segment_by_shmid(uap->shmid); |
3919ced0 MD |
395 | if (shmseg == NULL) { |
396 | error = EINVAL; | |
397 | goto done; | |
398 | } | |
399 | ||
984263bc MD |
400 | switch (uap->cmd) { |
401 | case IPC_STAT: | |
402 | error = ipcperm(p, &shmseg->shm_perm, IPC_R); | |
403 | if (error) | |
3919ced0 | 404 | break; |
984263bc MD |
405 | outbuf.shm_perm = shmseg->shm_perm; |
406 | outbuf.shm_segsz = shmseg->shm_segsz; | |
407 | outbuf.shm_cpid = shmseg->shm_cpid; | |
408 | outbuf.shm_lpid = shmseg->shm_lpid; | |
409 | outbuf.shm_nattch = shmseg->shm_nattch; | |
410 | outbuf.shm_atime = shmseg->shm_atime; | |
411 | outbuf.shm_dtime = shmseg->shm_dtime; | |
412 | outbuf.shm_ctime = shmseg->shm_ctime; | |
413 | outbuf.shm_handle = shmseg->shm_internal; | |
414 | error = copyout((caddr_t)&outbuf, uap->ubuf, sizeof(outbuf)); | |
984263bc MD |
415 | break; |
416 | default: | |
417 | /* XXX casting to (sy_call_t *) is bogus, as usual. */ | |
3919ced0 | 418 | error = sys_shmctl((struct shmctl_args *)uap); |
984263bc | 419 | } |
3919ced0 MD |
420 | done: |
421 | rel_mplock(); | |
422 | return error; | |
984263bc MD |
423 | #else |
424 | return EINVAL; | |
425 | #endif | |
426 | } | |
427 | ||
3919ced0 MD |
428 | /* |
429 | * MPALMOSTSAFE | |
430 | */ | |
984263bc | 431 | int |
753fd850 | 432 | sys_shmctl(struct shmctl_args *uap) |
984263bc | 433 | { |
9910d07b MD |
434 | struct thread *td = curthread; |
435 | struct proc *p = td->td_proc; | |
984263bc MD |
436 | int error; |
437 | struct shmid_ds inbuf; | |
438 | struct shmid_ds *shmseg; | |
439 | ||
9910d07b | 440 | if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) |
984263bc MD |
441 | return (ENOSYS); |
442 | ||
3919ced0 | 443 | get_mplock(); |
984263bc | 444 | shmseg = shm_find_segment_by_shmid(uap->shmid); |
3919ced0 MD |
445 | if (shmseg == NULL) { |
446 | error = EINVAL; | |
447 | goto done; | |
448 | } | |
449 | ||
984263bc MD |
450 | switch (uap->cmd) { |
451 | case IPC_STAT: | |
452 | error = ipcperm(p, &shmseg->shm_perm, IPC_R); | |
3919ced0 MD |
453 | if (error == 0) |
454 | error = copyout(shmseg, uap->buf, sizeof(inbuf)); | |
984263bc MD |
455 | break; |
456 | case IPC_SET: | |
457 | error = ipcperm(p, &shmseg->shm_perm, IPC_M); | |
3919ced0 MD |
458 | if (error == 0) |
459 | error = copyin(uap->buf, &inbuf, sizeof(inbuf)); | |
460 | if (error == 0) { | |
461 | shmseg->shm_perm.uid = inbuf.shm_perm.uid; | |
462 | shmseg->shm_perm.gid = inbuf.shm_perm.gid; | |
463 | shmseg->shm_perm.mode = | |
464 | (shmseg->shm_perm.mode & ~ACCESSPERMS) | | |
465 | (inbuf.shm_perm.mode & ACCESSPERMS); | |
466 | shmseg->shm_ctime = time_second; | |
467 | } | |
984263bc MD |
468 | break; |
469 | case IPC_RMID: | |
470 | error = ipcperm(p, &shmseg->shm_perm, IPC_M); | |
3919ced0 MD |
471 | if (error == 0) { |
472 | shmseg->shm_perm.key = IPC_PRIVATE; | |
473 | shmseg->shm_perm.mode |= SHMSEG_REMOVED; | |
474 | if (shmseg->shm_nattch <= 0) { | |
475 | shm_deallocate_segment(shmseg); | |
476 | shm_last_free = IPCID_TO_IX(uap->shmid); | |
477 | } | |
984263bc MD |
478 | } |
479 | break; | |
480 | #if 0 | |
481 | case SHM_LOCK: | |
482 | case SHM_UNLOCK: | |
483 | #endif | |
484 | default: | |
3919ced0 MD |
485 | error = EINVAL; |
486 | break; | |
984263bc | 487 | } |
3919ced0 MD |
488 | done: |
489 | rel_mplock(); | |
490 | return error; | |
984263bc MD |
491 | } |
492 | ||
984263bc | 493 | static int |
c972a82f | 494 | shmget_existing(struct proc *p, struct shmget_args *uap, int mode, int segnum) |
984263bc MD |
495 | { |
496 | struct shmid_ds *shmseg; | |
497 | int error; | |
498 | ||
499 | shmseg = &shmsegs[segnum]; | |
500 | if (shmseg->shm_perm.mode & SHMSEG_REMOVED) { | |
501 | /* | |
502 | * This segment is in the process of being allocated. Wait | |
503 | * until it's done, and look the key up again (in case the | |
504 | * allocation failed or it was freed). | |
505 | */ | |
506 | shmseg->shm_perm.mode |= SHMSEG_WANTED; | |
377d4740 | 507 | error = tsleep((caddr_t)shmseg, PCATCH, "shmget", 0); |
984263bc MD |
508 | if (error) |
509 | return error; | |
510 | return EAGAIN; | |
511 | } | |
512 | if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) | |
513 | return EEXIST; | |
514 | error = ipcperm(p, &shmseg->shm_perm, mode); | |
515 | if (error) | |
516 | return error; | |
517 | if (uap->size && uap->size > shmseg->shm_segsz) | |
518 | return EINVAL; | |
c7114eea | 519 | uap->sysmsg_result = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); |
984263bc MD |
520 | return 0; |
521 | } | |
522 | ||
523 | static int | |
c972a82f | 524 | shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode) |
984263bc | 525 | { |
d7c9bb18 MD |
526 | int i, segnum, shmid; |
527 | size_t size; | |
984263bc MD |
528 | struct ucred *cred = p->p_ucred; |
529 | struct shmid_ds *shmseg; | |
530 | struct shm_handle *shm_handle; | |
531 | ||
532 | if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax) | |
533 | return EINVAL; | |
534 | if (shm_nused >= shminfo.shmmni) /* any shmids left? */ | |
535 | return ENOSPC; | |
536 | size = round_page(uap->size); | |
537 | if (shm_committed + btoc(size) > shminfo.shmall) | |
538 | return ENOMEM; | |
539 | if (shm_last_free < 0) { | |
540 | shmrealloc(); /* maybe expand the shmsegs[] array */ | |
6e2ab4bf | 541 | for (i = 0; i < shmalloced; i++) { |
984263bc MD |
542 | if (shmsegs[i].shm_perm.mode & SHMSEG_FREE) |
543 | break; | |
6e2ab4bf | 544 | } |
984263bc MD |
545 | if (i == shmalloced) |
546 | return ENOSPC; | |
547 | segnum = i; | |
548 | } else { | |
549 | segnum = shm_last_free; | |
550 | shm_last_free = -1; | |
551 | } | |
552 | shmseg = &shmsegs[segnum]; | |
553 | /* | |
554 | * In case we sleep in malloc(), mark the segment present but deleted | |
555 | * so that noone else tries to create the same key. | |
556 | */ | |
557 | shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; | |
558 | shmseg->shm_perm.key = uap->key; | |
559 | shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff; | |
efda3bd0 | 560 | shm_handle = kmalloc(sizeof(struct shm_handle), M_SHM, M_WAITOK); |
984263bc MD |
561 | shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); |
562 | ||
563 | /* | |
564 | * We make sure that we have allocated a pager before we need | |
565 | * to. | |
566 | */ | |
567 | if (shm_use_phys) { | |
568 | shm_handle->shm_object = | |
5a648714 | 569 | phys_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0); |
984263bc MD |
570 | } else { |
571 | shm_handle->shm_object = | |
5a648714 | 572 | swap_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0); |
984263bc MD |
573 | } |
574 | vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING); | |
575 | vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT); | |
576 | ||
577 | shmseg->shm_internal = shm_handle; | |
578 | shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid; | |
579 | shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid; | |
580 | shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | | |
581 | (mode & ACCESSPERMS) | SHMSEG_ALLOCATED; | |
582 | shmseg->shm_segsz = uap->size; | |
583 | shmseg->shm_cpid = p->p_pid; | |
584 | shmseg->shm_lpid = shmseg->shm_nattch = 0; | |
585 | shmseg->shm_atime = shmseg->shm_dtime = 0; | |
586 | shmseg->shm_ctime = time_second; | |
587 | shm_committed += btoc(size); | |
588 | shm_nused++; | |
54341a3b MD |
589 | |
590 | /* | |
591 | * If a physical mapping is desired and we have a ton of free pages | |
592 | * we pre-allocate the pages here in order to avoid on-the-fly | |
593 | * allocation later. This has a big effect on database warm-up | |
594 | * times since DFly supports concurrent page faults coming from the | |
595 | * same VM object for pages which already exist. | |
596 | * | |
597 | * This can hang the kernel for a while so only do it if shm_use_phys | |
598 | * is set to 2 or higher. | |
599 | */ | |
600 | if (shm_use_phys > 1) { | |
601 | vm_pindex_t pi, pmax; | |
602 | vm_page_t m; | |
603 | ||
604 | pmax = round_page(shmseg->shm_segsz) >> PAGE_SHIFT; | |
605 | vm_object_hold(shm_handle->shm_object); | |
606 | if (pmax > vmstats.v_free_count) | |
607 | pmax = vmstats.v_free_count; | |
608 | for (pi = 0; pi < pmax; ++pi) { | |
609 | m = vm_page_grab(shm_handle->shm_object, pi, | |
610 | VM_ALLOC_SYSTEM | VM_ALLOC_NULL_OK | | |
611 | VM_ALLOC_ZERO); | |
612 | if (m == NULL) | |
613 | break; | |
614 | vm_pager_get_page(shm_handle->shm_object, &m, 1); | |
615 | vm_page_activate(m); | |
616 | vm_page_wakeup(m); | |
617 | lwkt_yield(); | |
618 | } | |
619 | vm_object_drop(shm_handle->shm_object); | |
620 | } | |
621 | ||
984263bc MD |
622 | if (shmseg->shm_perm.mode & SHMSEG_WANTED) { |
623 | /* | |
624 | * Somebody else wanted this key while we were asleep. Wake | |
625 | * them up now. | |
626 | */ | |
627 | shmseg->shm_perm.mode &= ~SHMSEG_WANTED; | |
628 | wakeup((caddr_t)shmseg); | |
629 | } | |
c7114eea | 630 | uap->sysmsg_result = shmid; |
984263bc MD |
631 | return 0; |
632 | } | |
633 | ||
3919ced0 MD |
634 | /* |
635 | * MPALMOSTSAFE | |
636 | */ | |
984263bc | 637 | int |
753fd850 | 638 | sys_shmget(struct shmget_args *uap) |
984263bc | 639 | { |
9910d07b MD |
640 | struct thread *td = curthread; |
641 | struct proc *p = td->td_proc; | |
984263bc MD |
642 | int segnum, mode, error; |
643 | ||
9910d07b | 644 | if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) |
984263bc MD |
645 | return (ENOSYS); |
646 | ||
647 | mode = uap->shmflg & ACCESSPERMS; | |
3919ced0 MD |
648 | get_mplock(); |
649 | ||
984263bc MD |
650 | if (uap->key != IPC_PRIVATE) { |
651 | again: | |
652 | segnum = shm_find_segment_by_key(uap->key); | |
653 | if (segnum >= 0) { | |
654 | error = shmget_existing(p, uap, mode, segnum); | |
655 | if (error == EAGAIN) | |
656 | goto again; | |
3919ced0 MD |
657 | goto done; |
658 | } | |
659 | if ((uap->shmflg & IPC_CREAT) == 0) { | |
660 | error = ENOENT; | |
661 | goto done; | |
984263bc | 662 | } |
984263bc | 663 | } |
3919ced0 MD |
664 | error = shmget_allocate_segment(p, uap, mode); |
665 | done: | |
666 | rel_mplock(); | |
667 | return (error); | |
984263bc MD |
668 | } |
669 | ||
41c20dac | 670 | /* |
3919ced0 MD |
671 | * shmsys_args(int which, int a2, ...) (VARARGS) |
672 | * | |
673 | * MPALMOSTSAFE | |
41c20dac | 674 | */ |
984263bc | 675 | int |
753fd850 | 676 | sys_shmsys(struct shmsys_args *uap) |
984263bc | 677 | { |
9910d07b | 678 | struct thread *td = curthread; |
180508ff | 679 | unsigned int which = (unsigned int)uap->which; |
df44673d | 680 | int error; |
984263bc | 681 | |
9910d07b | 682 | if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) |
984263bc MD |
683 | return (ENOSYS); |
684 | ||
c157ff7a | 685 | if (which >= NELEM(shmcalls)) |
984263bc | 686 | return EINVAL; |
3919ced0 | 687 | get_mplock(); |
df44673d MD |
688 | bcopy(&uap->a2, &uap->which, |
689 | sizeof(struct shmsys_args) - offsetof(struct shmsys_args, a2)); | |
690 | error = ((*shmcalls[which])(uap)); | |
3919ced0 MD |
691 | rel_mplock(); |
692 | ||
df44673d | 693 | return(error); |
984263bc MD |
694 | } |
695 | ||
696 | void | |
c972a82f | 697 | shmfork(struct proc *p1, struct proc *p2) |
984263bc MD |
698 | { |
699 | struct shmmap_state *shmmap_s; | |
700 | size_t size; | |
701 | int i; | |
702 | ||
5c7ffcc8 | 703 | get_mplock(); |
984263bc | 704 | size = shminfo.shmseg * sizeof(struct shmmap_state); |
efda3bd0 | 705 | shmmap_s = kmalloc(size, M_SHM, M_WAITOK); |
984263bc MD |
706 | bcopy((caddr_t)p1->p_vmspace->vm_shm, (caddr_t)shmmap_s, size); |
707 | p2->p_vmspace->vm_shm = (caddr_t)shmmap_s; | |
5c7ffcc8 | 708 | for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) { |
984263bc MD |
709 | if (shmmap_s->shmid != -1) |
710 | shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++; | |
5c7ffcc8 MD |
711 | } |
712 | rel_mplock(); | |
984263bc MD |
713 | } |
714 | ||
715 | void | |
fef0fdf2 | 716 | shmexit(struct vmspace *vm) |
984263bc | 717 | { |
fef0fdf2 | 718 | struct shmmap_state *base, *shm; |
984263bc MD |
719 | int i; |
720 | ||
fef0fdf2 MD |
721 | if ((base = (struct shmmap_state *)vm->vm_shm) != NULL) { |
722 | vm->vm_shm = NULL; | |
5c7ffcc8 | 723 | get_mplock(); |
fef0fdf2 MD |
724 | for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) { |
725 | if (shm->shmid != -1) | |
726 | shm_delete_mapping(vm, shm); | |
727 | } | |
efda3bd0 | 728 | kfree(base, M_SHM); |
5c7ffcc8 | 729 | rel_mplock(); |
fef0fdf2 | 730 | } |
984263bc MD |
731 | } |
732 | ||
733 | static void | |
734 | shmrealloc(void) | |
735 | { | |
736 | int i; | |
737 | struct shmid_ds *newsegs; | |
738 | ||
739 | if (shmalloced >= shminfo.shmmni) | |
740 | return; | |
741 | ||
efda3bd0 | 742 | newsegs = kmalloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK); |
984263bc MD |
743 | for (i = 0; i < shmalloced; i++) |
744 | bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0])); | |
745 | for (; i < shminfo.shmmni; i++) { | |
746 | shmsegs[i].shm_perm.mode = SHMSEG_FREE; | |
747 | shmsegs[i].shm_perm.seq = 0; | |
748 | } | |
efda3bd0 | 749 | kfree(shmsegs, M_SHM); |
984263bc MD |
750 | shmsegs = newsegs; |
751 | shmalloced = shminfo.shmmni; | |
752 | } | |
753 | ||
754 | static void | |
c972a82f | 755 | shminit(void *dummy) |
984263bc MD |
756 | { |
757 | int i; | |
758 | ||
6e2ab4bf MD |
759 | /* |
760 | * If not overridden by a tunable set the maximum shm to | |
761 | * 2/3 of main memory. | |
762 | */ | |
763 | if (shminfo.shmall == 0) | |
764 | shminfo.shmall = (size_t)vmstats.v_page_count * 2 / 3; | |
765 | ||
984263bc MD |
766 | shminfo.shmmax = shminfo.shmall * PAGE_SIZE; |
767 | shmalloced = shminfo.shmmni; | |
efda3bd0 | 768 | shmsegs = kmalloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK); |
984263bc MD |
769 | for (i = 0; i < shmalloced; i++) { |
770 | shmsegs[i].shm_perm.mode = SHMSEG_FREE; | |
771 | shmsegs[i].shm_perm.seq = 0; | |
772 | } | |
773 | shm_last_free = 0; | |
774 | shm_nused = 0; | |
775 | shm_committed = 0; | |
776 | } | |
777 | SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL); |