/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $
 * $DragonFly: src/sys/kern/sys_pipe.c,v 1.4 2003/06/25 03:55:57 dillon Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
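
/*
 * Editorial sketch (added commentary, not part of the original file):
 * from user space the two modes are indistinguishable; the kernel
 * picks a path per write.  Assuming PIPE_MINDIRECT is 8KB, a caller
 * might see:
 *
 *	int fds[2];
 *	char big[32768], small[128];
 *
 *	pipe(fds);
 *	write(fds[1], small, sizeof(small));	-- buffered: copied into
 *						   the kernel ring buffer
 *	write(fds[1], big, sizeof(big));	-- eligible for the direct,
 *						   page-wired transfer path
 */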

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/signalvar.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_zone.h>

#include <sys/file2.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static int pipe_read __P((struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct thread *td));
static int pipe_write __P((struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct thread *td));
static int pipe_close __P((struct file *fp, struct thread *td));
static int pipe_poll __P((struct file *fp, int events, struct ucred *cred,
		struct thread *td));
static int pipe_kqfilter __P((struct file *fp, struct knote *kn));
static int pipe_stat __P((struct file *fp, struct stat *sb, struct thread *td));
static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct thread *td));

static struct fileops pipeops = {
	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
	pipe_stat, pipe_close
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };


/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)
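
/*
 * Editorial note (PIPE_SIZE value assumed, not stated here): with a
 * typical PIPE_SIZE of 16384 bytes these work out to roughly 5461 and
 * 10922 bytes.  pipe_read() below uses MINPIPESIZE as its hysteresis
 * point: writers blocked on a full pipe are only woken once the
 * buffered byte count drops below about a third of the buffer, so a
 * fast reader/slow writer pair doesn't ping-pong on every byte.
 */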

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general though.
 */
#define LIMITPIPEKVA (16*1024*1024)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
static int nbigpipe;

static int amountpipekva;

static void pipeclose __P((struct pipe *cpipe));
static void pipe_free_kmem __P((struct pipe *cpipe));
static int pipe_create __P((struct pipe **cpipep));
static __inline int pipelock __P((struct pipe *cpipe, int catch));
static __inline void pipeunlock __P((struct pipe *cpipe));
static __inline void pipeselwakeup __P((struct pipe *cpipe));
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio));
static void pipe_destroy_write_buffer __P((struct pipe *wpipe));
static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio));
static void pipe_clone_write_buffer __P((struct pipe *wpipe));
#endif
static int pipespace __P((struct pipe *cpipe, int size));

static vm_zone_t pipe_zone;

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * pipe_args(int dummy)
 */

/* ARGSUSED */
int
pipe(struct pipe_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd, error;

	KKASSERT(p);
	fdp = p->p_fd;

	if (pipe_zone == NULL)
		pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	error = falloc(p, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	fhold(rf);
	p->p_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;
	error = falloc(p, &wf, &fd);
	if (error) {
		if (fdp->fd_ofiles[p->p_retval[0]] == rf) {
			fdp->fd_ofiles[p->p_retval[0]] = NULL;
			fdrop(rf, td);
		}
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;
	p->p_retval[1] = fd;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	fdrop(rf, td);

	return (0);
}
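
/*
 * Editorial sketch (user-space view, added commentary): the two
 * descriptors come back through p_retval[0]/p_retval[1], which a
 * caller sees as the classic two-element array:
 *
 *	int fds[2];
 *
 *	if (pipe(fds) == 0) {
 *		write(fds[1], "hi", 2);		-- fds[1] is the write side
 *		read(fds[0], buf, 2);		-- fds[0] is the read side
 *	}
 */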

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely; if it
 * fails, it retains the old buffer and returns ENOMEM.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;

	npages = round_page(size)/PAGE_SIZE;
	/*
	 * Create an object, I don't like the idea of paging to/from
	 * kernel_object.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	object = vm_object_allocate(OBJT_DEFAULT, npages);
	buffer = (caddr_t) vm_map_min(kernel_map);

	/*
	 * Insert the object into the kernel map, and allocate kva for it.
	 * The map entry is, by default, pageable.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	error = vm_map_find(kernel_map, object, 0,
		(vm_offset_t *) &buffer, size, 1,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	if (error != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return (ENOMEM);
	}

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.object = object;
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	amountpipekva += cpipe->pipe_buffer.size;
	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
static int
pipe_create(cpipep)
	struct pipe **cpipep;
{
	struct pipe *cpipe;
	int error;

	*cpipep = zalloc(pipe_zone);
	if (*cpipep == NULL)
		return (ENOMEM);

	cpipe = *cpipep;

	/* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */
	cpipe->pipe_buffer.object = NULL;
#ifndef PIPE_NODIRECT
	cpipe->pipe_map.kva = 0;
#endif
	/*
	 * protect so pipeclose() doesn't follow a junk pointer
	 * if pipespace() fails.
	 */
	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
	cpipe->pipe_state = 0;
	cpipe->pipe_peer = NULL;
	cpipe->pipe_busy = 0;

#ifndef PIPE_NODIRECT
	/*
	 * pipe data structure initializations to support direct pipe I/O
	 */
	cpipe->pipe_map.cnt = 0;
	cpipe->pipe_map.kva = 0;
	cpipe->pipe_map.pos = 0;
	cpipe->pipe_map.npages = 0;
	/* cpipe->pipe_map.ms[] = invalid */
#endif

	error = pipespace(cpipe, PIPE_SIZE);
	if (error)
		return (error);

	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = tsleep(cpipe, catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/* ARGSUSED */
static int
pipe_read(struct file *fp, struct uio *uio, struct ucred *cred,
	int flags, struct thread *td)
{
	struct pipe *rpipe = (struct pipe *) fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			caddr_t	va;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = tsleep(rpipe, PRIBIO | PCATCH,
				    "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending processes' buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	u_int size;
	int i;
	vm_offset_t addr, endaddr, paddr;

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		vm_page_t m;

		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
		    (paddr = pmap_kextract(addr)) == 0) {
			int j;

			for (j = 0; j < i; j++)
				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
			return (EFAULT);
		}

		m = PHYS_TO_VM_PAGE(paddr);
		vm_page_wire(m);
		wpipe->pipe_map.ms[i] = m;
	}

	/*
	 * set up the control block
	 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

	/*
	 * and map the buffer
	 */
	if (wpipe->pipe_map.kva == 0) {
		/*
		 * We need to allocate space for an extra page because the
		 * address range might (will) span pages at times.
		 */
		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
			wpipe->pipe_buffer.size + PAGE_SIZE);
		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
	}
	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
		wpipe->pipe_map.npages);

	/*
	 * and update the uio data
	 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base += size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	if (wpipe->pipe_map.kva) {
		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);

		if (amountpipekva > MAXPIPEKVA) {
			vm_offset_t kva = wpipe->pipe_map.kva;
			wpipe->pipe_map.kva = 0;
			kmem_free(kernel_map, kva,
				wpipe->pipe_buffer.size + PAGE_SIZE);
			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
		}
	}
	for (i = 0; i < wpipe->pipe_map.npages; i++)
		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int size;
	int pos;

	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;
	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
	    (caddr_t) wpipe->pipe_buffer.buffer, size);

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
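/*
 * Editorial outline of the handshake (added commentary, derived from
 * the code below):
 *
 *	writer				reader
 *	------				------
 *	wait for PIPE_DIRECTW clear
 *	wait for ring buffer to drain
 *	wire pages, set PIPE_DIRECTW
 *	sleep "pipdwt"		->	copy from pipe_map
 *					clear PIPE_DIRECTW, wakeup
 *	unwire pages, return
 *
 * If the writer is instead woken by a signal while PIPE_DIRECTW is
 * still set, pipe_clone_write_buffer() snapshots the unread bytes
 * into the ring buffer before the pages are unwired.
 */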
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	error = pipe_build_write_buffer(wpipe, uio);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			pipe_destroy_write_buffer(wpipe);
			pipeunlock(wpipe);
			pipeselwakeup(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0);
	}

	pipelock(wpipe, 0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif

static int
pipe_write(struct file *fp, struct uio *uio, struct ucred *cred,
	int flags, struct thread *td)
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		return (EPIPE);
	}
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (nbigpipe < LIMITBIGPIPES) &&
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe, 1)) == 0) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				nbigpipe++;
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred, unbusy and return, waking up any
	 * pending readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		return(error);
	}

	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA))) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;
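
		/*
		 * Editorial example (added commentary): suppose 100 bytes
		 * are free and a process issues a 512-byte write.  Since
		 * orig_resid (512) <= PIPE_BUF, POSIX requires the write
		 * not be interleaved with other writers, so rather than
		 * store a 100-byte fragment we force space to 0 and take
		 * the sleep path below until the buffer can accept the
		 * whole write at once.
		 */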

		if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
			if ((error = pipelock(wpipe, 1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write to
				 * slip in on us... handle it here...
				 */
				if (wpipe->pipe_state & PIPE_DIRECTW) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;
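
				/*
				 * Editorial worked example (added
				 * commentary): with an 8192-byte buffer,
				 * in == 8100 and size == 300, segsize is
				 * 92; the remaining 208 bytes wrap around
				 * and are copied to offset 0 below.
				 */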

				/* Transfer first segment */

				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer wraparound disappeared");

					error = uiomove(&wpipe->pipe_buffer.buffer[0],
							size - segsize, uio);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
							panic("Expected wraparound bad");
						wpipe->pipe_buffer.in = size - segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td)
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		return (0);

	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		return (0);

	}
	return (ENOTTY);
}

static int
pipe_poll(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	return (revents);
}

static int
pipe_stat(struct file *fp, struct stat *ub, struct thread *td)
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	bzero((caddr_t)ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
	 * st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/* ARGSUSED */
static int
pipe_close(struct file *fp, struct thread *td)
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;

	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

static void
pipe_free_kmem(struct pipe *cpipe)
{

	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		amountpipekva -= cpipe->pipe_buffer.size;
		kmem_free(kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	if (cpipe->pipe_map.kva != 0) {
		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
		kmem_free(kernel_map,
			cpipe->pipe_map.kva,
			cpipe->pipe_buffer.size + PAGE_SIZE);
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.kva = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe) {

		pipeselwakeup(cpipe);

		/*
		 * If the other side is blocked, wake it up saying that
		 * we want to close it down.
		 */
		while (cpipe->pipe_busy) {
			wakeup(cpipe);
			cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
			tsleep(cpipe, PRIBIO, "pipecl", 0);
		}

		/*
		 * Disconnect from peer
		 */
		if ((ppipe = cpipe->pipe_peer) != NULL) {
			pipeselwakeup(ppipe);

			ppipe->pipe_state |= PIPE_EOF;
			wakeup(ppipe);
			KNOTE(&ppipe->pipe_sel.si_note, 0);
			ppipe->pipe_peer = NULL;
		}
		/*
		 * free resources
		 */
		pipe_free_kmem(cpipe);
		zfree(pipe_zone, cpipe);
	}
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		if (cpipe == NULL)
			/* other end of pipe has been closed */
			return (EBADF);
		break;
	default:
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	return (kn->kn_data >= PIPE_BUF);
}