/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $
 * $DragonFly: src/sys/kern/sys_pipe.c,v 1.50 2008/09/09 04:06:13 dillon Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */
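
/*
 * Overview for the reader: from userland this machinery is reached via
 * the pipe(2) system call.  A minimal, purely illustrative consumer
 * (hypothetical code, not part of this file) looks like this:
 *
 *	int fds[2];
 *	char buf[16];
 *
 *	if (pipe(fds) < 0)
 *		err(1, "pipe");
 *	write(fds[1], "hello", 5);	(enters pipe_write() below)
 *	read(fds[0], buf, sizeof(buf));	(enters pipe_read() below)
 *	close(fds[0]);
 *	close(fds[1]);
 */
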
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/signalvar.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>
#include <sys/globaldata.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/socket.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_zone.h>

#include <sys/file2.h>

#include <machine/cpufunc.h>

/*
 * interfaces to the outside world
 */
static int pipe_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int pipe_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int pipe_close (struct file *fp);
static int pipe_shutdown (struct file *fp, int how);
static int pipe_poll (struct file *fp, int events, struct ucred *cred);
static int pipe_kqfilter (struct file *fp, struct knote *kn);
static int pipe_stat (struct file *fp, struct stat *sb, struct ucred *cred);
static int pipe_ioctl (struct file *fp, u_long cmd, caddr_t data, struct ucred *cred);

static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_shutdown = pipe_shutdown
};

static void filt_pipedetach(struct knote *kn);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

MALLOC_DEFINE(M_PIPE, "pipe", "pipe structures");

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	64
#define PIPEQ_MAX_CACHE 16      /* per-cpu pipe structure cache */

static int pipe_maxbig = LIMITBIGPIPES;
static int pipe_maxcache = PIPEQ_MAX_CACHE;
static int pipe_bigcount;
static int pipe_nbig;
static int pipe_bcache_alloc;
static int pipe_bkmem_alloc;

SYSCTL_NODE(_kern, OID_AUTO, pipe, CTLFLAG_RW, 0, "Pipe operation");
SYSCTL_INT(_kern_pipe, OID_AUTO, nbig,
	CTLFLAG_RD, &pipe_nbig, 0, "number of big pipes allocated");
SYSCTL_INT(_kern_pipe, OID_AUTO, bigcount,
	CTLFLAG_RW, &pipe_bigcount, 0, "number of times pipe expanded");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxcache,
	CTLFLAG_RW, &pipe_maxcache, 0, "max pipes cached per-cpu");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxbig,
	CTLFLAG_RW, &pipe_maxbig, 0, "max number of big pipes");
#ifdef SMP
static int pipe_mpsafe = 0;
SYSCTL_INT(_kern_pipe, OID_AUTO, mpsafe,
	CTLFLAG_RW, &pipe_mpsafe, 0, "");
#endif
#if !defined(NO_PIPE_SYSCTL_STATS)
SYSCTL_INT(_kern_pipe, OID_AUTO, bcache_alloc,
	CTLFLAG_RW, &pipe_bcache_alloc, 0, "pipe buffer from pcpu cache");
SYSCTL_INT(_kern_pipe, OID_AUTO, bkmem_alloc,
	CTLFLAG_RW, &pipe_bkmem_alloc, 0, "pipe buffer from kmem");
#endif

static void pipeclose (struct pipe *cpipe);
static void pipe_free_kmem (struct pipe *cpipe);
static int pipe_create (struct pipe **cpipep);
static __inline void pipeselwakeup (struct pipe *cpipe);
static int pipespace (struct pipe *cpipe, int size);

static __inline void
pipeselwakeup(struct pipe *cpipe)
{
	if (cpipe->pipe_state & PIPE_SEL) {
		get_mplock();
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
		rel_mplock();
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) {
		get_mplock();
		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
		rel_mplock();
	}
	if (SLIST_FIRST(&cpipe->pipe_sel.si_note)) {
		get_mplock();
		KNOTE(&cpipe->pipe_sel.si_note, 0);
		rel_mplock();
	}
}

/*
 * These routines are called before and after a UIO.  The UIO
 * may block, causing our held tokens to be lost temporarily.
 *
 * We use these routines to serialize reads against other reads
 * and writes against other writes.
 *
 * The read token is held on entry so *ipp does not race.
 */
static __inline int
pipe_start_uio(struct pipe *cpipe, int *ipp)
{
	int error;

	while (*ipp) {
		*ipp = -1;
		error = tsleep(ipp, PCATCH, "pipexx", 0);
		if (error)
			return (error);
	}
	*ipp = 1;
	return (0);
}

static __inline void
pipe_end_uio(struct pipe *cpipe, int *ipp)
{
	if (*ipp < 0) {
		*ipp = 0;
		wakeup(ipp);
	} else {
		*ipp = 0;
	}
}
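
/*
 * Illustrative call pattern (a sketch of how the read path below uses
 * these helpers, not additional code):
 *
 *	error = pipe_start_uio(rpipe, &rpipe->pipe_rip);
 *	if (error == 0) {
 *		... uiomove() loop, may block and lose our tokens ...
 *		pipe_end_uio(rpipe, &rpipe->pipe_rip);
 *	}
 *
 * The -1 sentinel marks that someone is sleeping in pipe_start_uio(),
 * so pipe_end_uio() only issues a wakeup() when it is actually needed.
 */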

static __inline void
pipe_get_mplock(int *save)
{
#ifdef SMP
	if (pipe_mpsafe == 0) {
		get_mplock();
		*save = 1;
	} else
#endif
	{
		*save = 0;
	}
}

static __inline void
pipe_rel_mplock(int *save)
{
#ifdef SMP
	if (*save)
		rel_mplock();
#endif
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * pipe_args(int dummy)
 */

/* ARGSUSED */
int
sys_pipe(struct pipe_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd1, fd2, error;

	KKASSERT(p);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	error = falloc(p, &rf, &fd1);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	uap->sysmsg_fds[0] = fd1;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_type = DTYPE_PIPE;
	rf->f_flag = FREAD | FWRITE;
	rf->f_ops = &pipeops;
	rf->f_data = rpipe;
	error = falloc(p, &wf, &fd2);
	if (error) {
		fsetfd(p, NULL, fd1);
		fdrop(rf);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_type = DTYPE_PIPE;
	wf->f_flag = FREAD | FWRITE;
	wf->f_ops = &pipeops;
	wf->f_data = wpipe;
	uap->sysmsg_fds[1] = fd2;

	rpipe->pipe_slock = kmalloc(sizeof(struct lock),
				    M_PIPE, M_WAITOK|M_ZERO);
	wpipe->pipe_slock = rpipe->pipe_slock;
	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	lockinit(rpipe->pipe_slock, "pipecl", 0, 0);

	/*
	 * Once activated the peer relationship remains valid until
	 * both sides are closed.
	 */
	fsetfd(p, rf, fd1);
	fsetfd(p, wf, fd2);
	fdrop(rf);
	fdrop(wf);

	return (0);
}

/*
 * Allocate kva for the pipe circular buffer, the space is pageable.
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
static int
pipespace(struct pipe *cpipe, int size)
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;

	npages = round_page(size) / PAGE_SIZE;
	object = cpipe->pipe_buffer.object;

	/*
	 * [re]create the object if necessary and reserve space for it
	 * in the kernel_map.  The object and memory are pageable.  On
	 * success, free the old resources before assigning the new
	 * ones.
	 */
	if (object == NULL || object->size != npages) {
		get_mplock();
		object = vm_object_allocate(OBJT_DEFAULT, npages);
		buffer = (caddr_t)vm_map_min(&kernel_map);

		error = vm_map_find(&kernel_map, object, 0,
				    (vm_offset_t *)&buffer, size,
				    1,
				    VM_MAPTYPE_NORMAL,
				    VM_PROT_ALL, VM_PROT_ALL,
				    0);

		if (error != KERN_SUCCESS) {
			vm_object_deallocate(object);
			rel_mplock();
			return (ENOMEM);
		}
		pipe_free_kmem(cpipe);
		rel_mplock();
		cpipe->pipe_buffer.object = object;
		cpipe->pipe_buffer.buffer = buffer;
		cpipe->pipe_buffer.size = size;
		++pipe_bkmem_alloc;
	} else {
		++pipe_bcache_alloc;
	}
	cpipe->pipe_buffer.rindex = 0;
	cpipe->pipe_buffer.windex = 0;
	return (0);
}
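
/*
 * Note on the index scheme initialized above: rindex and windex are
 * free-running counters and the buffer size is always a power of 2,
 * so the byte count and physical offset fall out of simple unsigned
 * arithmetic.  A sketch of what the read/write paths below rely on:
 *
 *	size   = windex - rindex;		(bytes in the pipe; correct
 *						 even when the counters wrap)
 *	offset = rindex & (buffer.size - 1);	(physical buffer offset)
 */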

/*
 * Initialize and allocate VM and memory for pipe, pulling the pipe from
 * our per-cpu cache if possible.  For now make sure it is sized for the
 * smaller PIPE_SIZE default.
 */
static int
pipe_create(struct pipe **cpipep)
{
	globaldata_t gd = mycpu;
	struct pipe *cpipe;
	int error;

	if ((cpipe = gd->gd_pipeq) != NULL) {
		gd->gd_pipeq = cpipe->pipe_peer;
		--gd->gd_pipeqcount;
		cpipe->pipe_peer = NULL;
		cpipe->pipe_wantwcnt = 0;
	} else {
		cpipe = kmalloc(sizeof(struct pipe), M_PIPE, M_WAITOK|M_ZERO);
	}
	*cpipep = cpipe;
	if ((error = pipespace(cpipe, PIPE_SIZE)) != 0)
		return (error);
	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
	lwkt_token_init(&cpipe->pipe_rlock);
	lwkt_token_init(&cpipe->pipe_wlock);
	return (0);
}
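
/*
 * Design note: gd_pipeq is a simple per-cpu free list threaded through
 * pipe_peer, so allocating from it needs no locks at all.  A sketch of
 * the pop performed above and the matching push done in pipeclose():
 *
 *	cpipe = gd->gd_pipeq;			(pop)
 *	gd->gd_pipeq = cpipe->pipe_peer;
 *	--gd->gd_pipeqcount;
 *
 *	cpipe->pipe_peer = gd->gd_pipeq;	(push)
 *	gd->gd_pipeq = cpipe;
 *	++gd->gd_pipeqcount;
 */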

/*
 * MPALMOSTSAFE (acquires mplock)
 */
static int
pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
{
	struct pipe *rpipe;
	int error;
	int nread = 0;
	int nbio;
	u_int size;	/* total bytes available */
	u_int nsize;	/* total bytes to read */
	u_int rindex;	/* contiguous bytes available */
	u_int half_way;
	lwkt_tokref rlock;
	lwkt_tokref wlock;
	int mpsave;

	/*
	 * Degenerate case
	 */
	if (uio->uio_resid == 0)
		return(0);

	/*
	 * Setup locks, calculate nbio
	 */
	pipe_get_mplock(&mpsave);
	rpipe = (struct pipe *)fp->f_data;
	lwkt_gettoken(&rlock, &rpipe->pipe_rlock);

	if (fflags & O_FBLOCKING)
		nbio = 0;
	else if (fflags & O_FNONBLOCKING)
		nbio = 1;
	else if (fp->f_flag & O_NONBLOCK)
		nbio = 1;
	else
		nbio = 0;

	/*
	 * Reads are serialized.  Note however that pipe_buffer.buffer and
	 * pipe_buffer.size can change out from under us when the number
	 * of bytes in the buffer are zero due to the write-side doing a
	 * pipespace().
	 */
	error = pipe_start_uio(rpipe, &rpipe->pipe_rip);
	if (error) {
		pipe_rel_mplock(&mpsave);
		lwkt_reltoken(&rlock);
		return (error);
	}
	while (uio->uio_resid) {
		size = rpipe->pipe_buffer.windex - rpipe->pipe_buffer.rindex;
		cpu_lfence();
		if (size) {
			rindex = rpipe->pipe_buffer.rindex &
				 (rpipe->pipe_buffer.size - 1);
			nsize = size;
			if (nsize > rpipe->pipe_buffer.size - rindex)
				nsize = rpipe->pipe_buffer.size - rindex;
			if (nsize > (u_int)uio->uio_resid)
				nsize = (u_int)uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rindex],
					nsize, uio);
			if (error)
				break;
			cpu_mfence();
			rpipe->pipe_buffer.rindex += nsize;
			nread += nsize;

			/*
			 * Shortcut back to the top of the loop if there
			 * is no writer waiting or if we have not
			 * transitioned across the half-way point.
			 */
			half_way = rpipe->pipe_buffer.size >> 1;
			if ((rpipe->pipe_state & PIPE_WANTW) == 0 ||
			    size <= half_way || size - nsize > half_way) {
				continue;
			}
		}

		/*
		 * If the "write-side" was blocked we wake it up.  This code
		 * is reached either when the buffer is completely emptied
		 * or if it becomes more than half-empty.
		 *
		 * Pipe_state can only be modified if both the rlock and
		 * wlock are held.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			lwkt_gettoken(&wlock, &rpipe->pipe_wlock);
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				lwkt_reltoken(&wlock);
				wakeup(rpipe);
			} else {
				lwkt_reltoken(&wlock);
			}
		}

		/*
		 * Pick up our copy loop again if the writer sent data to
		 * us.
		 */
		size = rpipe->pipe_buffer.windex - rpipe->pipe_buffer.rindex;
		if (size)
			continue;

		/*
		 * Detect EOF condition, do not set error.
		 */
		if (rpipe->pipe_state & PIPE_REOF)
			break;

#ifdef SMP
		/*
		 * Gravy train if SMP box.  This saves a ton of IPIs and
		 * allows two cpus to operate in lockstep.
		 *
		 * XXX check pipe_wip also?
		 */
		DELAY(1);
		size = rpipe->pipe_buffer.windex - rpipe->pipe_buffer.rindex;
		if (size)
			continue;
#endif

		/*
		 * Break if some data was read, or if this was a non-blocking
		 * read.
		 */
		if (nread > 0)
			break;

		if (nbio) {
			error = EAGAIN;
			break;
		}

		/*
		 * Last chance, interlock with WANTR.
		 */
		lwkt_gettoken(&wlock, &rpipe->pipe_wlock);
		size = rpipe->pipe_buffer.windex - rpipe->pipe_buffer.rindex;
		if (size) {
			lwkt_reltoken(&wlock);
			continue;
		}

		/*
		 * If there is no more to read in the pipe, reset its
		 * pointers to the beginning.  This improves cache hit
		 * stats.
		 *
		 * We need both locks to modify both pointers, and there
		 * must also not be a write in progress or the uiomove()
		 * in the write might block and temporarily release
		 * its wlock, then reacquire and update windex.  We are
		 * only serialized against reads, not writes.
		 *
		 * XXX should we even bother resetting the indices?  It
		 * might actually be more cache efficient not to.
		 */
		if (rpipe->pipe_buffer.rindex == rpipe->pipe_buffer.windex &&
		    rpipe->pipe_wip == 0) {
			rpipe->pipe_buffer.rindex = 0;
			rpipe->pipe_buffer.windex = 0;
		}

		/*
		 * Wait for more data.
		 *
		 * Pipe_state can only be set if both the rlock and wlock
		 * are held.
		 */
		rpipe->pipe_state |= PIPE_WANTR;
		tsleep_interlock(rpipe);
		lwkt_reltoken(&wlock);
		error = tsleep(rpipe, PCATCH, "piperd", 0);
		if (error)
			break;
	}
	pipe_end_uio(rpipe, &rpipe->pipe_rip);

	/*
	 * Update last access time
	 */
	if (error == 0 && nread)
		vfs_timestamp(&rpipe->pipe_atime);

#if 0
	/*
	 * Handle write blocking hysteresis.  size can only increase while
	 * we hold the rlock.
	 *
	 * XXX shouldn't need this any more.  We will wakeup the writer
	 *     when we've drained past half-way.  The worst the writer
	 *     can do is fill the buffer up, not make it smaller, so
	 *     we are guaranteed our half-way test.
	 */
	if (rpipe->pipe_state & PIPE_WANTW) {
		size = rpipe->pipe_buffer.windex - rpipe->pipe_buffer.rindex;
		if (size <= (rpipe->pipe_buffer.size >> 1)) {
			lwkt_gettoken(&wlock, &rpipe->pipe_wlock);
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				lwkt_reltoken(&wlock);
				wakeup(rpipe);
			} else {
				lwkt_reltoken(&wlock);
			}
		}
	}
#endif
	size = rpipe->pipe_buffer.windex - rpipe->pipe_buffer.rindex;
	lwkt_reltoken(&rlock);

	/*
	 * If enough space is available in buffer then wakeup sel writers?
	 */
	if ((rpipe->pipe_buffer.size - size) >= PIPE_BUF)
		pipeselwakeup(rpipe);
	pipe_rel_mplock(&mpsave);
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_write(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
{
	int error;
	int orig_resid;
	int nbio;
	struct pipe *wpipe, *rpipe;
	lwkt_tokref rlock;
	lwkt_tokref wlock;
	u_int windex;
	u_int space;
	u_int wcount;
	int mpsave;

	pipe_get_mplock(&mpsave);

	/*
	 * Writes go to the peer.  The peer will always exist.
	 */
	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;
	lwkt_gettoken(&wlock, &wpipe->pipe_wlock);
	if (wpipe->pipe_state & PIPE_WEOF) {
		pipe_rel_mplock(&mpsave);
		lwkt_reltoken(&wlock);
		return (EPIPE);
	}

	/*
	 * Degenerate case (EPIPE takes prec)
	 */
	if (uio->uio_resid == 0) {
		pipe_rel_mplock(&mpsave);
		lwkt_reltoken(&wlock);
		return(0);
	}

	/*
	 * Writes are serialized (start_uio must be called with wlock)
	 */
	error = pipe_start_uio(wpipe, &wpipe->pipe_wip);
	if (error) {
		pipe_rel_mplock(&mpsave);
		lwkt_reltoken(&wlock);
		return (error);
	}

	if (fflags & O_FBLOCKING)
		nbio = 0;
	else if (fflags & O_FNONBLOCKING)
		nbio = 1;
	else if (fp->f_flag & O_NONBLOCK)
		nbio = 1;
	else
		nbio = 0;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.  We are write-serialized so we can block safely.
	 */
	if ((wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (pipe_nbig < pipe_maxbig) &&
	    wpipe->pipe_wantwcnt > 4 &&
	    (wpipe->pipe_buffer.rindex == wpipe->pipe_buffer.windex)) {
		/*
		 * Recheck after lock.
		 */
		lwkt_gettoken(&rlock, &wpipe->pipe_rlock);
		if ((wpipe->pipe_buffer.size <= PIPE_SIZE) &&
		    (pipe_nbig < pipe_maxbig) &&
		    (wpipe->pipe_buffer.rindex == wpipe->pipe_buffer.windex)) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) {
				++pipe_bigcount;
				pipe_nbig++;
			}
		}
		lwkt_reltoken(&rlock);
	}

	orig_resid = uio->uio_resid;
	wcount = 0;

	while (uio->uio_resid) {
		if (wpipe->pipe_state & PIPE_WEOF) {
			error = EPIPE;
			break;
		}

		windex = wpipe->pipe_buffer.windex &
			 (wpipe->pipe_buffer.size - 1);
		space = wpipe->pipe_buffer.size -
			(wpipe->pipe_buffer.windex - wpipe->pipe_buffer.rindex);
		cpu_lfence();

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		/*
		 * Write to fill, read size handles write hysteresis.  Also
		 * additional restrictions can cause select-based non-blocking
		 * writes to spin.
		 */
		if (space > 0) {
			u_int segsize;

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 *
			 * Limit each uiocopy to no more than PIPE_SIZE
			 * so we can keep the gravy train going on a
			 * SMP box.  This doubles the performance for
			 * write sizes > 16K.  Otherwise large writes
			 * wind up doing an inefficient synchronous
			 * ping-pong.
			 */
			if (space > (u_int)uio->uio_resid)
				space = (u_int)uio->uio_resid;
			if (space > PIPE_SIZE)
				space = PIPE_SIZE;

			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size - windex;
			if (segsize > space)
				segsize = space;

			/*
			 * If this is the first loop and the reader is
			 * blocked, do a preemptive wakeup of the reader.
			 *
			 * This works for both SMP and UP.  On SMP the IPI
			 * latency plus the wlock interlock on the reader
			 * side is the fastest way to get the reader going.
			 * (The scheduler will hard loop on lock tokens).
			 *
			 * NOTE: We can't clear WANTR here without acquiring
			 * the rlock, which we don't want to do here!
			 */
			if (wpipe->pipe_state & PIPE_WANTR)
				wakeup(wpipe);

			/*
			 * Transfer first segment
			 */
			error = uiomove(&wpipe->pipe_buffer.buffer[windex],
					segsize, uio);
			cpu_mfence();
			wpipe->pipe_buffer.windex += segsize;

			if (error == 0 && segsize < space) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
				segsize = space - segsize;
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
						segsize, uio);
				cpu_mfence();
				wpipe->pipe_buffer.windex += segsize;
			}
			if (error)
				break;
			wcount += space;
			continue;
		}

		/*
		 * We need both the rlock and the wlock to interlock against
		 * the EOF, WANTW, and size checks, and to modify pipe_state.
		 *
		 * These are token locks so we do not have to worry about
		 * deadlocks.
		 */
		lwkt_gettoken(&rlock, &wpipe->pipe_rlock);

		/*
		 * If the "read-side" has been blocked, wake it up now
		 * and yield to let it drain synchronously rather
		 * than block.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		/*
		 * don't block on non-blocking I/O
		 */
		if (nbio) {
			lwkt_reltoken(&rlock);
			error = EAGAIN;
			break;
		}

		/*
		 * We have no more space and have something to offer,
		 * wake up select/poll.
		 */
		pipeselwakeup(wpipe);

		++wpipe->pipe_wantwcnt;		/* don't care about overflow */
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipewr", 0);
		lwkt_reltoken(&rlock);

		/*
		 * Break out if we errored or the read side wants us to go
		 * away.
		 */
		if (error)
			break;
		if (wpipe->pipe_state & PIPE_WEOF) {
			error = EPIPE;
			break;
		}
	}
	pipe_end_uio(wpipe, &wpipe->pipe_wip);

	/*
	 * If we have put any characters in the buffer, we wake up
	 * the reader.
	 *
	 * Both rlock and wlock are required to be able to modify pipe_state.
	 */
	if (wpipe->pipe_buffer.windex != wpipe->pipe_buffer.rindex) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			lwkt_gettoken(&rlock, &wpipe->pipe_rlock);
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				lwkt_reltoken(&rlock);
				wakeup(wpipe);
			} else {
				lwkt_reltoken(&rlock);
			}
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.rindex == wpipe->pipe_buffer.windex) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	space = wpipe->pipe_buffer.windex - wpipe->pipe_buffer.rindex;
	lwkt_reltoken(&wlock);
	if (space)
		pipeselwakeup(wpipe);
	pipe_rel_mplock(&mpsave);
	return (error);
}
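
/*
 * The userland-visible consequence of the PIPE_BUF test in pipe_write()
 * above: writes of PIPE_BUF bytes or less never interleave with writes
 * from other writers.  A hypothetical illustration (not part of this
 * file); 'rec' and 'logfd' are made-up names:
 *
 *	struct record rec;			(sizeof(rec) <= PIPE_BUF)
 *
 *	write(logfd, &rec, sizeof(rec));
 *
 * Many processes can share logfd and each record arrives intact,
 * whereas larger writes may be broken up and interleaved.
 */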

/*
 * MPALMOSTSAFE - acquires mplock
 *
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct ucred *cred)
{
	struct pipe *mpipe;
	int error;

	get_mplock();
	mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {
	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		error = 0;
		break;
	case FIONREAD:
		*(int *)data = mpipe->pipe_buffer.windex -
				mpipe->pipe_buffer.rindex;
		error = 0;
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		error = 0;
		break;
	case TIOCSPGRP:
		/* This is deprecated, FIOSETOWN should be used instead. */
		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
		break;
	case TIOCGPGRP:
		/* This is deprecated, FIOGETOWN should be used instead. */
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		error = 0;
		break;
	default:
		error = ENOTTY;
		break;
	}
	rel_mplock();
	return (error);
}
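
/*
 * Hypothetical userland sketch (illustration only): FIONREAD reports
 * the windex - rindex byte count computed above.
 *
 *	int nbytes;
 *
 *	if (ioctl(fds[0], FIONREAD, &nbytes) == 0)
 *		printf("%d bytes buffered\n", nbytes);
 */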

/*
 * MPALMOSTSAFE - acquires mplock
 */
int
pipe_poll(struct file *fp, int events, struct ucred *cred)
{
	struct pipe *rpipe;
	struct pipe *wpipe;
	int revents = 0;
	u_int space;
	int mpsave;

	pipe_get_mplock(&mpsave);
	rpipe = (struct pipe *)fp->f_data;
	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM)) {
		if ((rpipe->pipe_buffer.windex != rpipe->pipe_buffer.rindex) ||
		    (rpipe->pipe_state & PIPE_REOF)) {
			revents |= events & (POLLIN | POLLRDNORM);
		}
	}

	if (events & (POLLOUT | POLLWRNORM)) {
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_WEOF)) {
			revents |= events & (POLLOUT | POLLWRNORM);
		} else {
			space = wpipe->pipe_buffer.windex -
				wpipe->pipe_buffer.rindex;
			space = wpipe->pipe_buffer.size - space;
			if (space >= PIPE_BUF)
				revents |= events & (POLLOUT | POLLWRNORM);
		}
	}

	if ((rpipe->pipe_state & PIPE_REOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_WEOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(curthread, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(curthread, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
	pipe_rel_mplock(&mpsave);
	return (revents);
}
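
/*
 * Hypothetical userland sketch (illustration only): a poll(2) on the
 * read side blocks until one of the conditions tested above holds.
 *
 *	struct pollfd pfd;
 *
 *	pfd.fd = fds[0];
 *	pfd.events = POLLIN;
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		... data or EOF is available ...
 */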

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_stat(struct file *fp, struct stat *ub, struct ucred *cred)
{
	struct pipe *pipe;
	int mpsave;

	pipe_get_mplock(&mpsave);
	pipe = (struct pipe *)fp->f_data;

	bzero((caddr_t)ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.windex - pipe->pipe_buffer.rindex;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
	 * st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	pipe_rel_mplock(&mpsave);
	return (0);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_close(struct file *fp)
{
	struct pipe *cpipe;

	get_mplock();
	cpipe = (struct pipe *)fp->f_data;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(cpipe->pipe_sigio);
	pipeclose(cpipe);
	rel_mplock();
	return (0);
}

/*
 * Shutdown one or both directions of a full-duplex pipe.
 *
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_shutdown(struct file *fp, int how)
{
	struct pipe *rpipe;
	struct pipe *wpipe;
	int error = EPIPE;
	lwkt_tokref rpipe_rlock;
	lwkt_tokref rpipe_wlock;
	lwkt_tokref wpipe_rlock;
	lwkt_tokref wpipe_wlock;
	int mpsave;

	pipe_get_mplock(&mpsave);
	rpipe = (struct pipe *)fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * We modify pipe_state on both pipes, which means we need
	 * all four tokens!
	 */
	lwkt_gettoken(&rpipe_rlock, &rpipe->pipe_rlock);
	lwkt_gettoken(&rpipe_wlock, &rpipe->pipe_wlock);
	lwkt_gettoken(&wpipe_rlock, &wpipe->pipe_rlock);
	lwkt_gettoken(&wpipe_wlock, &wpipe->pipe_wlock);

	switch(how) {
	case SHUT_RDWR:
	case SHUT_RD:
		rpipe->pipe_state |= PIPE_REOF;
		wpipe->pipe_state |= PIPE_WEOF;
		if (rpipe->pipe_state & PIPE_WANTR) {
			rpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(rpipe);
		}
		if (wpipe->pipe_state & PIPE_WANTW) {
			wpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(wpipe);
		}
		pipeselwakeup(rpipe);
		error = 0;
		if (how == SHUT_RD)
			break;
		/* fall through */
	case SHUT_WR:
		wpipe->pipe_state |= PIPE_WEOF;
		rpipe->pipe_state |= PIPE_REOF;
		if (wpipe->pipe_state & PIPE_WANTW) {
			wpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(wpipe);
		}
		if (rpipe->pipe_state & PIPE_WANTR) {
			rpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(rpipe);
		}
		pipeselwakeup(wpipe);
		error = 0;
		break;
	}

	lwkt_reltoken(&rpipe_rlock);
	lwkt_reltoken(&rpipe_wlock);
	lwkt_reltoken(&wpipe_rlock);
	lwkt_reltoken(&wpipe_wlock);

	pipe_rel_mplock(&mpsave);
	return (error);
}

static void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--pipe_nbig;
		kmem_free(&kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
		cpipe->pipe_buffer.object = NULL;
	}
}

/*
 * Close the pipe.  The slock must be held to interlock against simultaneous
 * closes.  The rlock and wlock must be held to adjust the pipe_state.
 */
static void
pipeclose(struct pipe *cpipe)
{
	globaldata_t gd;
	struct pipe *ppipe;
	lwkt_tokref cpipe_rlock;
	lwkt_tokref cpipe_wlock;
	lwkt_tokref ppipe_rlock;
	lwkt_tokref ppipe_wlock;

	if (cpipe == NULL)
		return;

	/*
	 * The slock may not have been allocated yet (close during
	 * initialization)
	 *
	 * We need both the read and write tokens to modify pipe_state.
	 */
	if (cpipe->pipe_slock)
		lockmgr(cpipe->pipe_slock, LK_EXCLUSIVE);
	lwkt_gettoken(&cpipe_rlock, &cpipe->pipe_rlock);
	lwkt_gettoken(&cpipe_wlock, &cpipe->pipe_wlock);

	/*
	 * Set our state, wakeup anyone waiting in select, and
	 * wakeup anyone blocked on our pipe.
	 */
	cpipe->pipe_state |= PIPE_CLOSED | PIPE_REOF | PIPE_WEOF;
	pipeselwakeup(cpipe);
	if (cpipe->pipe_state & (PIPE_WANTR | PIPE_WANTW)) {
		cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
		wakeup(cpipe);
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		lwkt_gettoken(&ppipe_rlock, &ppipe->pipe_rlock);
		lwkt_gettoken(&ppipe_wlock, &ppipe->pipe_wlock);
		ppipe->pipe_state |= PIPE_REOF;
		pipeselwakeup(ppipe);
		if (ppipe->pipe_state & (PIPE_WANTR | PIPE_WANTW)) {
			ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
			wakeup(ppipe);
		}
		if (SLIST_FIRST(&ppipe->pipe_sel.si_note)) {
			get_mplock();
			KNOTE(&ppipe->pipe_sel.si_note, 0);
			rel_mplock();
		}
		lwkt_reltoken(&ppipe_rlock);
		lwkt_reltoken(&ppipe_wlock);
	}

	/*
	 * If the peer is also closed we can free resources for both
	 * sides, otherwise we leave our side intact to deal with any
	 * races (since we only have the slock).
	 */
	if (ppipe && (ppipe->pipe_state & PIPE_CLOSED)) {
		cpipe->pipe_peer = NULL;
		ppipe->pipe_peer = NULL;
		ppipe->pipe_slock = NULL;	/* we will free the slock */
		pipeclose(ppipe);
		ppipe = NULL;
	}

	lwkt_reltoken(&cpipe_rlock);
	lwkt_reltoken(&cpipe_wlock);
	if (cpipe->pipe_slock)
		lockmgr(cpipe->pipe_slock, LK_RELEASE);

	/*
	 * If we disassociated from our peer we can free resources
	 */
	if (ppipe == NULL) {
		gd = mycpu;
		if (cpipe->pipe_slock) {
			kfree(cpipe->pipe_slock, M_PIPE);
			cpipe->pipe_slock = NULL;
		}
		if (gd->gd_pipeqcount >= pipe_maxcache ||
		    cpipe->pipe_buffer.size != PIPE_SIZE) {
			pipe_free_kmem(cpipe);
			kfree(cpipe, M_PIPE);
		} else {
			cpipe->pipe_state = 0;
			cpipe->pipe_peer = gd->gd_pipeq;
			gd->gd_pipeq = cpipe;
			++gd->gd_pipeqcount;
		}
	}
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	get_mplock();
	cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		if (cpipe == NULL) {
			/* other end of pipe has been closed */
			rel_mplock();
			return (EPIPE);
		}
		break;
	default:
		rel_mplock();
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	rel_mplock();
	return (0);
}
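
/*
 * Hypothetical userland sketch (illustration only): registering for
 * EVFILT_READ on a pipe fd ends up in pipe_kqfilter() above, and
 * kn_data is subsequently filled in by filt_piperead() below.
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 */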

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;

	kn->kn_data = rpipe->pipe_buffer.windex - rpipe->pipe_buffer.rindex;

	/* XXX RACE */
	if (rpipe->pipe_state & PIPE_REOF) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;
	u_int32_t space;

	/* XXX RACE */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_WEOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	space = wpipe->pipe_buffer.windex -
		wpipe->pipe_buffer.rindex;
	space = wpipe->pipe_buffer.size - space;
	kn->kn_data = space;
	return (kn->kn_data >= PIPE_BUF);
}