/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/vfs_vnops.c,v 1.87.2.13 2002/12/29 18:19:53 dillon Exp $
 * $DragonFly: src/sys/kern/vfs_vnops.c,v 1.46 2006/09/10 01:26:39 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/nlookup.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/conf.h>
#include <sys/syslog.h>

static int vn_closefile (struct file *fp);
static int vn_ioctl (struct file *fp, u_long com, caddr_t data,
		struct ucred *cred);
static int vn_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int svn_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int vn_poll (struct file *fp, int events, struct ucred *cred);
static int vn_kqfilter (struct file *fp, struct knote *kn);
static int vn_statfile (struct file *fp, struct stat *sb, struct ucred *cred);
static int vn_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int svn_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);

struct fileops vnode_fileops = {
	.fo_read = vn_read,
	.fo_write = vn_write,
	.fo_ioctl = vn_ioctl,
	.fo_poll = vn_poll,
	.fo_kqfilter = vn_kqfilter,
	.fo_stat = vn_statfile,
	.fo_close = vn_closefile,
	.fo_shutdown = nofo_shutdown
};

struct fileops specvnode_fileops = {
	.fo_read = svn_read,
	.fo_write = svn_write,
	.fo_ioctl = vn_ioctl,
	.fo_poll = vn_poll,
	.fo_kqfilter = vn_kqfilter,
	.fo_stat = vn_statfile,
	.fo_close = vn_closefile,
	.fo_shutdown = nofo_shutdown
};

/*
 * Shortcut the device read/write.  This avoids a lot of vnode junk.
 * Basically the specfs vnops for read and write take the locked vnode,
 * unlock it (because we can't hold the vnode locked while reading or writing
 * a device which may block indefinitely), issue the device operation, then
 * relock the vnode before returning, plus other junk.  This bypasses all
 * of that and just does the device operation.
 */
void
vn_setspecops(struct file *fp)
{
	if (vfs_fastdev && fp->f_ops == &vnode_fileops) {
		fp->f_ops = &specvnode_fileops;
	}
}
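
/*
 * Illustrative call pattern (a sketch; the caller shown is an assumption,
 * not code from this file).  After the generic fileops are installed on a
 * freshly opened descriptor whose vnode turns out to be a device, the
 * fast path can be selected:
 *
 *	fp->f_ops = &vnode_fileops;
 *	if (vp->v_type == VCHR || vp->v_type == VBLK)
 *		vn_setspecops(fp);	// may switch to specvnode_fileops
 */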

/*
 * Common code for vnode open operations.  Check permissions, and call
 * the VOP_OPEN or VOP_NCREATE routine.
 *
 * The caller is responsible for setting up nd with nlookup_init() and
 * for cleaning it up with nlookup_done(), whether we return an error
 * or not.
 *
 * On success nd->nl_open_vp will hold a referenced and, if requested,
 * locked vnode.  A locked vnode is requested via NLC_LOCKVP.  If fp
 * is non-NULL the vnode will be installed in the file pointer.
 *
 * NOTE: The vnode is referenced just once on return whether or not it
 * is also installed in the file pointer.
 */
int
vn_open(struct nlookupdata *nd, struct file *fp, int fmode, int cmode)
{
	struct vnode *vp;
	struct ucred *cred = nd->nl_cred;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct namecache *ncp;
	int mode, error;

	/*
	 * Lookup the path and create or obtain the vnode.  After a
	 * successful lookup a locked nd->nl_ncp will be returned.
	 *
	 * The result of this section should be a locked vnode.
	 *
	 * XXX with only a little work we should be able to avoid locking
	 * the vnode if FWRITE, O_CREAT, and O_TRUNC are *not* set.
	 */
	if (fmode & O_CREAT) {
		/*
		 * CONDITIONAL CREATE FILE CASE
		 *
		 * Setting NLC_CREATE causes a negative hit to store
		 * the negative hit ncp and not return an error.  Then
		 * nc_error or nc_vp may be checked to see if the ncp
		 * represents a negative hit.  NLC_CREATE also requires
		 * write permission on the governing directory or EPERM
		 * is returned.
		 */
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
			nd->nl_flags |= NLC_FOLLOW;
		nd->nl_flags |= NLC_CREATE;
		bwillwrite();
		error = nlookup(nd);
	} else {
		/*
		 * NORMAL OPEN FILE CASE
		 */
		error = nlookup(nd);
	}

	if (error)
		return (error);
	ncp = nd->nl_ncp;

	/*
	 * split case to allow us to re-resolve and retry the ncp in case
	 * we get ESTALE.
	 */
again:
	if (fmode & O_CREAT) {
		if (ncp->nc_vp == NULL) {
			VATTR_NULL(vap);
			vap->va_type = VREG;
			vap->va_mode = cmode;
			if (fmode & O_EXCL)
				vap->va_vaflags |= VA_EXCLUSIVE;
			error = VOP_NCREATE(ncp, &vp, nd->nl_cred, vap);
			if (error)
				return (error);
			fmode &= ~O_TRUNC;
			/* locked vnode is returned */
		} else {
			if (fmode & O_EXCL) {
				error = EEXIST;
			} else {
				error = cache_vget(ncp, cred,
						   LK_EXCLUSIVE, &vp);
			}
			if (error)
				return (error);
			fmode &= ~O_CREAT;
		}
	} else {
		error = cache_vget(ncp, cred, LK_EXCLUSIVE, &vp);
		if (error)
			return (error);
	}

	/*
	 * We have a locked vnode and ncp now.  Note that the ncp will
	 * be cleaned up by the caller if nd->nl_ncp is left intact.
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if ((fmode & O_CREAT) == 0) {
		mode = 0;
		if (fmode & (FWRITE | O_TRUNC)) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			error = vn_writechk(vp);
			if (error) {
				/*
				 * Special stale handling, re-resolve the
				 * vnode.
				 */
				if (error == ESTALE) {
					vput(vp);
					vp = NULL;
					cache_setunresolved(ncp);
					error = cache_resolve(ncp, cred);
					if (error == 0)
						goto again;
				}
				goto bad;
			}
			mode |= VWRITE;
		}
		if (fmode & FREAD)
			mode |= VREAD;
		if (mode) {
			error = VOP_ACCESS(vp, mode, cred);
			if (error) {
				/*
				 * Special stale handling, re-resolve the
				 * vnode.
				 */
				if (error == ESTALE) {
					vput(vp);
					vp = NULL;
					cache_setunresolved(ncp);
					error = cache_resolve(ncp, cred);
					if (error == 0)
						goto again;
				}
				goto bad;
			}
		}
	}
	if (fmode & O_TRUNC) {
		vn_unlock(vp);				/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, cred);
		if (error)
			goto bad;
	}

	/*
	 * Setup the fp so VOP_OPEN can override it.  No descriptor has been
	 * associated with the fp yet so we own it clean.  f_ncp inherits
	 * nl_ncp.
	 */
	if (fp) {
		if (vp->v_type == VDIR) {
			fp->f_ncp = nd->nl_ncp;
			nd->nl_ncp = NULL;
			cache_unlock(fp->f_ncp);
		}
	}

	/*
	 * Get rid of nl_ncp.  vn_open does not return it (it returns the
	 * vnode or the file pointer).  Note: we can't leave nl_ncp locked
	 * through the VOP_OPEN anyway since the VOP_OPEN may block, e.g.
	 * on /dev/ttyd0
	 */
	if (nd->nl_ncp) {
		cache_put(nd->nl_ncp);
		nd->nl_ncp = NULL;
	}

	error = VOP_OPEN(vp, fmode, cred, fp);
	if (error) {
		/*
		 * setting f_ops to &badfileops will prevent the descriptor
		 * code from trying to close and release the vnode, since
		 * the open failed we do not want to call close.
		 */
		if (fp) {
			fp->f_data = NULL;
			fp->f_ops = &badfileops;
		}
		goto bad;
	}

#if 0
	/*
	 * Assert that VREG files have been setup for vmio.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("vn_open: regular file was not VMIO enabled!"));
#endif

	/*
	 * Return the vnode.  XXX needs some cleaning up.  The vnode is
	 * only returned in the fp == NULL case.
	 */
	if (fp == NULL) {
		nd->nl_open_vp = vp;
		nd->nl_vp_fmode = fmode;
		if ((nd->nl_flags & NLC_LOCKVP) == 0)
			vn_unlock(vp);
	} else {
		vput(vp);
	}
	return (0);
bad:
	if (vp)
		vput(vp);
	return (error);
}
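
/*
 * Example of the documented calling convention (a minimal sketch; the
 * path, flags, and the caller taking over nl_open_vp are assumptions):
 *
 *	struct nlookupdata nd;
 *	struct vnode *vp = NULL;
 *	int error;
 *
 *	error = nlookup_init(&nd, "/tmp/file", UIO_SYSSPACE, NLC_FOLLOW);
 *	if (error == 0)
 *		error = vn_open(&nd, NULL, FREAD, 0);
 *	if (error == 0) {
 *		vp = nd.nl_open_vp;	// referenced; locked only if NLC_LOCKVP
 *		nd.nl_open_vp = NULL;	// assumed: caller keeps the reference
 *	}
 *	nlookup_done(&nd);		// always clean up, error or not
 */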

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(struct vnode *vp)
{
	/*
	 * If there's shared text associated with
	 * the vnode, try to free it up once.  If
	 * we fail, we can't allow writing.
	 */
	if (vp->v_flag & VTEXT)
		return (ETXTBSY);
	return (0);
}

/*
 * Vnode close call
 */
int
vn_close(struct vnode *vp, int flags)
{
	int error;

	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) == 0) {
		error = VOP_CLOSE(vp, flags);
		vn_unlock(vp);
	}
	vrele(vp);
	return (error);
}

static __inline
int
sequential_heuristic(struct uio *uio, struct file *fp)
{
	/*
	 * Sequential heuristic - detect sequential operation
	 */
	if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
	    uio->uio_offset == fp->f_nextoff) {
		int tmpseq = fp->f_seqcount;
		/*
		 * XXX we assume that the filesystem block size is
		 * the default.  Not true, but still gives us a pretty
		 * good indicator of how sequential the read operations
		 * are.
		 */
		tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
		if (tmpseq > IO_SEQMAX)
			tmpseq = IO_SEQMAX;
		fp->f_seqcount = tmpseq;
		return(fp->f_seqcount << IO_SEQSHIFT);
	}

	/*
	 * Not sequential, quick draw-down of seqcount
	 */
	if (fp->f_seqcount > 1)
		fp->f_seqcount = 1;
	else
		fp->f_seqcount = 0;
	return(0);
}
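
/*
 * Worked example (assuming the stock constants BKVASIZE = 16384 and
 * IO_SEQMAX = 0x7f): a sequential 8192-byte read adds
 * (8192 + 16383) / 16384 = 1 to f_seqcount.  Once f_seqcount saturates
 * at IO_SEQMAX the hint returned is IO_SEQMAX << IO_SEQSHIFT, which the
 * VOP_READ/VOP_WRITE paths fold into ioflag to size read-ahead.
 */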

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
	off_t offset, enum uio_seg segflg, int ioflg,
	struct ucred *cred, int *aresid)
{
	struct uio auio;
	struct iovec aiov;
	struct ccms_lock ccms_lock;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_td = curthread;
	ccms_lock_get_uio(&vp->v_ccms, &ccms_lock, &auio);
	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}
	ccms_lock_put(&vp->v_ccms, &ccms_lock);
	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;
	if ((ioflg & IO_NODELOCKED) == 0)
		vn_unlock(vp);
	return (error);
}
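
/*
 * Usage sketch (the buffer, vnode, and credential are assumptions of the
 * example): read the first 512 bytes of a file into a kernel buffer.
 *
 *	char buf[512];
 *	int error, resid;
 *
 *	error = vn_rdwr(UIO_READ, vp, buf, sizeof(buf), (off_t)0,
 *			UIO_SYSSPACE, 0, cred, &resid);
 *	// on success, sizeof(buf) - resid bytes were actually read;
 *	// ioflg 0 means vn_rdwr locks and unlocks the vnode itself
 */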

/*
 * Package up an I/O request on a vnode into a uio and do it.  The I/O
 * request is split up into smaller chunks and we try to avoid saturating
 * the buffer cache while potentially holding a vnode locked, so we
 * check bwillwrite() before calling vn_rdwr().  We also call uio_yield()
 * to give other processes a chance to lock the vnode (either other processes
 * core'ing the same binary, or unrelated processes scanning the directory).
 */
int
vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
		 off_t offset, enum uio_seg segflg, int ioflg,
		 struct ucred *cred, int *aresid)
{
	int error = 0;

	do {
		int chunk;

		/*
		 * Force `offset' to a multiple of MAXBSIZE except possibly
		 * for the first chunk, so that filesystems only need to
		 * write full blocks except possibly for the first and last
		 * chunks.
		 */
		chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;

		if (chunk > len)
			chunk = len;
		if (rw != UIO_READ && vp->v_type == VREG)
			bwillwrite();
		error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
				ioflg, cred, aresid);
		len -= chunk;	/* aresid calc already includes length */
		if (error)
			break;
		offset += chunk;
		base += chunk;
		uio_yield();
	} while (len);
	if (aresid)
		*aresid += len;
	return (error);
}
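
/*
 * Worked example of the chunking (assuming the stock MAXBSIZE of 65536):
 * a 200000-byte write starting at offset 10000 is issued as 55536 bytes
 * (up to the next MAXBSIZE boundary), then 65536, 65536, and a final
 * 13392 bytes, so all middle chunks are block-aligned and full-sized.
 */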

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	struct ccms_lock ccms_lock;
	struct vnode *vp;
	int error, ioflag;

	get_mplock();
	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not td %p", uio->uio_td, curthread));
	vp = (struct vnode *)fp->f_data;

	ioflag = 0;
	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;
	ioflag |= sequential_heuristic(uio, fp);

	ccms_lock_get_uio(&vp->v_ccms, &ccms_lock, uio);
	error = VOP_READ(vp, uio, ioflag, cred);
	ccms_lock_put(&vp->v_ccms, &ccms_lock);
	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
	vn_unlock(vp);
	rel_mplock();
	return (error);
}

/*
 * Device-optimized file table vnode read routine.
 *
 * This bypasses the VOP table and talks directly to the device.  Most
 * filesystems just route to specfs and can make this optimization.
 *
 * MPALMOSTSAFE - acquires mplock
 */
static int
svn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	struct vnode *vp;
	int ioflag;
	int error;
	cdev_t dev;

	get_mplock();
	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not td %p", uio->uio_td, curthread));

	vp = (struct vnode *)fp->f_data;
	if (vp == NULL || vp->v_type == VBAD) {
		error = EBADF;
		goto done;
	}

	if ((dev = vp->v_rdev) == NULL) {
		error = EBADF;
		goto done;
	}
	reference_dev(dev);

	if (uio->uio_resid == 0) {
		/* don't leak the device reference taken above */
		release_dev(dev);
		error = 0;
		goto done;
	}
	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;

	ioflag = 0;
	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	ioflag |= sequential_heuristic(uio, fp);

	error = dev_dread(dev, uio, ioflag);

	release_dev(dev);
	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
done:
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	struct ccms_lock ccms_lock;
	struct vnode *vp;
	int error, ioflag;

	get_mplock();
	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not td %p", uio->uio_td, curthread));
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type == VREG)
		bwillwrite();
	vp = (struct vnode *)fp->f_data;	/* XXX needed? */

	ioflag = IO_UNIT;
	if (vp->v_type == VREG &&
	    ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
		ioflag |= IO_APPEND;
	}

	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	if (flags & O_FASYNCWRITE) {
		/* ioflag &= ~IO_SYNC; */
	} else if (flags & O_FSYNCWRITE) {
		ioflag |= IO_SYNC;
	} else if (fp->f_flag & O_FSYNC) {
		ioflag |= IO_SYNC;
	}

	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
		ioflag |= IO_SYNC;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;
	ioflag |= sequential_heuristic(uio, fp);
	ccms_lock_get_uio(&vp->v_ccms, &ccms_lock, uio);
	error = VOP_WRITE(vp, uio, ioflag, cred);
	ccms_lock_put(&vp->v_ccms, &ccms_lock);
	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
	vn_unlock(vp);
	rel_mplock();
	return (error);
}

/*
 * Device-optimized file table vnode write routine.
 *
 * This bypasses the VOP table and talks directly to the device.  Most
 * filesystems just route to specfs and can make this optimization.
 *
 * MPALMOSTSAFE - acquires mplock
 */
static int
svn_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	struct vnode *vp;
	int ioflag;
	int error;
	cdev_t dev;

	get_mplock();
	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not td %p", uio->uio_td, curthread));

	vp = (struct vnode *)fp->f_data;
	if (vp == NULL || vp->v_type == VBAD) {
		error = EBADF;
		goto done;
	}
	if (vp->v_type == VREG)
		bwillwrite();
	vp = (struct vnode *)fp->f_data;	/* XXX needed? */

	if ((dev = vp->v_rdev) == NULL) {
		error = EBADF;
		goto done;
	}
	reference_dev(dev);

	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;

	ioflag = IO_UNIT;
	if (vp->v_type == VREG &&
	    ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
		ioflag |= IO_APPEND;
	}

	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	if (flags & O_FASYNCWRITE) {
		/* ioflag &= ~IO_SYNC; */
	} else if (flags & O_FSYNCWRITE) {
		ioflag |= IO_SYNC;
	} else if (fp->f_flag & O_FSYNC) {
		ioflag |= IO_SYNC;
	}

	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
		ioflag |= IO_SYNC;
	ioflag |= sequential_heuristic(uio, fp);

	error = dev_dwrite(dev, uio, ioflag);

	release_dev(dev);
	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
done:
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_statfile(struct file *fp, struct stat *sb, struct ucred *cred)
{
	struct vnode *vp;
	int error;

	get_mplock();
	vp = (struct vnode *)fp->f_data;
	error = vn_stat(vp, sb, cred);
	rel_mplock();
	return (error);
}

int
vn_stat(struct vnode *vp, struct stat *sb, struct ucred *cred)
{
	struct vattr vattr;
	struct vattr *vap;
	int error;
	u_short mode;
	cdev_t dev;

	vap = &vattr;
	error = VOP_GETATTR(vp, vap);
	if (error)
		return (error);

	/*
	 * Zero the spare stat fields
	 */
	sb->st_lspare = 0;
	sb->st_qspare = 0;

	/*
	 * Copy from vattr table
	 */
	if (vap->va_fsid != VNOVAL)
		sb->st_dev = vap->va_fsid;
	else
		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
	sb->st_ino = vap->va_fileid;
	mode = vap->va_mode;
	switch (vap->va_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		/* This is a cosmetic change, symlinks do not have a mode. */
		if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
			mode &= ~ACCESSPERMS;	/* 0000 */
		else
			mode |= ACCESSPERMS;	/* 0777 */
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = vap->va_nlink;
	sb->st_uid = vap->va_uid;
	sb->st_gid = vap->va_gid;
	sb->st_rdev = vap->va_rdev;
	sb->st_size = vap->va_size;
	sb->st_atimespec = vap->va_atime;
	sb->st_mtimespec = vap->va_mtime;
	sb->st_ctimespec = vap->va_ctime;

	/*
	 * A VCHR and VBLK device may track the last access and last modified
	 * time independently of the filesystem.  This is particularly true
	 * because device read and write calls may bypass the filesystem.
	 */
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		if ((dev = vp->v_rdev) != NULL) {
			if (dev->si_lastread) {
				sb->st_atimespec.tv_sec = dev->si_lastread;
				sb->st_atimespec.tv_nsec = 0;
			}
			if (dev->si_lastwrite) {
				sb->st_mtimespec.tv_sec = dev->si_lastwrite;
				sb->st_mtimespec.tv_nsec = 0;
			}
		}
	}

	/*
	 * According to www.opengroup.org, the meaning of st_blksize is
	 * "a filesystem-specific preferred I/O block size for this
	 * object.  In some filesystem types, this may vary from file
	 * to file"
	 * Default to PAGE_SIZE after much discussion.
	 */

	if (vap->va_type == VREG) {
		sb->st_blksize = vap->va_blocksize;
	} else if (vn_isdisk(vp, NULL)) {
		/*
		 * XXX this is broken.  If the device is not yet open (aka
		 * stat() call, aka v_rdev == NULL), how are we supposed
		 * to get a valid block size out of it?
		 */
		cdev_t dev;

		if ((dev = vp->v_rdev) == NULL)
			dev = udev2dev(vp->v_udev, vp->v_type == VBLK);
		sb->st_blksize = dev->si_bsize_best;
		if (sb->st_blksize < dev->si_bsize_phys)
			sb->st_blksize = dev->si_bsize_phys;
		if (sb->st_blksize < BLKDEV_IOSIZE)
			sb->st_blksize = BLKDEV_IOSIZE;
	} else {
		sb->st_blksize = PAGE_SIZE;
	}

	sb->st_flags = vap->va_flags;
	if (suser_cred(cred, 0))
		sb->st_gen = 0;
	else
		sb->st_gen = vap->va_gen;

#if (S_BLKSIZE == 512)
	/* Optimize this case */
	sb->st_blocks = vap->va_bytes >> 9;
#else
	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
#endif
	sb->st_fsmid = vap->va_fsmid;
	return (0);
}
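
/*
 * Worked example of the translation above (values are hypothetical): a
 * regular file with va_mode 0644 yields st_mode = S_IFREG | 0644
 * (0100644 octal), and va_bytes of 4096 yields st_blocks = 4096 >> 9 = 8
 * 512-byte blocks.
 */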

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred)
{
	struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vnode *ovp;
	struct vattr vattr;
	int error;

	get_mplock();

	switch (vp->v_type) {
	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			if ((error = VOP_GETATTR(vp, &vattr)) != 0)
				break;
			*(int *)data = vattr.va_size - fp->f_offset;
			error = 0;
			break;
		}
		if (com == FIOASYNC) {				/* XXX */
			error = 0;				/* XXX */
			break;
		}
		/* fall into ... */
	default:
#if 0
		return (ENOTTY);
#endif
	case VFIFO:
	case VCHR:
	case VBLK:
		if (com == FIODTYPE) {
			if (vp->v_type != VCHR && vp->v_type != VBLK) {
				error = ENOTTY;
				break;
			}
			*(int *)data = dev_dflags(vp->v_rdev) & D_TYPEMASK;
			error = 0;
			break;
		}
		error = VOP_IOCTL(vp, com, data, fp->f_flag, ucred);
		if (error == 0 && com == TIOCSCTTY) {
			struct proc *p = curthread->td_proc;
			struct session *sess;

			if (p == NULL) {
				error = ENOTTY;
				break;
			}

			sess = p->p_session;
			/* Do nothing if reassigning same control tty */
			if (sess->s_ttyvp == vp) {
				error = 0;
				break;
			}

			/* Get rid of reference to old control tty */
			ovp = sess->s_ttyvp;
			vref(vp);
			sess->s_ttyvp = vp;
			if (ovp)
				vrele(ovp);
		}
		break;
	}
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_poll(struct file *fp, int events, struct ucred *cred)
{
	int error;

	get_mplock();
	error = VOP_POLL(((struct vnode *)fp->f_data), events, cred);
	rel_mplock();
	return (error);
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
#ifndef DEBUG_LOCKS
vn_lock(struct vnode *vp, int flags)
#else
debug_vn_lock(struct vnode *vp, int flags, const char *filename, int line)
#endif
{
	int error;

	do {
#ifdef DEBUG_LOCKS
		vp->filename = filename;
		vp->line = line;
		error = debuglockmgr(&vp->v_lock, flags,
				     "vn_lock", filename, line);
#else
		error = lockmgr(&vp->v_lock, flags);
#endif
		if (error == 0)
			break;
	} while (flags & LK_RETRY);

	/*
	 * Because we (had better!) have a ref on the vnode, once it
	 * goes to VRECLAIMED state it will not be recycled until all
	 * refs go away.  So we can just check the flag.
	 */
	if (error == 0 && (vp->v_flag & VRECLAIMED)) {
		lockmgr(&vp->v_lock, LK_RELEASE);
		error = ENOENT;
	}
	return (error);
}
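
/*
 * Typical call pattern (sketch): with LK_RETRY the lockmgr loop above
 * retries until the lock is acquired, so the only error left for a
 * caller holding a reference is a reclaimed vnode.
 *
 *	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) == 0) {
 *		// ... operate on the locked vnode ...
 *		vn_unlock(vp);
 *	}
 *	// error == ENOENT here means vp was reclaimed out from under us
 */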

void
vn_unlock(struct vnode *vp)
{
	lockmgr(&vp->v_lock, LK_RELEASE);
}

int
vn_islocked(struct vnode *vp)
{
	return (lockstatus(&vp->v_lock, curthread));
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_closefile(struct file *fp)
{
	int error;

	get_mplock();
	fp->f_ops = &badfileops;
	error = vn_close(((struct vnode *)fp->f_data), fp->f_flag);
	rel_mplock();
	return(error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
vn_kqfilter(struct file *fp, struct knote *kn)
{
	int error;

	get_mplock();
	error = VOP_KQFILTER(((struct vnode *)fp->f_data), kn);
	rel_mplock();
	return (error);
}