2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)vnode.h 8.7 (Berkeley) 2/4/94
34 * $FreeBSD: src/sys/sys/vnode.h,v 1.111.2.19 2002/12/29 18:19:53 dillon Exp $
35 * $DragonFly: src/sys/sys/vnode.h,v 1.83 2008/09/17 21:44:19 dillon Exp $
41 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
44 #include <sys/queue.h>
50 #include <sys/event.h>
52 #ifndef _SYS_BIOTRACK_H_
53 #include <sys/biotrack.h>
61 #ifndef _SYS_NAMECACHE_H_
62 #include <sys/namecache.h>
64 #ifndef _SYS_THREAD_H_
65 #include <sys/thread.h>
67 #ifndef _SYS_VFSOPS_H_
68 #include <sys/vfsops.h>
70 #ifndef _SYS_VFSCACHE_H_
71 #include <sys/vfscache.h>
76 #ifndef _SYS_SYSLINK_RPC_H_
77 #include <sys/syslink_rpc.h>
79 #ifndef _SYS_SYSREF_H_
80 #include <sys/sysref.h>
85 #ifndef _MACHINE_LOCK_H_
86 #include <machine/lock.h>
90 * The vnode is the focus of all file activity in UNIX. There is a
91 * unique vnode allocated for each active file, each current directory,
92 * each mounted-on file, text file, and the root.
96 * Each underlying filesystem allocates its own private area and hangs
97 * it from v_data. If non-null, this area is freed in getnewvnode().
99 TAILQ_HEAD(buflists, buf);
102 * Struct for mount options to printable formats.
104 struct mountctl_opt {
110 * The vnode infrastructure is being reorganized. Most reference-related
111 * fields are locked by the BGL, and most file I/O related operations and
112 * vnode teardown functions are locked by the vnode lock.
114 * File read operations require a shared lock, file write operations require
115 * an exclusive lock. Most directory operations (read or write) currently
116 * require an exclusive lock due to the side effects stored in the directory
117 * inode (which we intend to fix).
119 * File reads and writes are further protected by a range lock. The intention
120 * is to be able to break I/O operations down into more easily managed pieces
121 * so vm_page arrays can be passed through rather than UIOs. This work will
122 * occur in multiple stages. The range locks will also eventually be used to
123 * deal with clustered cache coherency issues and, more immediately, to
124 * protect operations associated with the kernel-managed journaling module.
126 * NOTE: Certain fields within the vnode structure require v_token to be
127 * held. The vnode's normal lock need not be held when accessing
128 * these fields as long as the vnode is deterministically referenced
129 * (i.e. can't be ripped out from under the caller). This is typical
130 * for code paths based on descriptors or file pointers, but not for
131 * backdoor code paths that come in via the buffer cache.
138 * NOTE: The vnode operations vector, v_ops, is a double-indirect that
139 * typically points to &v_mount->mnt_vn_use_ops. We use a double
140 * pointer because mnt_vn_use_ops may change dynamically when e.g.
141 * journaling is turned on or off.
143 * NOTE: v_filesize is currently only applicable when a VM object is
144 * associated with the vnode. Otherwise it will be set to NOOFFSET.
146 * NOTE: The following fields require a spin or token lock. Note that
147 * additional subsystems may use v_token or v_spin for other
148 * purposes, e.g. vfs/fifofs/fifo_vnops.c
153 RB_HEAD(buf_rb_tree, buf);
154 RB_HEAD(buf_rb_hash, buf);
157 struct spinlock v_spin;
158 int v_flag; /* vnode flags (see below) */
160 int v_opencount; /* number of explicit opens */
161 int v_auxrefs; /* auxiliary references */
162 struct sysref v_sysref; /* normal references */
163 struct bio_track v_track_read; /* track I/O's in progress */
164 struct bio_track v_track_write; /* track I/O's in progress */
165 struct mount *v_mount; /* ptr to vfs we are in */
166 struct vop_ops **v_ops; /* vnode operations vector */
167 TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist/cachelist */
168 TAILQ_ENTRY(vnode) v_nmntvnodes; /* vnodes for mount point */
169 struct buf_rb_tree v_rbclean_tree; /* RB tree of clean bufs */
170 struct buf_rb_tree v_rbdirty_tree; /* RB tree of dirty bufs */
171 struct buf_rb_hash v_rbhash_tree; /* RB tree general lookup */
172 LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */
173 enum vtype v_type; /* vnode type */
175 struct socket *vu_socket; /* unix ipc (VSOCK) */
177 int vu_umajor; /* device number for attach */
179 struct cdev *vu_cdevinfo; /* device (VCHR, VBLK) */
180 SLIST_ENTRY(vnode) vu_cdevnext;
182 struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
184 off_t v_filesize; /* file EOF or NOOFFSET */
185 off_t v_lazyw; /* lazy write iterator */
186 off_t v_lastw; /* last write (write cluster) */
187 off_t v_cstart; /* start block of cluster */
188 off_t v_lasta; /* last allocation */
189 int v_clen; /* length of current cluster */
190 struct vm_object *v_object; /* Place to store VM object */
191 struct lock v_lock; /* file/dir ops lock */
192 struct lwkt_token v_token; /* (see above) */
193 enum vtagtype v_tag; /* type of underlying data */
194 void *v_data; /* private data for fs */
195 struct namecache_list v_namecache; /* (S) associated nc entries */
197 struct kqinfo vpi_kqinfo; /* identity of poller(s) */
199 struct vmresident *v_resident; /* optional vmresident */
200 struct ccms_dataspace v_ccms; /* cache coherency */
201 struct mount *v_pfsmp; /* XXX: hack for PFS accounting */
203 const char *filename; /* Source file doing locking */
204 int line; /* Line number doing locking */
207 #define v_socket v_un.vu_socket
208 #define v_umajor v_un.vu_cdev.vu_umajor
209 #define v_uminor v_un.vu_cdev.vu_uminor
210 #define v_rdev v_un.vu_cdev.vu_cdevinfo
211 #define v_cdevnext v_un.vu_cdev.vu_cdevnext
212 #define v_fifoinfo v_un.vu_fifoinfo
/*
 * Vnode flag bits (the v_flag member is commented "vnode flags (see below)").
 * Every name below is a distinct single bit.  Values tagged "open for
 * business" have no name defined in this list.  VAGE0 and VAGE1 together
 * hold the 2-bit recycling age count.
 */
217 #define VROOT 0x00000001 /* root of its file system */
218 #define VTEXT 0x00000002 /* vnode is a pure text prototype */
219 #define VSYSTEM 0x00000004 /* vnode being used by kernel */
220 #define VISTTY 0x00000008 /* vnode represents a tty */
221 #define VCTTYISOPEN 0x00000010 /* controlling terminal tty is open */
222 #define VCKPT 0x00000020 /* checkpoint-restored vnode */
223 /* open for business 0x00000040 */
224 #define VMAYHAVELOCKS 0x00000080 /* maybe posix or flock locks on vp */
225 #define VPFSROOT 0x00000100 /* may be a pseudo filesystem root */
226 /* open for business 0x00000200 */
227 #define VAGE0 0x00000400 /* Age count for recycling - 2 bits */
228 #define VAGE1 0x00000800 /* Age count for recycling - 2 bits */
229 #define VCACHED 0x00001000 /* No active references but has cache value */
230 #define VOBJBUF 0x00002000 /* Allocate buffers in VM object */
231 #define VINACTIVE 0x00004000 /* The vnode is inactive (did VOP_INACTIVE) */
232 /* open for business 0x00008000 */
233 #define VOLOCK 0x00010000 /* vnode is locked waiting for an object */
234 #define VOWANT 0x00020000 /* a process is waiting for VOLOCK */
235 #define VRECLAIMED 0x00040000 /* This vnode has been destroyed */
236 #define VFREE 0x00080000 /* This vnode is on the freelist */
237 #define VNOTSEEKABLE 0x00100000 /* rd/wr ignores file offset */
238 #define VONWORKLST 0x00200000 /* On syncer work-list */
239 #define VMOUNT 0x00400000 /* Mount in progress */
240 #define VOBJDIRTY 0x00800000 /* object might be dirty */
241 #define VSWAPCACHE 0x01000000 /* enable swapcache */
242 /* open for business 0x02000000 */
243 /* open for business 0x04000000 */
246 * vmntvnodescan() flags
248 #define VMSC_GETVP 0x01
249 #define VMSC_GETVX 0x02
250 #define VMSC_NOWAIT 0x10
251 #define VMSC_ONEPASS 0x20
254 * Flags for ioflag. (high 16 bits used to ask for read-ahead and
255 * help with write clustering)
/* Individual ioflag request bits; each name is a distinct single bit. */
257 #define IO_UNIT 0x0001 /* do I/O as atomic unit */
258 #define IO_APPEND 0x0002 /* append write to end */
259 #define IO_SYNC 0x0004 /* do I/O synchronously */
260 #define IO_NODELOCKED 0x0008 /* underlying node already locked */
261 #define IO_NDELAY 0x0010 /* FNDELAY flag set in file table */
262 #define IO_VMIO 0x0020 /* data already in VMIO space */
263 #define IO_INVAL 0x0040 /* invalidate after I/O */
264 #define IO_ASYNC 0x0080 /* bawrite rather than bdwrite */
265 #define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */
266 #define IO_RECURSE 0x0200 /* possibly device-recursive (vn) */
267 #define IO_CORE 0x0400 /* I/O is part of core dump */
268 #define IO_NRDELAY 0x0800 /* do not block on disk reads */
/*
 * Sequential-access heuristic: IO_SEQSHIFT is the bit position at which the
 * heuristic value starts within ioflag, IO_SEQMAX is its largest value.
 */
270 #define IO_SEQMAX 0x7F /* seq heuristic max value */
271 #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */
274 * Modes. Note that these V-modes must match file S_I*USR, SUID, SGID,
275 * and SVTX flag bits.
277 #define VSUID 04000 /* set user id on execution */
278 #define VSGID 02000 /* set group id on execution */
279 #define VSVTX 01000 /* save swapped text even after use */
280 #define VREAD 00400 /* read, write, execute permissions */
285 * Token indicating no attribute value yet assigned.
290 * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon)
292 #define VLKTIMEOUT (hz / 20 + 1)
297 * Convert between vnode types and inode formats (since POSIX.1
298 * defines mode word of stat structure in terms of inode formats).
300 extern enum vtype iftovt_tab[];
301 extern int vttoif_tab[];
/*
 * Conversions between vnode types and inode format bits.
 *
 * IFTOVT(mode)          - entry of iftovt_tab[] selected by the S_IFMT bits
 *                         of mode, shifted right by 12.
 * VTTOIF(indx)          - entry of vttoif_tab[] selected by indx (cast to
 *                         int).
 * MAKEIMODE(indx, mode) - VTTOIF(indx) OR'ed with mode, converted to int.
 *                         The whole expansion is parenthesized so that it
 *                         behaves as a single operand wherever it is used
 *                         (for example directly after sizeof).
 */
#define IFTOVT(mode)		(iftovt_tab[((mode) & S_IFMT) >> 12])
#define VTTOIF(indx)		(vttoif_tab[(int)(indx)])
#define MAKEIMODE(indx, mode)	((int)(VTTOIF(indx) | (mode)))
307 * Flags to various vnode functions.
309 #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
310 #define FORCECLOSE 0x0002 /* vflush: force file closure */
311 #define WRITECLOSE 0x0004 /* vflush: only close writable files */
312 #define DOCLOSE 0x0008 /* vclean: close active files */
313 #define V_SAVE 0x0001 /* vinvalbuf: sync file first */
316 #define VATTR_NULL(vap) vattr_null(vap)
318 #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */
319 #endif /* DIAGNOSTIC */
321 #define NULLVP ((struct vnode *)NULL)
/*
 * VNODEOP_SET(f) expands to a SYSINIT entry named f##init and a SYSUNINIT
 * entry named f##uninit, both at SI_SUB_VFS / SI_ORDER_SECOND.  The former
 * passes &f to vfs_nadd_vnodeops_sysinit(), the latter passes &f to
 * vfs_nrm_vnodeops_sysinit().  The expansion already ends with a ';'.
 */
323 #define VNODEOP_SET(f) \
324 SYSINIT(f##init, SI_SUB_VFS, SI_ORDER_SECOND, vfs_nadd_vnodeops_sysinit, &f); \
325 SYSUNINIT(f##uninit, SI_SUB_VFS, SI_ORDER_SECOND,vfs_nrm_vnodeops_sysinit, &f);
332 extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
333 extern struct nchandle rootnch; /* root (i.e. "/") namecache */
334 extern int desiredvnodes; /* number of vnodes desired */
335 extern time_t syncdelay; /* max time to delay syncing data */
336 extern time_t filedelay; /* time to delay syncing files */
337 extern time_t dirdelay; /* time to delay syncing directories */
338 extern time_t metadelay; /* time to delay syncing metadata */
339 extern struct objcache *namei_oc;
340 extern int prtactive; /* nonzero to call vprint() */
341 extern struct vattr va_null; /* predefined null vattr structure */
342 extern int vfs_ioopt;
343 extern int numvnodes;
344 extern int freevnodes;
347 * Interlock for scanning list of vnodes attached to a mountpoint
349 extern struct lwkt_token mntvnode_token;
352 * This macro is very helpful in defining those offsets in the vdesc struct.
354 * This is stolen from X11R4. I ignored all the fancy stuff for
355 * Crays, so if you decide to port this to such a serious machine,
356 * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
/*
 * Structure-offset helpers.
 *
 * VOPARG_OFFSET(p_type, field)
 *	Byte offset, as an int, of member `field` within the structure that
 *	the pointer type `p_type` points to.
 * VOPARG_OFFSETOF(s_type, field)
 *	Same result, given the structure type itself instead of a pointer
 *	type.
 * VOPARG_OFFSETTO(S_TYPE, S_OFFSET, STRUCT_P)
 *	STRUCT_P advanced by S_OFFSET bytes, cast to S_TYPE.
 *
 * The offset is produced by converting the member address to an integer
 * instead of subtracting (char *)NULL from it: pointer subtraction is only
 * defined between pointers into the same object, and compilers diagnose
 * subtraction of a null pointer.
 */
#define VOPARG_OFFSET(p_type, field) \
	((int)(size_t)(&(((p_type)NULL)->field)))
#define VOPARG_OFFSETOF(s_type, field) \
	VOPARG_OFFSET(s_type *, field)
#define VOPARG_OFFSETTO(S_TYPE, S_OFFSET, STRUCT_P) \
	((S_TYPE)(((char *)(STRUCT_P)) + (S_OFFSET)))
365 typedef int (*vnodeopv_entry_t)(struct vop_generic_args *);
368 * VOCALL calls an op given an ops vector. We break it out because BSD's
369 * vclean changes the ops vector and then wants to call ops with the old
373 typedef int (*vocall_func_t)(struct vop_generic_args *);
376 * This call executes the vops vector for the offset stored in the ap's
377 * descriptor of the passed vops rather than the one related to the
378 * ap's vop_ops structure. It is used to chain VOPS calls on behalf of
379 * filesystems from a VFS's context ONLY (that is, from a VFS's own vops
/*
 * VOCALL(vops, ap) - call one operation through an ops vector.
 *
 * vops is addressed as an array of bytes; the vocall_func_t stored
 * (ap)->a_desc->sd_offset bytes into it is fetched and called with ap.
 * The value of the expression is whatever that function returns.
 */
#define VOCALL(vops, ap) \
	(*(vocall_func_t *)&((char *)(vops))[(ap)->a_desc->sd_offset])(ap)
385 #define VDESC(OP) (& __CONCAT(OP,_desc))
388 * Public vnode manipulation functions.
401 struct vnode *getsynthvnode(const char *devname);
402 void addaliasu (struct vnode *vp, int x, int y);
403 int v_associate_rdev(struct vnode *vp, cdev_t dev);
404 void v_release_rdev(struct vnode *vp);
405 int bdevvp (cdev_t dev, struct vnode **vpp);
406 struct vnode *allocvnode(int lktimeout, int lkflags);
407 int freesomevnodes(int count);
408 int getnewvnode (enum vtagtype tag, struct mount *mp,
409 struct vnode **vpp, int timo, int lkflags);
410 int getspecialvnode (enum vtagtype tag, struct mount *mp,
411 struct vop_ops **ops, struct vnode **vpp, int timo,
413 int speedup_syncer (void);
414 int vaccess(enum vtype, mode_t, uid_t, gid_t, mode_t, struct ucred *);
415 void vattr_null (struct vattr *vap);
416 int vcount (struct vnode *vp);
417 int vfinddev (cdev_t dev, enum vtype type, struct vnode **vpp);
418 void vfs_nadd_vnodeops_sysinit (void *);
419 void vfs_nrm_vnodeops_sysinit (void *);
420 void vfs_add_vnodeops(struct mount *, struct vop_ops *, struct vop_ops **);
421 void vfs_rm_vnodeops(struct mount *, struct vop_ops *, struct vop_ops **);
422 int vflush (struct mount *mp, int rootrefs, int flags);
423 int vmntvnodescan(struct mount *mp, int flags,
424 int (*fastfunc)(struct mount *mp, struct vnode *vp, void *data),
425 int (*slowfunc)(struct mount *mp, struct vnode *vp, void *data),
427 void insmntque(struct vnode *vp, struct mount *mp);
429 void vclean_vxlocked (struct vnode *vp, int flags);
430 void vclean_unlocked (struct vnode *vp);
431 void vgone_vxlocked (struct vnode *vp);
432 int vrevoke (struct vnode *vp, struct ucred *cred);
433 int vinvalbuf (struct vnode *vp, int save, int slpflag, int slptimeo);
434 int vtruncbuf (struct vnode *vp, off_t length, int blksize);
435 void vnode_pager_setsize (struct vnode *, vm_ooffset_t);
436 int nvtruncbuf (struct vnode *vp, off_t length, int blksize, int boff);
437 int nvextendbuf(struct vnode *vp, off_t olength, off_t nlength,
438 int oblksize, int nblksize,
439 int oboff, int nboff, int trivial);
440 void nvnode_pager_setsize (struct vnode *vp, off_t length,
441 int blksize, int boff);
442 int vfsync(struct vnode *vp, int waitfor, int passes,
443 int (*checkdef)(struct buf *),
444 int (*waitoutput)(struct vnode *, struct thread *));
445 int vinitvmio(struct vnode *vp, off_t filesize, int blksize, int boff);
446 void vprint (char *label, struct vnode *vp);
447 int vrecycle (struct vnode *vp);
448 int vmaxiosize (struct vnode *vp);
449 void vn_strategy(struct vnode *vp, struct bio *bio);
450 int vn_cache_strategy(struct vnode *vp, struct bio *bio);
451 int vn_close (struct vnode *vp, int flags);
452 void vn_gone (struct vnode *vp);
453 int vn_isdisk (struct vnode *vp, int *errp);
454 int vn_islocked (struct vnode *vp);
455 int vn_islocked_unlock (struct vnode *vp);
456 void vn_islocked_relock (struct vnode *vp, int vpls);
457 int vn_lock (struct vnode *vp, int flags);
458 void vn_unlock (struct vnode *vp);
461 int debug_vn_lock (struct vnode *vp, int flags,
462 const char *filename, int line);
463 #define vn_lock(vp,flags) debug_vn_lock(vp, flags, __FILE__, __LINE__)
466 /*#define DEBUG_VN_UNLOCK*/
467 #ifdef DEBUG_VN_UNLOCK
468 void debug_vn_unlock (struct vnode *vp,
469 const char *filename, int line);
470 #define vn_unlock(vp) debug_vn_unlock(vp, __FILE__, __LINE__)
473 int vn_get_namelen(struct vnode *, int *);
474 void vn_setspecops (struct file *fp);
475 int vn_fullpath (struct proc *p, struct vnode *vn, char **retbuf, char **freebuf, int guess);
476 int vn_open (struct nlookupdata *ndp, struct file *fp, int fmode, int cmode);
477 int vn_opendisk (const char *devname, int fmode, struct vnode **vpp);
478 int vn_rdwr (enum uio_rw rw, struct vnode *vp, caddr_t base,
479 int len, off_t offset, enum uio_seg segflg, int ioflg,
480 struct ucred *cred, int *aresid);
481 int vn_rdwr_inchunks (enum uio_rw rw, struct vnode *vp, caddr_t base,
482 int len, off_t offset, enum uio_seg segflg, int ioflg,
483 struct ucred *cred, int *aresid);
484 int vn_stat (struct vnode *vp, struct stat *sb, struct ucred *cred);
485 cdev_t vn_todev (struct vnode *vp);
486 void vfs_timestamp (struct timespec *);
487 size_t vfs_flagstostr(int flags, const struct mountctl_opt *optp, char *buf, size_t len, int *errorp);
488 void vn_mark_atime(struct vnode *vp, struct thread *td);
489 int vn_writechk (struct vnode *vp, struct nchandle *nch);
490 int ncp_writechk(struct nchandle *nch);
491 int vop_stdopen (struct vop_open_args *ap);
492 int vop_stdclose (struct vop_close_args *ap);
493 int vop_stdmountctl(struct vop_mountctl_args *ap);
494 int vop_stdgetpages(struct vop_getpages_args *ap);
495 int vop_stdputpages(struct vop_putpages_args *ap);
496 int vop_stdmarkatime(struct vop_markatime_args *ap);
497 int vop_stdnoread(struct vop_read_args *ap);
498 int vop_stdnowrite(struct vop_write_args *ap);
499 int vop_stdpathconf (struct vop_pathconf_args *ap);
500 int vop_eopnotsupp (struct vop_generic_args *ap);
501 int vop_ebadf (struct vop_generic_args *ap);
502 int vop_einval (struct vop_generic_args *ap);
503 int vop_enotty (struct vop_generic_args *ap);
504 int vop_defaultop (struct vop_generic_args *ap);
505 int vop_null (struct vop_generic_args *ap);
506 int vop_panic (struct vop_generic_args *ap);
507 int vop_write_dirent(int *, struct uio *, ino_t, uint8_t, uint16_t,
510 int vop_compat_nresolve(struct vop_nresolve_args *ap);
511 int vop_compat_nlookupdotdot(struct vop_nlookupdotdot_args *ap);
512 int vop_compat_ncreate(struct vop_ncreate_args *ap);
513 int vop_compat_nmkdir(struct vop_nmkdir_args *ap);
514 int vop_compat_nmknod(struct vop_nmknod_args *ap);
515 int vop_compat_nlink(struct vop_nlink_args *ap);
516 int vop_compat_nsymlink(struct vop_nsymlink_args *ap);
517 int vop_compat_nwhiteout(struct vop_nwhiteout_args *ap);
518 int vop_compat_nremove(struct vop_nremove_args *ap);
519 int vop_compat_nrmdir(struct vop_nrmdir_args *ap);
520 int vop_compat_nrename(struct vop_nrename_args *ap);
522 void vx_lock (struct vnode *vp);
523 void vx_unlock (struct vnode *vp);
524 void vx_get (struct vnode *vp);
525 int vx_get_nonblock (struct vnode *vp);
526 void vx_put (struct vnode *vp);
527 int vget (struct vnode *vp, int lockflag);
528 void vput (struct vnode *vp);
529 void vhold (struct vnode *);
530 void vhold_interlocked (struct vnode *);
531 void vdrop (struct vnode *);
532 void vref (struct vnode *vp);
533 void vrele (struct vnode *vp);
534 void vsetflags (struct vnode *vp, int flags);
535 void vclrflags (struct vnode *vp, int flags);
537 /*#define DEBUG_VPUT*/
539 void debug_vput (struct vnode *vp, const char *filename, int line);
540 #define vput(vp) debug_vput(vp, __FILE__, __LINE__)
543 void vfs_subr_init(void);
544 void vfs_mount_init(void);
545 void vfs_lock_init(void);
546 void vfs_sync_init(void);
547 void mount_init(struct mount *mp);
549 void vn_syncer_add(struct vnode *, int);
550 void vn_syncer_remove(struct vnode *);
551 void vnlru_proc_wait(void);
553 extern struct vop_ops default_vnode_vops;
554 extern struct vop_ops dead_vnode_vops;
556 extern struct vop_ops *default_vnode_vops_p;
557 extern struct vop_ops *dead_vnode_vops_p;
561 #endif /* _KERNEL || _KERNEL_STRUCTURES */
562 #endif /* !_SYS_VNODE_H_ */