2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)vnode.h 8.7 (Berkeley) 2/4/94
34 * $FreeBSD: src/sys/sys/vnode.h,v 1.111.2.19 2002/12/29 18:19:53 dillon Exp $
35 * $DragonFly: src/sys/sys/vnode.h,v 1.62 2006/07/19 06:08:07 dillon Exp $
41 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
44 #include <sys/queue.h>
49 #ifndef _SYS_SELINFO_H_
50 #include <sys/selinfo.h>
52 #ifndef _SYS_BIOTRACK_H_
53 #include <sys/biotrack.h>
61 #ifndef _SYS_NAMECACHE_H_
62 #include <sys/namecache.h>
64 #ifndef _SYS_THREAD_H_
65 #include <sys/thread.h>
67 #ifndef _SYS_VFSOPS_H_
68 #include <sys/vfsops.h>
70 #ifndef _SYS_VFSCACHE_H_
71 #include <sys/vfscache.h>
76 #ifndef _SYS_SYSLINK_H_
77 #include <sys/syslink.h>
79 #ifndef _MACHINE_LOCK_H_
80 #include <machine/lock.h>
84 * The vnode is the focus of all file activity in UNIX. There is a
85 * unique vnode allocated for each active file, each current directory,
86 * each mounted-on file, text file, and the root.
90 * Each underlying filesystem allocates its own private area and hangs
91 * it from v_data. If non-null, this area is freed in getnewvnode().
93 TAILQ_HEAD(buflists, buf);
96 * Range locks protect offset ranges in files and directories at a high
97 * level, allowing the actual I/O to be broken down into smaller pieces.
98 * Range locks will eventually be integrated into the clustered cache
99 * coherency infrastructure.
101 * We use a simple data structure for now, but eventually this should
102 * probably be a btree or red-black tree.
106 TAILQ_HEAD(vrangelock_list, vrangelock);
109 struct vrangelock_list vh_list;
113 TAILQ_ENTRY(vrangelock) vr_node;
119 #define RNGL_WAITING 0x0001 /* waiting for lock, else has lock */
120 #define RNGL_CHECK 0x0002 /* check for work on unlock */
121 #define RNGL_SHARED 0x0004 /* shared lock, else exclusive */
122 #define RNGL_ONLIST 0x0008 /* sanity check */
126 vrange_init(struct vrangelock *vr, int flags, off_t offset, off_t length)
128 vr->vr_flags = flags;
129 vr->vr_offset = offset;
130 vr->vr_length = length;
135 void vrange_lock(struct vnode *vp, struct vrangelock *vr);
136 void vrange_unlock(struct vnode *vp, struct vrangelock *vr);
140 vrange_lock_shared(struct vnode *vp, struct vrangelock *vr,
141 off_t offset, off_t length)
143 vrange_init(vr, RNGL_SHARED, offset, length);
149 vrange_lock_excl(struct vnode *vp, struct vrangelock *vr,
150 off_t offset, off_t length)
152 vrange_init(vr, 0, offset, length);
159 * The vnode infrastructure is being reorganized. Most reference-related
160 * fields are locked by the BGL, and most file I/O related operations and
161 * vnode teardown functions are locked by the vnode lock.
163 * File read operations require a shared lock, file write operations require
164 * an exclusive lock. Most directory operations (read or write) currently
165 * require an exclusive lock due to the side effects stored in the directory
166 * inode (which we intend to fix).
168 * File reads and writes are further protected by a range lock. The intention
169 * is to be able to break I/O operations down into more easily managed pieces
170 * so vm_page arrays can be passed through rather than UIOs. This work will
171 * occur in multiple stages. The range locks will also eventually be used to
172 * deal with clustered cache coherency issues and, more immediately, to
173 * protect operations associated with the kernel-managed journaling module.
175 * NOTE: The vnode operations vector, v_ops, is a double-indirect that
176 * typically points to &v_mount->mnt_vn_use_ops. We use a double
177 * pointer because mnt_vn_use_ops may change dynamically when e.g.
178 * journaling is turned on or off.
180 * NOTE: v_filesize is currently only applicable when a VM object is
181 * associated with the vnode. Otherwise it will be set to NOOFFSET.
183 RB_HEAD(buf_rb_tree, buf);
184 RB_HEAD(buf_rb_hash, buf);
187 int v_flag; /* vnode flags (see below) */
188 int v_usecount; /* reference count of users */
190 int v_holdcnt; /* page & buffer references */
191 int v_opencount; /* number of explicit opens */
192 struct bio_track v_track_read; /* track I/O's in progress */
193 struct bio_track v_track_write; /* track I/O's in progress */
194 struct mount *v_mount; /* ptr to vfs we are in */
195 struct vop_ops **v_ops; /* vnode operations vector */
196 TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */
197 TAILQ_ENTRY(vnode) v_nmntvnodes; /* vnodes for mount point */
198 struct buf_rb_tree v_rbclean_tree; /* RB tree of clean bufs */
199 struct buf_rb_tree v_rbdirty_tree; /* RB tree of dirty bufs */
200 struct buf_rb_hash v_rbhash_tree; /* RB tree general lookup */
201 LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */
202 enum vtype v_type; /* vnode type */
204 struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
205 struct socket *vu_socket; /* unix ipc (VSOCK) */
207 udev_t vu_udev; /* device number for attach */
208 struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */
209 SLIST_ENTRY(vnode) vu_specnext;
211 struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
213 off_t v_filesize; /* file EOF or NOOFFSET */
214 off_t v_lazyw; /* lazy write iterator */
215 off_t v_lastw; /* last write (write cluster) */
216 off_t v_cstart; /* start block of cluster */
217 off_t v_lasta; /* last allocation */
218 int v_clen; /* length of current cluster */
219 struct vm_object *v_object; /* Place to store VM object */
220 struct lock v_lock; /* file/dir ops lock */
221 enum vtagtype v_tag; /* type of underlying data */
222 void *v_data; /* private data for fs */
223 struct namecache_list v_namecache; /* associated nc entries */
225 struct lwkt_token vpi_token; /* lock to protect below */
226 struct selinfo vpi_selinfo; /* identity of poller(s) */
227 short vpi_events; /* what they are looking for */
228 short vpi_revents; /* what has happened */
230 struct vmresident *v_resident; /* optional vmresident */
231 struct vrangehead v_range; /* range lock */
233 const char *filename; /* Source file doing locking */
234 int line; /* Line number doing locking */
238 #define v_mountedhere v_un.vu_mountedhere
239 #define v_socket v_un.vu_socket
240 #define v_udev v_un.vu_spec.vu_udev
241 #define v_rdev v_un.vu_spec.vu_specinfo
242 #define v_specnext v_un.vu_spec.vu_specnext
243 #define v_fifoinfo v_un.vu_fifoinfo
244 #define v_spinlock v_lock.lk_spinlock
246 #define VN_POLLEVENT(vp, events) \
248 if ((vp)->v_pollinfo.vpi_events & (events)) \
249 vn_pollevent((vp), (events)); \
255 #define VROOT 0x00001 /* root of its file system */
256 #define VTEXT 0x00002 /* vnode is a pure text prototype */
257 #define VSYSTEM 0x00004 /* vnode being used by kernel */
258 #define VISTTY 0x00008 /* vnode represents a tty */
259 #define VCTTYISOPEN 0x00010 /* controlling terminal tty is open */
260 #define VCKPT 0x00020 /* checkpoint-restored vnode */
261 #define VFSMID 0x00040 /* request FSMID update */
262 #define VMAYHAVELOCKS 0x00080 /* there may be posix or flock locks on vp */
263 /* open for business 0x00100 */
264 /* open for business 0x00200 */
265 /* open for business 0x00400 */
266 /* open for business 0x00800 */
267 /* open for business 0x01000 */
268 #define VOBJBUF 0x02000 /* Allocate buffers in VM object */
269 #define VINACTIVE 0x04000 /* The vnode is inactive */
270 #define VAGE 0x08000 /* Insert vnode at head of free list */
271 #define VOLOCK 0x10000 /* vnode is locked waiting for an object */
272 #define VOWANT 0x20000 /* a process is waiting for VOLOCK */
273 #define VRECLAIMED 0x40000 /* This vnode has been destroyed */
274 #define VFREE 0x80000 /* This vnode is on the freelist */
275 /* open for business 0x100000 */
276 #define VONWORKLST 0x200000 /* On syncer work-list */
277 #define VMOUNT 0x400000 /* Mount in progress */
278 #define VOBJDIRTY 0x800000 /* object might be dirty */
281 * vmntvnodescan() flags
286 #define VMSC_NOWAIT 0x10
289 * Flags for ioflag. (high 16 bits used to ask for read-ahead and
290 * help with write clustering)
292 #define IO_UNIT 0x0001 /* do I/O as atomic unit */
293 #define IO_APPEND 0x0002 /* append write to end */
294 #define IO_SYNC 0x0004 /* do I/O synchronously */
295 #define IO_NODELOCKED 0x0008 /* underlying node already locked */
296 #define IO_NDELAY 0x0010 /* FNDELAY flag set in file table */
297 #define IO_VMIO 0x0020 /* data already in VMIO space */
298 #define IO_INVAL 0x0040 /* invalidate after I/O */
299 #define IO_ASYNC 0x0080 /* bawrite rather than bdwrite */
300 #define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */
301 #define IO_NOWDRAIN 0x0200 /* do not block on wdrain */
302 #define IO_CORE 0x0400 /* I/O is part of core dump */
304 #define IO_SEQMAX 0x7F /* seq heuristic max value */
305 #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */
308 * Modes. Note that these V-modes must match file S_I*USR, SUID, SGID,
309 * and SVTX flag bits.
311 * VCREATE, VDELETE, and VEXCL may only be used in naccess() calls.
313 #define VDELETE 040000 /* delete if the file/dir exists */
314 #define VCREATE 020000 /* create if the file/dir does not exist */
315 #define VEXCL 010000 /* error if the file/dir already exists */
317 #define VSUID 04000 /* set user id on execution */
318 #define VSGID 02000 /* set group id on execution */
319 #define VSVTX 01000 /* save swapped text even after use */
320 #define VREAD 00400 /* read, write, execute permissions */
325 * Token indicating no attribute value yet assigned.
330 * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon)
332 #define VLKTIMEOUT (hz / 20 + 1)
337 * Convert between vnode types and inode formats (since POSIX.1
338 * defines mode word of stat structure in terms of inode formats).
340 extern enum vtype iftovt_tab[];
341 extern int vttoif_tab[];
342 #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
343 #define VTTOIF(indx) (vttoif_tab[(int)(indx)])
344 #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode))
347 * Flags to various vnode functions.
349 #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
350 #define FORCECLOSE 0x0002 /* vflush: force file closure */
351 #define WRITECLOSE 0x0004 /* vflush: only close writable files */
352 #define DOCLOSE 0x0008 /* vclean: close active files */
353 #define V_SAVE 0x0001 /* vinvalbuf: sync file first */
354 #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */
357 #define VATTR_NULL(vap) vattr_null(vap)
359 #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */
360 #endif /* DIAGNOSTIC */
362 #define NULLVP ((struct vnode *)NULL)
364 #define VNODEOP_SET(f) \
365 SYSINIT(f##init, SI_SUB_VFS, SI_ORDER_SECOND, vfs_nadd_vnodeops_sysinit, &f); \
366 SYSUNINIT(f##uninit, SI_SUB_VFS, SI_ORDER_SECOND,vfs_nrm_vnodeops_sysinit, &f);
373 extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
374 extern struct namecache *rootncp; /* root (i.e. "/") namecache */
375 extern int desiredvnodes; /* number of vnodes desired */
376 extern time_t syncdelay; /* max time to delay syncing data */
377 extern time_t filedelay; /* time to delay syncing files */
378 extern time_t dirdelay; /* time to delay syncing directories */
379 extern time_t metadelay; /* time to delay syncing metadata */
380 extern struct objcache *namei_oc;
381 extern int prtactive; /* nonzero to call vprint() */
382 extern struct vattr va_null; /* predefined null vattr structure */
383 extern int vfs_ioopt;
384 extern int numvnodes;
385 extern int freevnodes;
386 extern int vfs_fastdev; /* fast specfs device access */
389 * Interlock for scanning list of vnodes attached to a mountpoint
391 extern struct lwkt_token mntvnode_token;
394 * This macro is very helpful in defining those offsets in the vdesc struct.
396 * This is stolen from X11R4. I ignored all the fancy stuff for
397 * Crays, so if you decide to port this to such a serious machine,
398 * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
400 #define VOPARG_OFFSET(p_type,field) \
401 ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL)))
402 #define VOPARG_OFFSETOF(s_type,field) \
403 VOPARG_OFFSET(s_type*,field)
404 #define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \
405 ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET)))
407 typedef int (*vnodeopv_entry_t)(struct vop_generic_args *);
409 #ifdef DEBUG_VFS_LOCKS
411 * Macros to aid in tracing VFS locking problems. Not totally
412 * reliable since if the process sleeps between changing the lock
413 * state and checking it with the assert, some other process could
414 * change the state. They are good enough for debugging a single
415 * filesystem using a single-threaded test. I find that 'cvs co src'
416 * is a pretty good test.
419 #define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked(vp, str) /* expands to a bare call expression; the caller supplies the ';' */
420 #define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked(vp, str) /* no trailing ';' here either, so it is safe in if/else like ASSERT_VOP_LOCKED */
422 void assert_vop_locked(struct vnode *vp, const char *str);
423 void assert_vop_unlocked(struct vnode *vp, const char *str);
427 #define ASSERT_VOP_LOCKED(vp, str)
428 #define ASSERT_VOP_UNLOCKED(vp, str)
430 #endif /* DEBUG_VFS_LOCKS */
433 * VOCALL calls an op given an ops vector. We break it out because BSD's
434 * vclean changes the ops vector and then wants to call ops with the old
438 typedef int (*vocall_func_t)(struct vop_generic_args *);
441 * This call executes the vops vector for the offset stored in the ap's
442 * descriptor of the passed vops rather than the one related to the
443 * ap's vop_ops structure. It is used to chain VOPS calls on behalf of
444 * filesystems from a VFS's context ONLY (that is, from a VFS's own vops
447 #define VOCALL(vops, ap) \
448 (*(vocall_func_t *)((char *)(vops)+((ap)->a_desc->sd_offset)))(ap)
450 #define VDESC(OP) (& __CONCAT(OP,_desc))
453 * Public vnode manipulation functions.
466 void addaliasu (struct vnode *vp, udev_t nvp_udev);
467 int v_associate_rdev(struct vnode *vp, dev_t dev);
468 void v_release_rdev(struct vnode *vp);
469 int bdevvp (dev_t dev, struct vnode **vpp);
470 struct vnode *allocvnode(int lktimeout, int lkflags);
471 int getnewvnode (enum vtagtype tag, struct mount *mp,
472 struct vnode **vpp, int timo, int lkflags);
473 int getspecialvnode (enum vtagtype tag, struct mount *mp,
474 struct vop_ops **ops, struct vnode **vpp, int timo,
476 int spec_vnoperate (struct vop_generic_args *);
477 int speedup_syncer (void);
478 void vattr_null (struct vattr *vap);
479 int vcount (struct vnode *vp);
480 int vfinddev (dev_t dev, enum vtype type, struct vnode **vpp);
481 void vfs_nadd_vnodeops_sysinit (void *);
482 void vfs_nrm_vnodeops_sysinit (void *);
483 void vfs_add_vnodeops(struct mount *, struct vop_ops *, struct vop_ops **);
484 void vfs_rm_vnodeops(struct mount *, struct vop_ops *, struct vop_ops **);
485 int vflush (struct mount *mp, int rootrefs, int flags);
486 int vmntvnodescan(struct mount *mp, int flags,
487 int (*fastfunc)(struct mount *mp, struct vnode *vp, void *data),
488 int (*slowfunc)(struct mount *mp, struct vnode *vp, void *data),
490 void insmntque(struct vnode *vp, struct mount *mp);
492 void vclean (struct vnode *vp, int flags);
493 void vgone (struct vnode *vp);
494 void vupdatefsmid (struct vnode *vp);
495 int vinvalbuf (struct vnode *vp, int save, int slpflag, int slptimeo);
496 int vtruncbuf (struct vnode *vp, off_t length, int blksize);
497 int vfsync(struct vnode *vp, int waitfor, int passes,
498 int (*checkdef)(struct buf *),
499 int (*waitoutput)(struct vnode *, struct thread *));
500 int vinitvmio(struct vnode *vp, off_t filesize);
501 void vprint (char *label, struct vnode *vp);
502 int vrecycle (struct vnode *vp);
503 void vn_strategy(struct vnode *vp, struct bio *bio);
504 int vn_close (struct vnode *vp, int flags);
505 int vn_isdisk (struct vnode *vp, int *errp);
506 int vn_lock (struct vnode *vp, int flags);
508 int debug_vn_lock (struct vnode *vp, int flags,
509 const char *filename, int line);
510 #define vn_lock(vp,flags) debug_vn_lock(vp, flags, __FILE__, __LINE__)
513 int vn_get_namelen(struct vnode *, int *);
514 void vn_setspecops (struct file *fp);
515 int vn_fullpath (struct proc *p, struct vnode *vn, char **retbuf, char **freebuf);
516 int vn_open (struct nlookupdata *ndp, struct file *fp, int fmode, int cmode);
517 void vn_pollevent (struct vnode *vp, int events);
518 void vn_pollgone (struct vnode *vp);
519 int vn_pollrecord (struct vnode *vp, int events);
520 int vn_rdwr (enum uio_rw rw, struct vnode *vp, caddr_t base,
521 int len, off_t offset, enum uio_seg segflg, int ioflg,
522 struct ucred *cred, int *aresid);
523 int vn_rdwr_inchunks (enum uio_rw rw, struct vnode *vp, caddr_t base,
524 int len, off_t offset, enum uio_seg segflg, int ioflg,
525 struct ucred *cred, int *aresid);
526 int vn_stat (struct vnode *vp, struct stat *sb, struct ucred *cred);
527 dev_t vn_todev (struct vnode *vp);
528 void vfs_timestamp (struct timespec *);
529 int vn_writechk (struct vnode *vp);
530 int vop_stdopen (struct vop_open_args *ap);
531 int vop_stdclose (struct vop_close_args *ap);
532 int vop_stdislocked (struct vop_islocked_args *ap);
533 int vop_stdlock (struct vop_lock_args *ap);
534 int vop_stdrlock (struct vop_lock_args *ap);
535 int vop_stdunlock (struct vop_unlock_args *ap);
536 int vop_nopoll (struct vop_poll_args *ap);
537 int vop_stdpathconf (struct vop_pathconf_args *ap);
538 int vop_stdpoll (struct vop_poll_args *ap);
539 int vop_stdrevoke (struct vop_revoke_args *ap);
540 int vop_eopnotsupp (struct vop_generic_args *ap);
541 int vop_ebadf (struct vop_generic_args *ap);
542 int vop_einval (struct vop_generic_args *ap);
543 int vop_enotty (struct vop_generic_args *ap);
544 int vop_defaultop (struct vop_generic_args *ap);
545 int vop_null (struct vop_generic_args *ap);
546 int vop_panic (struct vop_generic_args *ap);
547 int vop_write_dirent(int *, struct uio *, ino_t, uint8_t, uint16_t,
550 int vop_compat_nresolve(struct vop_nresolve_args *ap);
551 int vop_compat_nlookupdotdot(struct vop_nlookupdotdot_args *ap);
552 int vop_compat_ncreate(struct vop_ncreate_args *ap);
553 int vop_compat_nmkdir(struct vop_nmkdir_args *ap);
554 int vop_compat_nmknod(struct vop_nmknod_args *ap);
555 int vop_compat_nlink(struct vop_nlink_args *ap);
556 int vop_compat_nsymlink(struct vop_nsymlink_args *ap);
557 int vop_compat_nwhiteout(struct vop_nwhiteout_args *ap);
558 int vop_compat_nremove(struct vop_nremove_args *ap);
559 int vop_compat_nrmdir(struct vop_nrmdir_args *ap);
560 int vop_compat_nrename(struct vop_nrename_args *ap);
562 int vx_lock (struct vnode *vp);
563 void vx_unlock (struct vnode *vp);
564 int vx_get (struct vnode *vp);
565 int vx_get_nonblock (struct vnode *vp);
566 void vx_put (struct vnode *vp);
567 int vget (struct vnode *vp, int lockflag);
568 void vput (struct vnode *vp);
569 void vhold (struct vnode *);
570 void vdrop (struct vnode *);
571 void vref (struct vnode *vp);
572 void vrele (struct vnode *vp);
573 void vsetflags (struct vnode *vp, int flags);
574 void vclrflags (struct vnode *vp, int flags);
576 void vfs_subr_init(void);
577 void vfs_mount_init(void);
578 void vfs_lock_init(void);
579 void vfs_sync_init(void);
581 void vn_syncer_add_to_worklist(struct vnode *, int);
582 void vnlru_proc_wait(void);
584 extern struct vop_ops default_vnode_vops;
585 extern struct vop_ops spec_vnode_vops;
586 extern struct vop_ops dead_vnode_vops;
588 extern struct vop_ops *default_vnode_vops_p;
589 extern struct vop_ops *spec_vnode_vops_p;
590 extern struct vop_ops *dead_vnode_vops_p;
594 #endif /* _KERNEL || _KERNEL_STRUCTURES */
595 #endif /* !_SYS_VNODE_H_ */