2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)vnode.h 8.7 (Berkeley) 2/4/94
34 * $FreeBSD: src/sys/sys/vnode.h,v 1.111.2.19 2002/12/29 18:19:53 dillon Exp $
35 * $DragonFly: src/sys/sys/vnode.h,v 1.71 2006/09/19 16:06:12 dillon Exp $
41 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
44 #include <sys/queue.h>
49 #ifndef _SYS_SELINFO_H_
50 #include <sys/selinfo.h>
52 #ifndef _SYS_BIOTRACK_H_
53 #include <sys/biotrack.h>
61 #ifndef _SYS_NAMECACHE_H_
62 #include <sys/namecache.h>
64 #ifndef _SYS_THREAD_H_
65 #include <sys/thread.h>
67 #ifndef _SYS_VFSOPS_H_
68 #include <sys/vfsops.h>
70 #ifndef _SYS_VFSCACHE_H_
71 #include <sys/vfscache.h>
76 #ifndef _SYS_SYSLINK_H_
77 #include <sys/syslink.h>
82 #ifndef _MACHINE_LOCK_H_
83 #include <machine/lock.h>
87 * The vnode is the focus of all file activity in UNIX. There is a
88 * unique vnode allocated for each active file, each current directory,
89 * each mounted-on file, text file, and the root.
93 * Each underlying filesystem allocates its own private area and hangs
94 * it from v_data. If non-null, this area is freed in getnewvnode().
96 TAILQ_HEAD(buflists, buf);
99 * Range locks protect offset ranges in files and directories at a high
100 * level, allowing the actual I/O to be broken down into smaller pieces.
101 * Range locks will eventually be integrated into the clustered cache
102 * coherency infrastructure.
104 * We use a simple data structure for now, but eventually this should
105 * probably be a btree or red-black tree.
109 TAILQ_HEAD(vrangelock_list, vrangelock);
112 struct vrangelock_list vh_list;
116 TAILQ_ENTRY(vrangelock) vr_node;
122 #define RNGL_WAITING 0x0001 /* waiting for lock, else has lock */
123 #define RNGL_CHECK 0x0002 /* check for work on unlock */
124 #define RNGL_SHARED 0x0004 /* shared lock, else exclusive */
125 #define RNGL_ONLIST 0x0008 /* sanity check */
129 vrange_init(struct vrangelock *vr, int flags, off_t offset, off_t length)
131 vr->vr_flags = flags;
132 vr->vr_offset = offset;
133 vr->vr_length = length;
138 void vrange_lock(struct vnode *vp, struct vrangelock *vr);
139 void vrange_unlock(struct vnode *vp, struct vrangelock *vr);
143 vrange_lock_shared(struct vnode *vp, struct vrangelock *vr,
144 off_t offset, off_t length)
146 vrange_init(vr, RNGL_SHARED, offset, length);
152 vrange_lock_excl(struct vnode *vp, struct vrangelock *vr,
153 off_t offset, off_t length)
155 vrange_init(vr, 0, offset, length);
162 * The vnode infrastructure is being reorganized. Most reference-related
163 * fields are locked by the BGL, and most file I/O related operations and
164 * vnode teardown functions are locked by the vnode lock.
166 * File read operations require a shared lock, file write operations require
167 * an exclusive lock. Most directory operations (read or write) currently
168 * require an exclusive lock due to the side effects stored in the directory
169 * inode (which we intend to fix).
171 * File reads and writes are further protected by a range lock. The intention
172 * is to be able to break I/O operations down into more easily managed pieces
173 * so vm_page arrays can be passed through rather than UIOs. This work will
174 * occur in multiple stages. The range locks will also eventually be used to
175 * deal with clustered cache coherency issues and, more immediately, to
176 * protect operations associated with the kernel-managed journaling module.
178 * NOTE: The vnode operations vector, v_ops, is a double-indirect that
179 * typically points to &v_mount->mnt_vn_use_ops. We use a double
180 * pointer because mnt_vn_use_ops may change dynamically when e.g.
181 * journaling is turned on or off.
183 * NOTE: v_filesize is currently only applicable when a VM object is
184 * associated with the vnode. Otherwise it will be set to NOOFFSET.
186 RB_HEAD(buf_rb_tree, buf);
187 RB_HEAD(buf_rb_hash, buf);
190 int v_flag; /* vnode flags (see below) */
191 int v_usecount; /* reference count of users */
193 int v_holdcnt; /* page & buffer references */
194 int v_opencount; /* number of explicit opens */
195 struct bio_track v_track_read; /* track I/O's in progress */
196 struct bio_track v_track_write; /* track I/O's in progress */
197 struct mount *v_mount; /* ptr to vfs we are in */
198 struct vop_ops **v_ops; /* vnode operations vector */
199 TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */
200 TAILQ_ENTRY(vnode) v_nmntvnodes; /* vnodes for mount point */
201 struct buf_rb_tree v_rbclean_tree; /* RB tree of clean bufs */
202 struct buf_rb_tree v_rbdirty_tree; /* RB tree of dirty bufs */
203 struct buf_rb_hash v_rbhash_tree; /* RB tree general lookup */
204 LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */
205 enum vtype v_type; /* vnode type */
207 struct socket *vu_socket; /* unix ipc (VSOCK) */
209 udev_t vu_udev; /* device number for attach */
210 struct cdev *vu_cdevinfo; /* device (VCHR, VBLK) */
211 SLIST_ENTRY(vnode) vu_cdevnext;
213 struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
215 off_t v_filesize; /* file EOF or NOOFFSET */
216 off_t v_lazyw; /* lazy write iterator */
217 off_t v_lastw; /* last write (write cluster) */
218 off_t v_cstart; /* start block of cluster */
219 off_t v_lasta; /* last allocation */
220 int v_clen; /* length of current cluster */
221 struct vm_object *v_object; /* Place to store VM object */
222 struct lock v_lock; /* file/dir ops lock */
223 enum vtagtype v_tag; /* type of underlying data */
224 void *v_data; /* private data for fs */
225 struct namecache_list v_namecache; /* associated nc entries */
227 struct lwkt_token vpi_token; /* lock to protect below */
228 struct selinfo vpi_selinfo; /* identity of poller(s) */
229 short vpi_events; /* what they are looking for */
230 short vpi_revents; /* what has happened */
232 struct vmresident *v_resident; /* optional vmresident */
233 struct vrangehead v_range; /* range lock */
234 struct ccms_dataspace v_ccms; /* cache coherency */
236 const char *filename; /* Source file doing locking */
237 int line; /* Line number doing locking */
241 #define v_socket v_un.vu_socket
242 #define v_udev v_un.vu_cdev.vu_udev
243 #define v_rdev v_un.vu_cdev.vu_cdevinfo
244 #define v_cdevnext v_un.vu_cdev.vu_cdevnext
245 #define v_fifoinfo v_un.vu_fifoinfo
246 #define v_spinlock v_lock.lk_spinlock
248 #define VN_POLLEVENT(vp, events) \
250 if ((vp)->v_pollinfo.vpi_events & (events)) \
251 vn_pollevent((vp), (events)); \
257 #define VROOT 0x00001 /* root of its file system */
258 #define VTEXT 0x00002 /* vnode is a pure text prototype */
259 #define VSYSTEM 0x00004 /* vnode being used by kernel */
260 #define VISTTY 0x00008 /* vnode represents a tty */
261 #define VCTTYISOPEN 0x00010 /* controlling terminal tty is open */
262 #define VCKPT 0x00020 /* checkpoint-restored vnode */
263 #define VFSMID 0x00040 /* request FSMID update */
264 #define VMAYHAVELOCKS 0x00080 /* there may be posix or flock locks on vp */
265 /* open for business 0x00100 */
266 /* open for business 0x00200 */
267 /* open for business 0x00400 */
268 /* open for business 0x00800 */
269 /* open for business 0x01000 */
270 #define VOBJBUF 0x02000 /* Allocate buffers in VM object */
271 #define VINACTIVE 0x04000 /* The vnode is inactive */
272 #define VAGE 0x08000 /* Insert vnode at head of free list */
273 #define VOLOCK 0x10000 /* vnode is locked waiting for an object */
274 #define VOWANT 0x20000 /* a process is waiting for VOLOCK */
275 #define VRECLAIMED 0x40000 /* This vnode has been destroyed */
276 #define VFREE 0x80000 /* This vnode is on the freelist */
277 /* open for business 0x100000 */
278 #define VONWORKLST 0x200000 /* On syncer work-list */
279 #define VMOUNT 0x400000 /* Mount in progress */
280 #define VOBJDIRTY 0x800000 /* object might be dirty */
283 * vmntvnodescan() flags
287 #define VMSC_NOWAIT 0x10
290 * Flags for ioflag. (high 16 bits used to ask for read-ahead and
291 * help with write clustering)
293 #define IO_UNIT 0x0001 /* do I/O as atomic unit */
294 #define IO_APPEND 0x0002 /* append write to end */
295 #define IO_SYNC 0x0004 /* do I/O synchronously */
296 #define IO_NODELOCKED 0x0008 /* underlying node already locked */
297 #define IO_NDELAY 0x0010 /* FNDELAY flag set in file table */
298 #define IO_VMIO 0x0020 /* data already in VMIO space */
299 #define IO_INVAL 0x0040 /* invalidate after I/O */
300 #define IO_ASYNC 0x0080 /* bawrite rather than bdwrite */
301 #define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */
302 #define IO_NOWDRAIN 0x0200 /* do not block on wdrain */
303 #define IO_CORE 0x0400 /* I/O is part of core dump */
305 #define IO_SEQMAX 0x7F /* seq heuristic max value */
306 #define IO_SEQSHIFT 16 /* shift count: seq heuristic lives in the upper 16 bits of ioflag */
309 * Modes. Note that these V-modes must match file S_I*USR, SUID, SGID,
310 * and SVTX flag bits.
312 * VCREATE, VDELETE, and VEXCL may only be used in naccess() calls.
314 #define VDELETE 040000 /* delete if the file/dir exists */
315 #define VCREATE 020000 /* create if the file/dir does not exist */
316 #define VEXCL 010000 /* error if the file/dir already exists */
318 #define VSUID 04000 /* set user id on execution */
319 #define VSGID 02000 /* set group id on execution */
320 #define VSVTX 01000 /* save swapped text even after use */
321 #define VREAD 00400 /* read, write, execute permissions */
326 * Token indicating no attribute value yet assigned.
331 * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon)
333 #define VLKTIMEOUT (hz / 20 + 1)
338 * Convert between vnode types and inode formats (since POSIX.1
339 * defines mode word of stat structure in terms of inode formats).
341 extern enum vtype iftovt_tab[]; /* lookup table read by IFTOVT(): yields a vnode type */
342 extern int vttoif_tab[]; /* lookup table read by VTTOIF(): yields an int, presumably inode format bits */
343 #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) /* index by the S_IFMT bits of mode, shifted down 12 */
344 #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) /* indx is cast to int before the table lookup */
345 #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* OR the VTTOIF(indx) result with mode, cast to int */
348 * Flags to various vnode functions.
350 #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
351 #define FORCECLOSE 0x0002 /* vflush: force file closure */
352 #define WRITECLOSE 0x0004 /* vflush: only close writable files */
353 #define DOCLOSE 0x0008 /* vclean: close active files */
354 #define V_SAVE 0x0001 /* vinvalbuf: sync file first */
355 #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */
358 #define VATTR_NULL(vap) vattr_null(vap)
360 #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */
361 #endif /* DIAGNOSTIC */
363 #define NULLVP ((struct vnode *)NULL)
365 #define VNODEOP_SET(f) \
366 SYSINIT(f##init, SI_SUB_VFS, SI_ORDER_SECOND, vfs_nadd_vnodeops_sysinit, &f); \
367 SYSUNINIT(f##uninit, SI_SUB_VFS, SI_ORDER_SECOND,vfs_nrm_vnodeops_sysinit, &f); /* pairs a SYSINIT calling vfs_nadd_vnodeops_sysinit with a SYSUNINIT calling vfs_nrm_vnodeops_sysinit, both given &f; the expansion already ends in ';' */
374 extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
375 extern struct namecache *rootncp; /* root (i.e. "/") namecache */
376 extern int desiredvnodes; /* number of vnodes desired */
377 extern time_t syncdelay; /* max time to delay syncing data */
378 extern time_t filedelay; /* time to delay syncing files */
379 extern time_t dirdelay; /* time to delay syncing directories */
380 extern time_t metadelay; /* time to delay syncing metadata */
381 extern struct objcache *namei_oc;
382 extern int prtactive; /* nonzero to call vprint() */
383 extern struct vattr va_null; /* predefined null vattr structure */
384 extern int vfs_ioopt;
385 extern int numvnodes;
386 extern int freevnodes;
387 extern int vfs_fastdev; /* fast specfs device access */
390 * Interlock for scanning list of vnodes attached to a mountpoint
392 extern struct lwkt_token mntvnode_token;
395 * This macro is very helpful in defining those offsets in the vdesc struct.
397 * This is stolen from X11R4. I ignored all the fancy stuff for
398 * Crays, so if you decide to port this to such a serious machine,
399 * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
401 #define VOPARG_OFFSET(p_type,field) \
402 ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL))) /* byte offset of field as an int; p_type must be a pointer-to-struct type */
403 #define VOPARG_OFFSETOF(s_type,field) \
404 VOPARG_OFFSET(s_type*,field) /* same computation, but takes the struct type itself and appends the '*' */
405 #define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \
406 ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET))) /* STRUCT_P advanced by S_OFFSET bytes, cast to S_TYPE */
408 typedef int (*vnodeopv_entry_t)(struct vop_generic_args *);
411 * VOCALL calls an op given an ops vector. We break it out because BSD's
412 * vclean changes the ops vector and then wants to call ops with the old
416 typedef int (*vocall_func_t)(struct vop_generic_args *);
419 * This call executes the vops vector for the offset stored in the ap's
420 * descriptor of the passed vops rather than the one related to the
421 * ap's vop_ops structure. It is used to chain VOPS calls on behalf of
422 * filesystems from a VFS's context ONLY (that is, from a VFS's own vops
425 #define VOCALL(vops, ap) \
426 (*(vocall_func_t *)((char *)(vops)+((ap)->a_desc->sd_offset)))(ap) /* fetch the vocall_func_t stored sd_offset bytes into vops and call it with ap */
428 #define VDESC(OP) (& __CONCAT(OP,_desc))
431 * Public vnode manipulation functions.
444 void addaliasu (struct vnode *vp, udev_t nvp_udev);
445 int v_associate_rdev(struct vnode *vp, cdev_t dev);
446 void v_release_rdev(struct vnode *vp);
447 int bdevvp (cdev_t dev, struct vnode **vpp);
448 struct vnode *allocvnode(int lktimeout, int lkflags);
449 int getnewvnode (enum vtagtype tag, struct mount *mp,
450 struct vnode **vpp, int timo, int lkflags);
451 int getspecialvnode (enum vtagtype tag, struct mount *mp,
452 struct vop_ops **ops, struct vnode **vpp, int timo,
454 int spec_vnoperate (struct vop_generic_args *);
455 int speedup_syncer (void);
456 void vattr_null (struct vattr *vap);
457 int vcount (struct vnode *vp);
458 int vfinddev (cdev_t dev, enum vtype type, struct vnode **vpp);
459 void vfs_nadd_vnodeops_sysinit (void *);
460 void vfs_nrm_vnodeops_sysinit (void *);
461 void vfs_add_vnodeops(struct mount *, struct vop_ops *, struct vop_ops **);
462 void vfs_rm_vnodeops(struct mount *, struct vop_ops *, struct vop_ops **);
463 int vflush (struct mount *mp, int rootrefs, int flags);
464 int vmntvnodescan(struct mount *mp, int flags,
465 int (*fastfunc)(struct mount *mp, struct vnode *vp, void *data),
466 int (*slowfunc)(struct mount *mp, struct vnode *vp, void *data),
468 void insmntque(struct vnode *vp, struct mount *mp);
470 void vclean_interlocked (struct vnode *vp, int flags);
471 void vgone (struct vnode *vp);
472 void vgone_interlocked (struct vnode *vp);
473 void vupdatefsmid (struct vnode *vp);
474 int vinvalbuf (struct vnode *vp, int save, int slpflag, int slptimeo);
475 int vtruncbuf (struct vnode *vp, off_t length, int blksize);
476 int vfsync(struct vnode *vp, int waitfor, int passes,
477 int (*checkdef)(struct buf *),
478 int (*waitoutput)(struct vnode *, struct thread *));
479 int vinitvmio(struct vnode *vp, off_t filesize);
480 void vprint (char *label, struct vnode *vp);
481 int vrecycle (struct vnode *vp);
482 void vn_strategy(struct vnode *vp, struct bio *bio);
483 int vn_close (struct vnode *vp, int flags);
484 int vn_isdisk (struct vnode *vp, int *errp);
485 int vn_lock (struct vnode *vp, int flags);
486 int vn_islocked (struct vnode *vp);
487 void vn_unlock (struct vnode *vp);
489 int debug_vn_lock (struct vnode *vp, int flags,
490 const char *filename, int line);
491 #define vn_lock(vp,flags) debug_vn_lock(vp, flags, __FILE__, __LINE__) /* rewrites vn_lock() calls to debug_vn_lock() with the call site's file and line; NOTE(review): the conditional guarding this override is not visible in this excerpt */
494 int vn_get_namelen(struct vnode *, int *);
495 void vn_setspecops (struct file *fp);
496 int vn_fullpath (struct proc *p, struct vnode *vn, char **retbuf, char **freebuf);
497 int vn_open (struct nlookupdata *ndp, struct file *fp, int fmode, int cmode);
498 void vn_pollevent (struct vnode *vp, int events);
499 void vn_pollgone (struct vnode *vp);
500 int vn_pollrecord (struct vnode *vp, int events);
501 int vn_rdwr (enum uio_rw rw, struct vnode *vp, caddr_t base,
502 int len, off_t offset, enum uio_seg segflg, int ioflg,
503 struct ucred *cred, int *aresid);
504 int vn_rdwr_inchunks (enum uio_rw rw, struct vnode *vp, caddr_t base,
505 int len, off_t offset, enum uio_seg segflg, int ioflg,
506 struct ucred *cred, int *aresid);
507 int vn_stat (struct vnode *vp, struct stat *sb, struct ucred *cred);
508 cdev_t vn_todev (struct vnode *vp);
509 void vfs_timestamp (struct timespec *);
510 int vn_writechk (struct vnode *vp, struct namecache *ncp);
511 int ncp_writechk(struct namecache *ncp);
512 int vop_stdopen (struct vop_open_args *ap);
513 int vop_stdclose (struct vop_close_args *ap);
514 int vop_nopoll (struct vop_poll_args *ap);
515 int vop_stdpathconf (struct vop_pathconf_args *ap);
516 int vop_stdpoll (struct vop_poll_args *ap);
517 int vop_stdrevoke (struct vop_revoke_args *ap);
518 int vop_eopnotsupp (struct vop_generic_args *ap);
519 int vop_ebadf (struct vop_generic_args *ap);
520 int vop_einval (struct vop_generic_args *ap);
521 int vop_enotty (struct vop_generic_args *ap);
522 int vop_defaultop (struct vop_generic_args *ap);
523 int vop_null (struct vop_generic_args *ap);
524 int vop_panic (struct vop_generic_args *ap);
525 int vop_write_dirent(int *, struct uio *, ino_t, uint8_t, uint16_t,
528 int vop_compat_nresolve(struct vop_nresolve_args *ap);
529 int vop_compat_nlookupdotdot(struct vop_nlookupdotdot_args *ap);
530 int vop_compat_ncreate(struct vop_ncreate_args *ap);
531 int vop_compat_nmkdir(struct vop_nmkdir_args *ap);
532 int vop_compat_nmknod(struct vop_nmknod_args *ap);
533 int vop_compat_nlink(struct vop_nlink_args *ap);
534 int vop_compat_nsymlink(struct vop_nsymlink_args *ap);
535 int vop_compat_nwhiteout(struct vop_nwhiteout_args *ap);
536 int vop_compat_nremove(struct vop_nremove_args *ap);
537 int vop_compat_nrmdir(struct vop_nrmdir_args *ap);
538 int vop_compat_nrename(struct vop_nrename_args *ap);
540 void vx_lock (struct vnode *vp);
541 void vx_unlock (struct vnode *vp);
542 void vx_get (struct vnode *vp);
543 int vx_get_nonblock (struct vnode *vp);
544 void vx_put (struct vnode *vp);
545 int vget (struct vnode *vp, int lockflag);
546 void vput (struct vnode *vp);
547 void vhold (struct vnode *);
548 void vdrop (struct vnode *);
549 void vref (struct vnode *vp);
550 void vref_initial (struct vnode *vp, int reactivate);
551 void vrele (struct vnode *vp);
552 void vsetflags (struct vnode *vp, int flags);
553 void vclrflags (struct vnode *vp, int flags);
555 void vfs_subr_init(void);
556 void vfs_mount_init(void);
557 void vfs_lock_init(void);
558 void vfs_sync_init(void);
560 void vn_syncer_add_to_worklist(struct vnode *, int);
561 void vnlru_proc_wait(void);
563 extern struct vop_ops default_vnode_vops;
564 extern struct vop_ops spec_vnode_vops;
565 extern struct vop_ops dead_vnode_vops;
567 extern struct vop_ops *default_vnode_vops_p;
568 extern struct vop_ops *spec_vnode_vops_p;
569 extern struct vop_ops *dead_vnode_vops_p;
573 #endif /* _KERNEL || _KERNEL_STRUCTURES */
574 #endif /* !_SYS_VNODE_H_ */