/*
 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
 */

/*
 * tmpfs vnode interface.
 */
#include <sys/kernel.h>
#include <sys/kern_syscall.h>
#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vfsops.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>

#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_page2.h>

#include <vfs/fifofs/fifo.h>
#include <vfs/tmpfs/tmpfs_vnops.h>
static void tmpfs_strategy_done(struct bio *bio);
static void tmpfs_move_pages(vm_object_t src, vm_object_t dst);

static int tmpfs_cluster_enable = 1;

SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "TMPFS filesystem");
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_enable, CTLFLAG_RW,
	   &tmpfs_cluster_enable, 0, "");
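/*
 * Illustrative sketch, not part of this file: the SYSCTL_NODE/SYSCTL_INT
 * pair above exports the knob as vfs.tmpfs.cluster_enable.  A userland
 * program could read and toggle it through the standard sysctlbyname(3)
 * interface, roughly as follows (error handling elided; writing the
 * value normally requires root):
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int val, newval = 0;
	size_t len = sizeof(val);

	/* Fetch the current value and request that clustering be disabled. */
	sysctlbyname("vfs.tmpfs.cluster_enable", &val, &len,
		     &newval, sizeof(newval));
	printf("cluster_enable was %d\n", val);
	return 0;
}
#endif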
tmpfs_knote(struct vnode *vp, int flags)
	KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);

/* --------------------------------------------------------------------- */
tmpfs_nresolve(struct vop_nresolve_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = NULL;
	struct namecache *ncp = ap->a_nch->ncp;
	struct tmpfs_node *tnode;
	struct tmpfs_dirent *de;
	struct tmpfs_node *dnode;

	dnode = VP_TO_TMPFS_DIR(dvp);

	TMPFS_NODE_LOCK_SH(dnode);

	de = tmpfs_dir_lookup(dnode, NULL, ncp);

	/*
	 * Allocate a vnode for the node we found.  Use
	 * tmpfs_alloc_vp()'s deadlock handling mode.
	 */
	error = tmpfs_alloc_vp(dvp->v_mount, dnode, tnode,
			       LK_EXCLUSIVE | LK_RETRY, &vp);

	TMPFS_NODE_UNLOCK(dnode);
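	/*
	 * Lazily mark the directory as accessed: the flag is tested
	 * without the lock first, so the exclusive node lock is only
	 * taken the first time around.  The same pattern appears in
	 * the read, readdir and readlink paths below.
	 */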
	if ((dnode->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(dnode);
		dnode->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(dnode);
	}

	/*
	 * Store the result of this lookup in the cache.  Avoid this if the
	 * request was for creation, as it does not improve timings on
	 * empirical tests.
	 */
	if (error == 0) {
		cache_setvp(ap->a_nch, vp);
	} else if (error == ENOENT) {
		cache_setvp(ap->a_nch, NULL);
	}
tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
	struct ucred *cred = ap->a_cred;

	/* Check accessibility of requested node as a first step. */
	error = VOP_ACCESS(dvp, VEXEC, cred);

	if (dnode->tn_dir.tn_parent != NULL) {
		/* Allocate a new vnode on the matching entry. */
		error = tmpfs_alloc_vp(dvp->v_mount,
				       NULL, dnode->tn_dir.tn_parent,
				       LK_EXCLUSIVE | LK_RETRY, vpp);
	}

	return (*vpp == NULL) ? ENOENT : 0;

/* --------------------------------------------------------------------- */
tmpfs_ncreate(struct vop_ncreate_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;

	KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE);

/* --------------------------------------------------------------------- */
tmpfs_nmknod(struct vop_nmknod_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;

	if (vap->va_type != VBLK && vap->va_type != VCHR &&
	    vap->va_type != VFIFO) {

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE);

/* --------------------------------------------------------------------- */
tmpfs_open(struct vop_open_args *ap)
	struct vnode *vp = ap->a_vp;
	int mode = ap->a_mode;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	/* The file is still active but all its names have been removed
	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
	 * it is about to die. */
	if (node->tn_links < 1)

	/* If the file is marked append-only, deny write requests. */
	if ((node->tn_flags & APPEND) &&
	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
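	/*
	 * If this regular file's pages were previously parked in the
	 * backing anonymous object (see tmpfs_inactive() below), move
	 * them back into the vnode's VM object before first use.  The
	 * flag is tested once unlocked and re-tested under the node
	 * lock so the common case avoids the lock entirely.
	 */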
	if (node->tn_reg.tn_pages_in_aobj) {
		TMPFS_NODE_LOCK(node);
		if (node->tn_reg.tn_pages_in_aobj) {
			tmpfs_move_pages(node->tn_reg.tn_aobj,
					 vp->v_object);
			node->tn_reg.tn_pages_in_aobj = 0;
		}
		TMPFS_NODE_UNLOCK(node);
	}

	error = vop_stdopen(ap);

/* --------------------------------------------------------------------- */
tmpfs_close(struct vop_close_args *ap)
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	if (node->tn_links > 0) {
		/*
		 * Update node times.  No need to do it if the node has
		 * been deleted, because it will vanish after we return.
		 */
	}

	error = vop_stdclose(ap);

/* --------------------------------------------------------------------- */
tmpfs_access(struct vop_access_args *ap)
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	switch (vp->v_type) {
		if ((ap->a_mode & VWRITE) &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {

	if ((ap->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {

	error = vop_helper_access(ap, node->tn_uid, node->tn_gid,
				  node->tn_mode, 0);

/* --------------------------------------------------------------------- */
tmpfs_getattr(struct vop_getattr_args *ap)
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_LOCK_SH(node);
	vap->va_type = vp->v_type;
	vap->va_mode = node->tn_mode;
	vap->va_nlink = node->tn_links;
	vap->va_uid = node->tn_uid;
	vap->va_gid = node->tn_gid;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_fileid = node->tn_id;
	vap->va_size = node->tn_size;
	vap->va_blocksize = PAGE_SIZE;
	vap->va_atime.tv_sec = node->tn_atime;
	vap->va_atime.tv_nsec = node->tn_atimensec;
	vap->va_mtime.tv_sec = node->tn_mtime;
	vap->va_mtime.tv_nsec = node->tn_mtimensec;
	vap->va_ctime.tv_sec = node->tn_ctime;
	vap->va_ctime.tv_nsec = node->tn_ctimensec;
	vap->va_gen = node->tn_gen;
	vap->va_flags = node->tn_flags;
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		vap->va_rmajor = umajor(node->tn_rdev);
		vap->va_rminor = uminor(node->tn_rdev);
	}
	vap->va_bytes = round_page(node->tn_size);
	TMPFS_NODE_UNLOCK(node);

/* --------------------------------------------------------------------- */
tmpfs_setattr(struct vop_setattr_args *ap)
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_LOCK(node);
	if (error == 0 && (vap->va_flags != VNOVAL)) {
		error = tmpfs_chflags(vp, vap->va_flags, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && (vap->va_size != VNOVAL)) {
		/* restore any saved pages before proceeding */
		if (node->tn_reg.tn_pages_in_aobj) {
			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
			node->tn_reg.tn_pages_in_aobj = 0;
		}
		if (vap->va_size > node->tn_size)
			kflags |= NOTE_WRITE | NOTE_EXTEND;
		else
			kflags |= NOTE_WRITE;
		error = tmpfs_chsize(vp, vap->va_size, cred);
	}

	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
			   vap->va_gid != (gid_t)VNOVAL)) {
		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
		error = tmpfs_chmod(vp, vap->va_mode, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
			    vap->va_atime.tv_nsec != VNOVAL) ||
			   (vap->va_mtime.tv_sec != VNOVAL &&
			    vap->va_mtime.tv_nsec != VNOVAL))) {
		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
				      vap->va_vaflags, cred);
		kflags |= NOTE_ATTRIB;
	}

	/*
	 * Update the node times.  We give preference to the error codes
	 * generated by this function rather than the ones that may arise
	 * from tmpfs_update().
	 */
	TMPFS_NODE_UNLOCK(node);
	tmpfs_knote(vp, kflags);

/* --------------------------------------------------------------------- */
/*
 * fsync is usually a NOP, but we must take action when unmounting or
 * when recycling the vnode.
 */
tmpfs_fsync(struct vop_fsync_args *ap)
	struct tmpfs_node *node;
	struct vnode *vp = ap->a_vp;

	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * tmpfs vnodes typically remain dirty, avoid long syncer scans
	 * by forcing removal from the syncer list.
	 */
	vn_syncer_remove(vp, 1);

	if (vp->v_type == VREG) {
		if (vp->v_flag & VRECLAIMED) {
			if (node->tn_links == 0)
				tmpfs_truncate(vp, 0);
			else
				vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
		}
	}

/* --------------------------------------------------------------------- */
tmpfs_read(struct vop_read_args *ap)
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct tmpfs_node *node;

	if (uio->uio_offset < 0)

	if (vp->v_type != VREG)

	/*
	 * Extract node, try to shortcut the operation through
	 * the VM page cache, allowing us to avoid buffer cache
	 * overheads.
	 */
	node = VP_TO_TMPFS_NODE(vp);
	resid = uio->uio_resid;
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	error = vop_helper_read_shortcut(ap);

	if (uio->uio_resid == 0) {

	/*
	 * restore any saved pages before proceeding
	 */
	if (node->tn_reg.tn_pages_in_aobj) {
		TMPFS_NODE_LOCK(node);
		if (node->tn_reg.tn_pages_in_aobj) {
			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
			node->tn_reg.tn_pages_in_aobj = 0;
		}
		TMPFS_NODE_UNLOCK(node);
	}

	/*
	 * Fall-through to our normal read code.
	 */
	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
		base_offset = (off_t)uio->uio_offset - offset;
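		/*
		 * Worked example of this block arithmetic (illustrative;
		 * assumes TMPFS_BLKSIZE is a power of two, say 16384):
		 * a uio_offset of 20000 gives offset = 3616 and
		 * base_offset = 16384, i.e. the copy lands inside the
		 * single buffer cache block covering [16384, 32768).
		 */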
		bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE,
				 GETBLK_KVABIO);

		if (tmpfs_cluster_enable) {
			error = cluster_readx(vp, node->tn_size,
					      B_NOTMETA | B_KVABIO,
		} else {
			error = bread_kvabio(vp, base_offset,
					     TMPFS_BLKSIZE, &bp);
		}
		if (error) {
			kprintf("tmpfs_read bread error %d\n", error);
		}

		/*
		 * tmpfs pretty much fiddles directly with the VM
		 * system, don't let it exhaust it or we won't play
		 * nice with other processes.
		 *
		 * Only do this if the VOP is coming from a normal
		 * read/write.  The VM system handles the case for
		 * UIO_NOCOPY.
		 */
		if (uio->uio_segflg != UIO_NOCOPY)

		bp->b_flags |= B_CLUSTEROK;

		/*
		 * Figure out how many bytes we can actually copy this loop.
		 */
		len = TMPFS_BLKSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;
		if (len > node->tn_size - uio->uio_offset)
			len = (size_t)(node->tn_size - uio->uio_offset);

		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		if (error) {
			kprintf("tmpfs_read uiomove error %d\n", error);
		}
	}

	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
tmpfs_write(struct vop_write_args *ap)
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct thread *td = uio->uio_td;
	struct tmpfs_node *node;

	if (uio->uio_resid == 0) {

	node = VP_TO_TMPFS_NODE(vp);

	if (vp->v_type != VREG)

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;

	TMPFS_NODE_LOCK(node);

	/*
	 * restore any saved pages before proceeding
	 */
	if (node->tn_reg.tn_pages_in_aobj) {
		tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
		node->tn_reg.tn_pages_in_aobj = 0;
	}

	oldsize = node->tn_size;
	if (ap->a_ioflag & IO_APPEND)
		uio->uio_offset = node->tn_size;

	/*
	 * Check for illegal write offsets.
	 */
	if (uio->uio_offset + uio->uio_resid >
	    VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {

	/*
	 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
	 */
	if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
			ksignal(td->td_proc, SIGXFSZ);
		}
	}

	/*
	 * Extend the file's size if necessary
	 */
	extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);

	while (uio->uio_resid > 0) {
		/*
		 * Don't completely blow out running buffer I/O
		 * when being hit from the pageout daemon.
		 */
		if (uio->uio_segflg == UIO_NOCOPY &&
		    (ap->a_ioflag & IO_RECURSE) == 0) {
			bwillwrite(TMPFS_BLKSIZE);
		}

		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
		base_offset = (off_t)uio->uio_offset - offset;
		len = TMPFS_BLKSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;

		if ((uio->uio_offset + len) > node->tn_size) {
			trivial = (uio->uio_offset <= node->tn_size);
			error = tmpfs_reg_resize(vp, uio->uio_offset + len,
						 trivial);
		}

		/*
		 * Read to fill in any gaps.  Theoretically we could
		 * optimize this if the write covers the entire buffer
		 * and is not a UIO_NOCOPY write, however this can lead
		 * to a security violation exposing random kernel memory
		 * (whatever junk was in the backing VM pages before).
		 *
		 * So just use bread() to do the right thing.
		 */
		error = bread_kvabio(vp, base_offset, TMPFS_BLKSIZE, &bp);
		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		if (error) {
			kprintf("tmpfs_write uiomove error %d\n", error);
		}

		if (uio->uio_offset > node->tn_size) {
			node->tn_size = uio->uio_offset;
			kflags |= NOTE_EXTEND;
		}
		kflags |= NOTE_WRITE;
		/*
		 * Always try to flush the page in the UIO_NOCOPY case.  This
		 * can come from the pageout daemon or during vnode eviction.
		 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
		 *
		 * For the normal case we buwrite(), dirtying the underlying
		 * VM pages instead of dirtying the buffer and releasing the
		 * buffer as a clean buffer.  This allows tmpfs to use
		 * essentially all available memory to cache file data.
		 * If we used bdwrite() the buffer cache would wind up
		 * flushing the data to swap too quickly.
		 *
		 * But because tmpfs can seriously load the VM system we
		 * fall back to using bdwrite() when free memory starts
		 * to get low.  This shifts the load away from the VM system
		 * and makes tmpfs act more like a normal filesystem with
		 * regards to disk activity.
		 *
		 * tmpfs pretty much fiddles directly with the VM
		 * system, don't let it exhaust it or we won't play
		 * nice with other processes.  Only do this if the
		 * VOP is coming from a normal read/write.  The VM system
		 * handles the case for UIO_NOCOPY.
		 */
		bp->b_flags |= B_CLUSTEROK;
		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Flush from the pageout daemon, deal with
			 * potentially very heavy tmpfs write activity
			 * causing long stalls in the pageout daemon
			 * before pages get to free/cache.
			 *
			 * (a) Under severe pressure setting B_DIRECT will
			 *     cause a buffer release to try to free the
			 *     underlying pages.
			 *
			 * (b) Under modest memory pressure the B_RELBUF
			 *     alone is sufficient to get the pages moved
			 *     to the cache.  We could also force this by
			 *     setting B_NOTMETA but that might have other
			 *     unintended side-effects (e.g. setting
			 *     PG_NOTMETA on the VM page).
			 *
			 * Hopefully this will unblock the VM system more
			 * quickly under extreme tmpfs write load.
			 */
			if (vm_page_count_min(vm_page_free_hysteresis))
				bp->b_flags |= B_DIRECT;
			bp->b_flags |= B_AGE | B_RELBUF;
			bp->b_act_count = 0;	/* buffer->deactivate pgs */
		} else if (vm_page_count_target()) {
			/*
			 * Normal (userland) write but we are low on memory,
			 * run the buffer through the buffer cache.
			 */
			bp->b_act_count = 0;	/* buffer->deactivate pgs */
		} else {
			/*
			 * Otherwise run the buffer directly through to the
			 * backing VM store.
			 */
			/*vm_wait_nominal();*/
		}

			kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
	}

		(void)tmpfs_reg_resize(vp, oldsize, trivial);
		kflags &= ~NOTE_EXTEND;
	/*
	 * Currently we don't set the mtime on files modified via mmap()
	 * because we can't tell the difference between those modifications
	 * and an attempt by the pageout daemon to flush tmpfs pages to
	 * swap.
	 *
	 * This is because in order to defer flushes as long as possible
	 * buwrite() works by marking the underlying VM pages dirty in
	 * order to be able to dispose of the buffer cache buffer without
	 * flushing it.
	 */
	if (uio->uio_segflg == UIO_NOCOPY) {
		if (vp->v_flag & VLASTWRITETS) {
			node->tn_mtime = vp->v_lastwrite_ts.tv_sec;
			node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec;
		}
	} else {
		node->tn_status |= TMPFS_NODE_MODIFIED;
		vclrflags(vp, VLASTWRITETS);
	}

	node->tn_status |= TMPFS_NODE_CHANGED;

	if (node->tn_mode & (S_ISUID | S_ISGID)) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
			node->tn_mode &= ~(S_ISUID | S_ISGID);
	}
	TMPFS_NODE_UNLOCK(node);

	tmpfs_knote(vp, kflags);
tmpfs_advlock(struct vop_advlock_args *ap)
	struct tmpfs_node *node;
	struct vnode *vp = ap->a_vp;

	node = VP_TO_TMPFS_NODE(vp);
	error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));
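/*
 * Illustrative sketch, not part of this file: lf_advlock() gives tmpfs
 * standard POSIX advisory byte-range locking, so plain fcntl(2) locks
 * work on tmpfs files.  For example (error handling elided, /tmp assumed
 * to be a tmpfs mount):
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd = open("/tmp/lockfile", O_RDWR | O_CREAT, 0644);

	fl.l_type = F_WRLCK;		/* exclusive write lock */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;			/* 0 = lock to end of file */
	fcntl(fd, F_SETLKW, &fl);	/* block until granted */

	/* ... exclusive access to the file here ... */

	fl.l_type = F_UNLCK;
	fcntl(fd, F_SETLK, &fl);
	close(fd);
	return 0;
}
#endif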
/*
 * The strategy function is typically only called when memory pressure
 * forces the system to attempt to pageout pages.  It can also be called
 * by [n]vtruncbuf() when a truncation cuts a page in half.  Normal write
 * updates run through the buffer cache.
 *
 * We set VKVABIO for VREG files so bp->b_data may not be synchronized to
 * our cpu.  swap_pager_strategy() is all we really use, and it directly
 * supports this.
 */
tmpfs_strategy(struct vop_strategy_args *ap)
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;

	if (vp->v_type != VREG) {
		bp->b_resid = bp->b_bcount;
		bp->b_flags |= B_ERROR | B_INVAL;
		bp->b_error = EINVAL;
	}

	node = VP_TO_TMPFS_NODE(vp);

	uobj = node->tn_reg.tn_aobj;

	/*
	 * Don't bother flushing to swap if there is no swap, just
	 * ensure that the pages are marked as needing a commit (still).
	 */
	if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_need_commit(m);
		}
	}

	nbio = push_bio(bio);
	nbio->bio_done = tmpfs_strategy_done;
	nbio->bio_offset = bio->bio_offset;
	swap_pager_strategy(uobj, nbio);
/*
 * If we were unable to commit the pages to swap make sure they are marked
 * as needing a commit (again).  If we were able to, clear the flag to
 * allow the pages to be freed.
 *
 * Do not error-out the buffer.  In particular, vinvalbuf() needs to
 * always succeed.
 */
tmpfs_strategy_done(struct bio *bio)
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_need_commit(m);
		}
	} else {
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_clear_commit(m);
		}
	}
tmpfs_bmap(struct vop_bmap_args *ap)
	if (ap->a_doffsetp != NULL)
		*ap->a_doffsetp = ap->a_loffset;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;

/* --------------------------------------------------------------------- */
tmpfs_nremove(struct vop_nremove_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vnode *vp;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	/*
	 * We have to acquire the vp from ap->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
	 */
	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
	KKASSERT(vp->v_mount == dvp->v_mount);
	KKASSERT(error == 0);

	if (vp->v_type == VDIR) {

	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	TMPFS_NODE_LOCK(dnode);
	de = tmpfs_dir_lookup(dnode, node, ncp);
		TMPFS_NODE_UNLOCK(dnode);

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->tn_flags & APPEND)) {
		TMPFS_NODE_UNLOCK(dnode);

	/* Remove the entry from the directory; as it is a file, we do not
	 * have to change the number of hard links of the directory. */
	tmpfs_dir_detach(dnode, de);
	TMPFS_NODE_UNLOCK(dnode);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	if (node->tn_links > 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(node);
	}

	cache_unlink(ap->a_nch);
	tmpfs_knote(vp, NOTE_DELETE);
	tmpfs_knote(dvp, NOTE_WRITE);

/* --------------------------------------------------------------------- */
tmpfs_nlink(struct vop_nlink_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct tmpfs_dirent *de;
	struct tmpfs_node *node;
	struct tmpfs_node *dnode;

	KKASSERT(dvp != vp); /* XXX When can this be false? */

	node = VP_TO_TMPFS_NODE(vp);
	dnode = VP_TO_TMPFS_NODE(dvp);
	TMPFS_NODE_LOCK(dnode);

	/* XXX: Why aren't the following two tests done by the caller? */

	/* Hard links of directories are forbidden. */
	if (vp->v_type == VDIR) {

	/* Cannot create cross-device links. */
	if (dvp->v_mount != vp->v_mount) {

	/* Ensure that we do not overflow the maximum number of links imposed
	 * by the system. */
	KKASSERT(node->tn_links <= LINK_MAX);
	if (node->tn_links >= LINK_MAX) {

	/* We cannot create links of files marked immutable or append-only. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {

	/* Allocate a new directory entry to represent the node. */
	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
				   ncp->nc_name, ncp->nc_nlen, &de);

	/* Insert the new directory entry into the appropriate directory. */
	tmpfs_dir_attach(dnode, de);

	/* vp link count has changed, so update node times. */
	TMPFS_NODE_LOCK(node);
	node->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(node);

	tmpfs_knote(vp, NOTE_LINK);
	cache_setunresolved(ap->a_nch);
	cache_setvp(ap->a_nch, vp);

	TMPFS_NODE_UNLOCK(dnode);
	tmpfs_knote(dvp, NOTE_WRITE);

/* --------------------------------------------------------------------- */
tmpfs_nrename(struct vop_nrename_args *ap)
	struct vnode *fdvp = ap->a_fdvp;
	struct namecache *fncp = ap->a_fnch->ncp;
	struct vnode *fvp = fncp->nc_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct namecache *tncp = ap->a_tnch->ncp;
	struct vnode *tvp;
	struct tmpfs_dirent *de, *tde;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;
	char *newname;
	char *oldname;

	KKASSERT(fdvp->v_mount == fvp->v_mount);

	/*
	 * Because tvp can get overwritten we have to vget it instead of
	 * just vref or use it, otherwise its VINACTIVE flag may not get
	 * cleared and the node won't get destroyed.
	 */
	error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp);
		tnode = VP_TO_TMPFS_NODE(tvp);

	/* Disallow cross-device renames.
	 * XXX Why isn't this done by the caller? */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);

	/* If source and target are the same file, there is nothing to do. */

	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	TMPFS_NODE_LOCK(fdnode);
	de = tmpfs_dir_lookup(fdnode, fnode, fncp);
	TMPFS_NODE_UNLOCK(fdnode);	/* XXX depend on namecache lock */

	/* Avoid manipulating '.' and '..' entries. */

	KKASSERT(de->td_node == fnode);

	/*
	 * If replacing an entry in the target directory and that entry
	 * is a directory, it must be empty.
	 *
	 * Kern_rename guarantees the destination to be a directory
	 * if the source is one (it does?).
	 */
		KKASSERT(tnode != NULL);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			if (tnode->tn_size > 0) {
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
		} else {
			KKASSERT(fnode->tn_type != VDIR &&
				 tnode->tn_type != VDIR);
		}

	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {

	/*
	 * Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.
	 */
	if (fncp->nc_nlen != tncp->nc_nlen ||
	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
				  M_WAITOK | M_NULLOK);
		if (newname == NULL) {
		}
		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
		newname[tncp->nc_nlen] = '\0';
	} else {
		newname = NULL;
	}
	/*
	 * Unlink entry from source directory.  Note that the kernel has
	 * already checked for illegal recursion cases (renaming a directory
	 * into a subdirectory of itself).
	 */
	if (fdnode != tdnode) {
		tmpfs_dir_detach(fdnode, de);
	} else {
		/* XXX depend on namecache lock */
		TMPFS_NODE_LOCK(fdnode);
		KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp));
		RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
		RB_REMOVE(tmpfs_dirtree_cookie,
			  &fdnode->tn_dir.tn_cookietree, de);
		TMPFS_NODE_UNLOCK(fdnode);
	}

	/*
	 * Handle any name change.  Swap with newname, we will
	 * deallocate it at the end.
	 */
	if (newname != NULL) {
		TMPFS_NODE_LOCK(fnode);
		fnode->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(fnode);

		oldname = de->td_name;
		de->td_name = newname;
		de->td_namelen = (uint16_t)tncp->nc_nlen;
		newname = oldname;
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		/* Remove the old entry from the target directory. */
		TMPFS_NODE_LOCK(tdnode);
		tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
		tmpfs_dir_detach(tdnode, tde);
		TMPFS_NODE_UNLOCK(tdnode);
		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode
		 * is really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
		/*cache_inval_vp(tvp, CINV_DESTROY);*/
	}

	/*
	 * Link entry to target directory.  If the entry
	 * represents a directory move the parent linkage
	 * as well.
	 */
	if (fdnode != tdnode) {
		if (de->td_node->tn_type == VDIR) {
			TMPFS_VALIDATE_DIR(fnode);
		}
		tmpfs_dir_attach(tdnode, de);
	} else {
		TMPFS_NODE_LOCK(tdnode);
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
		RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
		RB_INSERT(tmpfs_dirtree_cookie,
			  &tdnode->tn_dir.tn_cookietree, de);
		TMPFS_NODE_UNLOCK(tdnode);
	}

		kfree(newname, tmp->tm_name_zone);

	cache_rename(ap->a_fnch, ap->a_tnch);
	tmpfs_knote(ap->a_fdvp, NOTE_WRITE);
	tmpfs_knote(ap->a_tdvp, NOTE_WRITE);
	if (fnode->tn_vnode)
		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
/* --------------------------------------------------------------------- */

tmpfs_nmkdir(struct vop_nmkdir_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;

	KKASSERT(vap->va_type == VDIR);

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);

/* --------------------------------------------------------------------- */
tmpfs_nrmdir(struct vop_nrmdir_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vnode *vp;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	/*
	 * We have to acquire the vp from ap->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
	 */
	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
	KKASSERT(error == 0);

	/*
	 * Prevalidate so we don't hit an assertion later
	 */
	if (vp->v_type != VDIR) {

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_DIR(vp);

	/*
	 * Directories with more than two entries ('.' and '..') cannot
	 * be removed.
	 */
	if (node->tn_size > 0) {

	if ((dnode->tn_flags & APPEND)
	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {

	/*
	 * This invariant holds only if we are not trying to
	 * remove "..".  We checked for that above so this is safe now.
	 */
	KKASSERT(node->tn_dir.tn_parent == dnode);

	/*
	 * Get the directory entry associated with node (vp).  This
	 * was filled by tmpfs_lookup while looking up the entry.
	 */
	TMPFS_NODE_LOCK(dnode);
	de = tmpfs_dir_lookup(dnode, node, ncp);
	KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen));

	/* Check flags to see if we are allowed to remove the directory. */
	if ((dnode->tn_flags & APPEND) ||
	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
		TMPFS_NODE_UNLOCK(dnode);

	/* Detach the directory entry from the directory (dnode). */
	tmpfs_dir_detach(dnode, de);
	TMPFS_NODE_UNLOCK(dnode);

	/* No vnode should be allocated for this entry from this point */
	TMPFS_NODE_LOCK(dnode);
	TMPFS_ASSERT_ELOCKED(dnode);
	TMPFS_NODE_LOCK(node);
	TMPFS_ASSERT_ELOCKED(node);

	/*
	 * Must set parent linkage to NULL (tested by ncreate to disallow
	 * the creation of new files/dirs in a deleted directory)
	 */
	node->tn_status |= TMPFS_NODE_CHANGED;

	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
			    TMPFS_NODE_MODIFIED;

	TMPFS_NODE_UNLOCK(node);
	TMPFS_NODE_UNLOCK(dnode);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	/* Release the deleted vnode (will destroy the node, notify
	 * interested parties and clean it from the cache). */
	TMPFS_NODE_LOCK(dnode);
	dnode->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(dnode);

	cache_unlink(ap->a_nch);
	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);

/* --------------------------------------------------------------------- */
tmpfs_nsymlink(struct vop_nsymlink_args *ap)
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	char *target = ap->a_target;

	vap->va_type = VLNK;
	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
		tmpfs_knote(*vpp, NOTE_WRITE);
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);

/* --------------------------------------------------------------------- */
tmpfs_readdir(struct vop_readdir_args *ap)
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int *eofflag = ap->a_eofflag;
	off_t **cookies = ap->a_cookies;
	int *ncookies = ap->a_ncookies;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR) {

	tmp = VFS_TO_TMPFS(vp->v_mount);
	node = VP_TO_TMPFS_DIR(vp);
	startoff = uio->uio_offset;

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
		error = tmpfs_dir_getdotdent(node, uio);
			TMPFS_NODE_LOCK_SH(node);
	}

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
		/* may lock parent, cannot hold node lock */
		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
			TMPFS_NODE_LOCK_SH(node);
	}

	TMPFS_NODE_LOCK_SH(node);
	error = tmpfs_dir_getdents(node, uio, &cnt);

	KKASSERT(error >= -1);

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	/* Update NFS-related variables. */
	if (error == 0 && cookies != NULL && ncookies != NULL) {
		off_t i;
		off_t off = startoff;
		struct tmpfs_dirent *de = NULL;

		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);

		for (i = 0; i < cnt; i++) {
			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
			if (off == TMPFS_DIRCOOKIE_DOT) {
				off = TMPFS_DIRCOOKIE_DOTDOT;
			} else {
				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
					de = RB_MIN(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree);
				} else if (de != NULL) {
					de = RB_NEXT(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree, de);
				} else {
					de = tmpfs_dir_lookupbycookie(node,
								      off);
					KKASSERT(de != NULL);
					de = RB_NEXT(tmpfs_dirtree_cookie,
					    &node->tn_dir.tn_cookietree, de);
				}
				if (de == NULL)
					off = TMPFS_DIRCOOKIE_EOF;
				else
					off = tmpfs_dircookie(de);
			}
			(*cookies)[i] = off;
		}
		KKASSERT(uio->uio_offset == off);
	}
	TMPFS_NODE_UNLOCK(node);

	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}

/* --------------------------------------------------------------------- */
tmpfs_readlink(struct vop_readlink_args *ap)
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct tmpfs_node *node;

	KKASSERT(uio->uio_offset == 0);
	KKASSERT(vp->v_type == VLNK);

	node = VP_TO_TMPFS_NODE(vp);
	TMPFS_NODE_LOCK_SH(node);
	error = uiomove(node->tn_link,
			MIN(node->tn_size, uio->uio_resid), uio);
	TMPFS_NODE_UNLOCK(node);
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}

/* --------------------------------------------------------------------- */
tmpfs_inactive(struct vop_inactive_args *ap)
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;

	lwkt_gettoken(&mp->mnt_token);
	node = VP_TO_TMPFS_NODE(vp);
		lwkt_reltoken(&mp->mnt_token);

	/*
	 * Get rid of unreferenced deleted vnodes sooner rather than
	 * later so the data memory can be recovered immediately.
	 *
	 * We must truncate the vnode to prevent the normal reclamation
	 * path from flushing the data for the removed file to disk.
	 */
	TMPFS_NODE_LOCK(node);
	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
	    node->tn_links == 0)
	{
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		TMPFS_NODE_UNLOCK(node);
		if (node->tn_type == VREG)
			tmpfs_truncate(vp, 0);

	/*
	 * We must retain any VM pages belonging to the vnode's
	 * object as the vnode will destroy the object during a
	 * later reclaim.  We call vinvalbuf(V_SAVE) to clean
	 * out the buffer cache.
	 *
	 * On DragonFlyBSD, vnodes are not immediately deactivated
	 * on the 1->0 refs, so this is a relatively optimal
	 * operation.  We have to do this in tmpfs_inactive()
	 * because the pages will have already been thrown away
	 * at the time tmpfs_reclaim() is called.
	 */
	if (node->tn_type == VREG &&
	    node->tn_reg.tn_pages_in_aobj == 0) {
		vinvalbuf(vp, V_SAVE, 0, 0);
		KKASSERT(RB_EMPTY(&vp->v_rbdirty_tree));
		KKASSERT(RB_EMPTY(&vp->v_rbclean_tree));
		tmpfs_move_pages(vp->v_object, node->tn_reg.tn_aobj);
		node->tn_reg.tn_pages_in_aobj = 1;
	}

	TMPFS_NODE_UNLOCK(node);

	lwkt_reltoken(&mp->mnt_token);

/* --------------------------------------------------------------------- */
tmpfs_reclaim(struct vop_reclaim_args *ap)
	struct vnode *vp = ap->a_vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;

	lwkt_gettoken(&mp->mnt_token);

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	KKASSERT(mp == tmp->tm_mount);

	TMPFS_NODE_LOCK(node);
	KKASSERT(node->tn_vnode == vp);
	node->tn_vnode = NULL;

	/*
	 * If the node referenced by this vnode was deleted by the
	 * user, we must free its associated data structures now that
	 * the vnode is being reclaimed.
	 *
	 * Directories have an extra link ref.
	 */
	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
	    node->tn_links == 0) {
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		tmpfs_free_node(tmp, node);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}
	lwkt_reltoken(&mp->mnt_token);

	KKASSERT(vp->v_data == NULL);

/* --------------------------------------------------------------------- */
tmpfs_mountctl(struct vop_mountctl_args *ap)
	struct tmpfs_mount *tmp;
	struct mount *mp;
	int rc;

	mp = ap->a_head.a_ops->head.vv_mount;
	lwkt_gettoken(&mp->mnt_token);

	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
		tmp = (struct tmpfs_mount *) mp->mnt_data;

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = EINVAL;
		else
			rc = vfs_export(mp, &tmp->tm_export,
					(const struct export_args *) ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}

	lwkt_reltoken(&mp->mnt_token);
	return (rc);

/* --------------------------------------------------------------------- */
tmpfs_print(struct vop_print_args *ap)
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
	    node, node->tn_flags, node->tn_links);
	kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
	    node->tn_mode, node->tn_uid, node->tn_gid,
	    (uintmax_t)node->tn_size, node->tn_status);

	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);

/* --------------------------------------------------------------------- */
tmpfs_pathconf(struct vop_pathconf_args *ap)
	struct vnode *vp = ap->a_vp;
	int name = ap->a_name;
	register_t *retval = ap->a_retval;
	struct tmpfs_mount *tmp;

	switch (name) {
	case _PC_CHOWN_RESTRICTED:

	case _PC_FILESIZEBITS:
		tmp = VFS_TO_TMPFS(vp->v_mount);
		*retval = max(32, flsll(tmp->tm_pages_max * PAGE_SIZE) + 1);
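		/*
		 * Worked example (illustrative): with tm_pages_max at
		 * 2^20 pages and PAGE_SIZE 4096 the product is 2^32,
		 * flsll() returns 33, and 34 bits are reported; the
		 * max() clamp keeps the result at no less than 32.
		 */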
		break;
	case _PC_2_SYMLINKS:

/************************************************************************
 *				KQFILTER OPS				*
 ************************************************************************/
static void filt_tmpfsdetach(struct knote *kn);
static int filt_tmpfsread(struct knote *kn, long hint);
static int filt_tmpfswrite(struct knote *kn, long hint);
static int filt_tmpfsvnode(struct knote *kn, long hint);

static struct filterops tmpfsread_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfsread };
static struct filterops tmpfswrite_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfswrite };
static struct filterops tmpfsvnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfsvnode };

tmpfs_kqfilter(struct vop_kqfilter_args *ap)
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &tmpfsread_filtops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &tmpfswrite_filtops;
		break;
	case EVFILT_VNODE:
		kn->kn_fop = &tmpfsvnode_filtops;
		break;
	default:
		return (EOPNOTSUPP);
	}

	kn->kn_hook = (caddr_t)vp;

	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
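/*
 * Illustrative sketch, not part of this file: the filterops above back
 * the userland kqueue(2) interface.  A minimal consumer watching a file
 * (assumed to live on a tmpfs mount) for the NOTE_WRITE/NOTE_EXTEND
 * events posted via tmpfs_knote() might look like this (error handling
 * elided):
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <fcntl.h>
#include <stdio.h>

int
main(void)
{
	int kq = kqueue();
	int fd = open("/tmp/watched", O_RDONLY);
	struct kevent ev;

	/* Register interest in writes/extensions of the file. */
	EV_SET(&ev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
	       NOTE_WRITE | NOTE_EXTEND, 0, NULL);
	kevent(kq, &ev, 1, NULL, 0, NULL);

	/* Block until one event arrives, then report its flags. */
	kevent(kq, NULL, 0, &ev, 1, NULL);
	printf("fflags 0x%x\n", ev.fflags);
	return 0;
}
#endif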
filt_tmpfsdetach(struct knote *kn)
	struct vnode *vp = (void *)kn->kn_hook;

	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);

filt_tmpfsread(struct knote *kn, long hint)
	struct vnode *vp = (void *)kn->kn_hook;
	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
	off_t off;

	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
		return (1);
	}

	/*
	 * Interlock against MP races when performing this function.
	 */
	TMPFS_NODE_LOCK_SH(node);
	off = node->tn_size - kn->kn_fp->f_offset;
	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
	if (kn->kn_sfflags & NOTE_OLDAPI) {
		TMPFS_NODE_UNLOCK(node);
		return (1);
	}
	if (kn->kn_data == 0) {
		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
	}
	TMPFS_NODE_UNLOCK(node);
	return (kn->kn_data != 0);

filt_tmpfswrite(struct knote *kn, long hint)
	if (hint == NOTE_REVOKE)
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);

filt_tmpfsvnode(struct knote *kn, long hint)
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		return (1);
	}
	return (kn->kn_fflags != 0);
/*
 * Helper to move VM pages between objects
 *
 * NOTE: The vm_page_rename() dirties the page, so we can clear the
 *	 PG_NEED_COMMIT flag.  If the pages are being moved into tn_aobj,
 *	 the pageout daemon will be able to page them out.
 */
tmpfs_move_pages_callback(vm_page_t p, void *data)
	struct rb_vm_page_scan_info *info = data;

	if (vm_page_busy_try(p, TRUE)) {
		vm_page_sleep_busy(p, TRUE, "tpgmov");

	if (p->object != info->object || p->pindex != pindex) {

	vm_page_rename(p, info->dest_object, pindex);
	vm_page_clear_commit(p);
	/* page automatically made dirty */
tmpfs_move_pages(vm_object_t src, vm_object_t dst)
	struct rb_vm_page_scan_info info;

	vm_object_hold(src);
	vm_object_hold(dst);

	info.dest_object = dst;

	do {
		if (src->paging_in_progress)
			vm_object_pip_wait(src, "objtfs");
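		/*
		 * The callback signals a lost busy-page race by setting
		 * info.error negative, forcing another full scan; the
		 * loop only terminates once the source object is empty
		 * and no paging activity remains in flight.
		 */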
		vm_page_rb_tree_RB_SCAN(&src->rb_memq, NULL,
					tmpfs_move_pages_callback, &info);
	} while (info.error < 0 || !RB_EMPTY(&src->rb_memq) ||
		 src->paging_in_progress);
	vm_object_drop(dst);
	vm_object_drop(src);
/* --------------------------------------------------------------------- */

/*
 * vnode operations vector used for files stored in a tmpfs file system.
 */
struct vop_ops tmpfs_vnode_vops = {
	.vop_default = vop_defaultop,
	.vop_getpages = vop_stdgetpages,
	.vop_putpages = vop_stdputpages,
	.vop_ncreate = tmpfs_ncreate,
	.vop_nresolve = tmpfs_nresolve,
	.vop_nlookupdotdot = tmpfs_nlookupdotdot,
	.vop_nmknod = tmpfs_nmknod,
	.vop_open = tmpfs_open,
	.vop_close = tmpfs_close,
	.vop_access = tmpfs_access,
	.vop_getattr = tmpfs_getattr,
	.vop_setattr = tmpfs_setattr,
	.vop_read = tmpfs_read,
	.vop_write = tmpfs_write,
	.vop_fsync = tmpfs_fsync,
	.vop_mountctl = tmpfs_mountctl,
	.vop_nremove = tmpfs_nremove,
	.vop_nlink = tmpfs_nlink,
	.vop_nrename = tmpfs_nrename,
	.vop_nmkdir = tmpfs_nmkdir,
	.vop_nrmdir = tmpfs_nrmdir,
	.vop_nsymlink = tmpfs_nsymlink,
	.vop_readdir = tmpfs_readdir,
	.vop_readlink = tmpfs_readlink,
	.vop_inactive = tmpfs_inactive,
	.vop_reclaim = tmpfs_reclaim,
	.vop_print = tmpfs_print,
	.vop_pathconf = tmpfs_pathconf,
	.vop_bmap = tmpfs_bmap,
	.vop_strategy = tmpfs_strategy,
	.vop_advlock = tmpfs_advlock,
	.vop_kqfilter = tmpfs_kqfilter
};