kernel -- tmpfs: Mark tmpfs_write MPSAFE.
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vnops.c
index 40754e4..5ae3183 100644 (file)
@@ -1,5 +1,3 @@
-/*     $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $    */
-
 /*-
  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
  * All rights reserved.
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
  */
 
 /*
  * tmpfs vnode interface.
  */
-#include <sys/cdefs.h>
 
 #include <sys/kernel.h>
 #include <sys/kern_syscall.h>
@@ -44,7 +43,6 @@
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
-#include <sys/sfbuf.h>
 #include <sys/stat.h>
 #include <sys/systm.h>
 #include <sys/unistd.h>
@@ -57,6 +55,7 @@
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
+#include <vm/swap_pager.h>
 
 #include <vfs/fifofs/fifo.h>
 #include <vfs/tmpfs/tmpfs_vnops.h>
 
 MALLOC_DECLARE(M_TMPFS);
 
+static __inline
+void
+tmpfs_knote(struct vnode *vp, int flags)
+{
+       if (flags)
+               KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
+}
+
+
 /* --------------------------------------------------------------------- */
 
 static int
@@ -72,7 +80,7 @@ tmpfs_nresolve(struct vop_nresolve_args *v)
        struct vnode *dvp = v->a_dvp;
        struct vnode *vp = NULL;
        struct namecache *ncp = v->a_nch->ncp;
-       struct ucred *cred = v->a_cred;
+       struct tmpfs_node *tnode;
 
        int error;
        struct tmpfs_dirent *de;
@@ -80,80 +88,32 @@ tmpfs_nresolve(struct vop_nresolve_args *v)
 
        dnode = VP_TO_TMPFS_DIR(dvp);
 
-       if (!vn_islocked(dvp));
-               vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-
-       /* Check accessibility of requested node as a first step. */
-       error = VOP_ACCESS(dvp, VEXEC, cred);
-       if (error != 0)
-               goto out;
-
-       if (dnode->tn_dir.tn_parent == NULL) {
-               error = ENOENT;
-               goto out;
-       }
-
        de = tmpfs_dir_lookup(dnode, NULL, ncp);
        if (de == NULL) {
-               /* The entry was not found in the directory.
-                * This is OK if we are creating or renaming an
-                * entry and are working on the last component of
-                * the path name. */
-               error = VOP_ACCESS(dvp, VWRITE, cred);
-               if (error != 0)
-                       goto out;
-               else {
-                       error = ENOENT;
-                       goto out;
-               }
+               error = ENOENT;
        } else {
-               struct tmpfs_node *tnode;
-
-               /* The entry was found, so get its associated
-                * tmpfs_node. */
+               /*
+                * Allocate a vnode for the node we found.
+                */
                tnode = de->td_node;
-
-               /* If we are not at the last path component and
-                * found a non-directory or non-link entry (which
-                * may itself be pointing to a directory), raise
-                * an error. */
-               if (tnode->tn_links > 1 &&
-                   tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
-                       error = ENOTDIR;
-                       goto out;
-               }
-
-               error = VOP_ACCESS(dvp, VWRITE, cred);
-               if (error != 0)
-                       goto out;
-
-               /* Allocate a new vnode on the matching entry. */
                error = tmpfs_alloc_vp(dvp->v_mount, tnode,
-                               LK_EXCLUSIVE | LK_RETRY, &vp);
-               if (error != 0)
+                                      LK_EXCLUSIVE | LK_RETRY, &vp);
+               if (error)
                        goto out;
-
-               if ((dnode->tn_mode & S_ISTXT) &&
-                   VOP_ACCESS(vp, VWRITE, cred)) {
-                       error = EPERM;
-                       vp = NULL;
-                       goto out;
-               }
+               KKASSERT(vp);
        }
 
-       KKASSERT(vp);
-
 out:
-       vn_unlock(dvp);
-       /* Store the result of this lookup in the cache.  Avoid this if the
+       /*
+        * Store the result of this lookup in the cache.  Avoid this if the
         * request was for creation, as it does not improve timings on
-        * emprical tests. */
+        * empirical tests.
+        */
        if (vp) {
                vn_unlock(vp);
                cache_setvp(v->a_nch, vp);
                vrele(vp);
-       }
-       if (error == ENOENT) {
+       } else if (error == ENOENT) {
                cache_setvp(v->a_nch, NULL);
        }
        return error;
@@ -200,13 +160,12 @@ tmpfs_ncreate(struct vop_ncreate_args *v)
 
        KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
 
-       vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
        if (error == 0) {
                cache_setunresolved(v->a_nch);
                cache_setvp(v->a_nch, *vpp);
+               tmpfs_knote(dvp, NOTE_WRITE);
        }
-       vn_unlock(dvp);
 
        return error;
 }
@@ -226,13 +185,12 @@ tmpfs_nmknod(struct vop_nmknod_args *v)
            vap->va_type != VFIFO)
                return EINVAL;
 
-       vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
        if (error == 0) {
                cache_setunresolved(v->a_nch);
                cache_setvp(v->a_nch, *vpp);
+               tmpfs_knote(dvp, NOTE_WRITE);
        }
-       vn_unlock(dvp);
 
        return error;
 }
@@ -248,8 +206,6 @@ tmpfs_open(struct vop_open_args *v)
        int error;
        struct tmpfs_node *node;
 
-       KKASSERT(vn_islocked(vp));
-
        node = VP_TO_TMPFS_NODE(vp);
 
        /* The file is still active but all its names have been removed
@@ -259,13 +215,12 @@ tmpfs_open(struct vop_open_args *v)
                return (ENOENT);
 
        /* If the file is marked append-only, deny write requests. */
-       if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
+       if ((node->tn_flags & APPEND) &&
+           (mode & (FWRITE | O_APPEND)) == FWRITE) {
                error = EPERM;
-       else {
+       } else {
                return (vop_stdopen(v));
        }
-
-       KKASSERT(vn_islocked(vp));
        return error;
 }
 
@@ -297,8 +252,6 @@ tmpfs_access(struct vop_access_args *v)
        int error;
        struct tmpfs_node *node;
 
-       KKASSERT(vn_islocked(vp));
-
        node = VP_TO_TMPFS_NODE(vp);
 
        switch (vp->v_type) {
@@ -307,7 +260,7 @@ tmpfs_access(struct vop_access_args *v)
        case VLNK:
                /* FALLTHROUGH */
        case VREG:
-               if (VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
+               if ((v->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
                        error = EROFS;
                        goto out;
                }
@@ -327,7 +280,7 @@ tmpfs_access(struct vop_access_args *v)
                goto out;
        }
 
-       if (VWRITE && node->tn_flags & IMMUTABLE) {
+       if ((v->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
                error = EPERM;
                goto out;
        }
@@ -346,17 +299,11 @@ tmpfs_getattr(struct vop_getattr_args *v)
 {
        struct vnode *vp = v->a_vp;
        struct vattr *vap = v->a_vap;
-
        struct tmpfs_node *node;
-       int needunlock = 0;
-
-       if(!vn_islocked(vp)) {
-               needunlock = 1;
-               vn_lock(vp, LK_SHARED | LK_RETRY);
-       }
 
        node = VP_TO_TMPFS_NODE(vp);
 
+       lwkt_gettoken(&vp->v_mount->mnt_token);
        tmpfs_update(vp);
 
        vap->va_type = vp->v_type;
@@ -384,8 +331,7 @@ tmpfs_getattr(struct vop_getattr_args *v)
        vap->va_bytes = round_page(node->tn_size);
        vap->va_filerev = 0;
 
-       if (needunlock)
-               vn_unlock(vp);
+       lwkt_reltoken(&vp->v_mount->mnt_token);
 
        return 0;
 }
@@ -398,66 +344,77 @@ tmpfs_setattr(struct vop_setattr_args *v)
        struct vnode *vp = v->a_vp;
        struct vattr *vap = v->a_vap;
        struct ucred *cred = v->a_cred;
-
+       struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
        int error = 0;
-       int needunlock = 0;
+       int kflags = 0;
 
-       if(!vn_islocked(vp)) {
-               needunlock = 1;
-               vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-       }
-
-       /* Abort if any unsettable attribute is given. */
-       if (vap->va_type != VNON ||
-           vap->va_nlink != VNOVAL ||
-           vap->va_fsid != VNOVAL ||
-           vap->va_fileid != VNOVAL ||
-           vap->va_blocksize != VNOVAL ||
-           vap->va_gen != VNOVAL ||
-           vap->va_rmajor != VNOVAL ||
-           vap->va_bytes != VNOVAL)
-               error = EINVAL;
-
-       if (error == 0 && (vap->va_flags != VNOVAL))
+       if (error == 0 && (vap->va_flags != VNOVAL)) {
                error = tmpfs_chflags(vp, vap->va_flags, cred);
+               kflags |= NOTE_ATTRIB;
+       }
 
-       if (error == 0 && (vap->va_size != VNOVAL))
+       if (error == 0 && (vap->va_size != VNOVAL)) {
+               if (vap->va_size > node->tn_size)
+                       kflags |= NOTE_WRITE | NOTE_EXTEND;
+               else
+                       kflags |= NOTE_WRITE;
                error = tmpfs_chsize(vp, vap->va_size, cred);
+       }
 
-       if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
+       if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
+                          vap->va_gid != (gid_t)VNOVAL)) {
                error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
+               kflags |= NOTE_ATTRIB;
+       }
 
-       if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
+       if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
                error = tmpfs_chmod(vp, vap->va_mode, cred);
+               kflags |= NOTE_ATTRIB;
+       }
 
        if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
            vap->va_atime.tv_nsec != VNOVAL) ||
            (vap->va_mtime.tv_sec != VNOVAL &&
-           vap->va_mtime.tv_nsec != VNOVAL) ))
+           vap->va_mtime.tv_nsec != VNOVAL) )) {
                error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
-                       vap->va_vaflags, cred);
+                                     vap->va_vaflags, cred);
+               kflags |= NOTE_ATTRIB;
+       }
 
        /* Update the node times.  We give preference to the error codes
         * generated by this function rather than the ones that may arise
         * from tmpfs_update. */
        tmpfs_update(vp);
-
-       if (needunlock)
-               vn_unlock(vp);
+       tmpfs_knote(vp, kflags);
 
        return error;
 }
 
 /* --------------------------------------------------------------------- */
 
+/*
+ * fsync is usually a NOP, but we must take action when unmounting or
+ * when recycling.
+ */
 static int
 tmpfs_fsync(struct vop_fsync_args *v)
 {
+       struct tmpfs_mount *tmp;
+       struct tmpfs_node *node;
        struct vnode *vp = v->a_vp;
 
-       tmpfs_update(vp);
-
+       tmp = VFS_TO_TMPFS(vp->v_mount);
+       node = VP_TO_TMPFS_NODE(vp);
 
+       tmpfs_update(vp);
+       if (vp->v_type == VREG) {
+               if (vp->v_flag & VRECLAIMED) {
+                       if (node->tn_links == 0)
+                               tmpfs_truncate(vp, 0);
+                       else
+                               vfsync(v->a_vp, v->a_waitfor, 1, NULL, NULL);
+               }
+       }
        return 0;
 }
 
@@ -470,11 +427,10 @@ tmpfs_read (struct vop_read_args *ap)
        struct vnode *vp = ap->a_vp;
        struct uio *uio = ap->a_uio;
        struct tmpfs_node *node;
-       int error;
-       off_t offset;
        off_t base_offset;
+       size_t offset;
        size_t len;
-       int got_mplock;
+       int error;
 
        error = 0;
        if (uio->uio_resid == 0) {
@@ -488,42 +444,24 @@ tmpfs_read (struct vop_read_args *ap)
        if (vp->v_type != VREG)
                return (EINVAL);
 
-       vn_lock(vp, LK_SHARED | LK_RETRY);
-
-#ifdef SMP
-       if(curthread->td_mpcount)
-               got_mplock = -1;
-       else
-               got_mplock = 0;
-#else
-               got_mplock = -1;
-#endif
-
        while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
                /*
                 * Use buffer cache I/O (via tmpfs_strategy)
                 */
-               offset = (off_t)uio->uio_offset & BMASK;
+               offset = (size_t)uio->uio_offset & BMASK;
                base_offset = (off_t)uio->uio_offset - offset;
-               bp = getcacheblk(vp, base_offset);
+               bp = getcacheblk(vp, base_offset, BSIZE);
                if (bp == NULL)
                {
-                       if (got_mplock == 0) {
-                               got_mplock = 1;
-                               get_mplock();
-                       }
-
+                       lwkt_gettoken(&vp->v_mount->mnt_token);
                        error = bread(vp, base_offset, BSIZE, &bp);
                        if (error) {
                                brelse(bp);
+                               lwkt_reltoken(&vp->v_mount->mnt_token);
                                kprintf("tmpfs_read bread error %d\n", error);
                                break;
                        }
-               }
-
-               if (got_mplock == 0) {
-                       got_mplock = 1;
-                       get_mplock();
+                       lwkt_reltoken(&vp->v_mount->mnt_token);
                }
 
                /*
@@ -543,15 +481,10 @@ tmpfs_read (struct vop_read_args *ap)
                }
        }
 
-       if (got_mplock > 0)
-               rel_mplock();
-
        TMPFS_NODE_LOCK(node);
        node->tn_status |= TMPFS_NODE_ACCESSED;
        TMPFS_NODE_UNLOCK(node);
 
-       vn_unlock(vp);
-
        return(error);
 }
 
@@ -566,12 +499,12 @@ tmpfs_write (struct vop_write_args *ap)
        boolean_t extended;
        off_t oldsize;
        int error;
-       off_t offset;
        off_t base_offset;
+       size_t offset;
        size_t len;
        struct rlimit limit;
-       int got_mplock;
        int trivial = 0;
+       int kflags = 0;
 
        error = 0;
        if (uio->uio_resid == 0) {
@@ -583,6 +516,8 @@ tmpfs_write (struct vop_write_args *ap)
        if (vp->v_type != VREG)
                return (EINVAL);
 
+       lwkt_gettoken(&vp->v_mount->mnt_token);
+
        oldsize = node->tn_size;
        if (ap->a_ioflag & IO_APPEND)
                uio->uio_offset = node->tn_size;
@@ -591,15 +526,20 @@ tmpfs_write (struct vop_write_args *ap)
         * Check for illegal write offsets.
         */
        if (uio->uio_offset + uio->uio_resid >
-         VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
+         VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
+               lwkt_reltoken(&vp->v_mount->mnt_token);
                return (EFBIG);
+       }
 
        if (vp->v_type == VREG && td != NULL) {
                error = kern_getrlimit(RLIMIT_FSIZE, &limit);
-               if (error != 0)
+               if (error != 0) {
+                       lwkt_reltoken(&vp->v_mount->mnt_token);
                        return error;
+               }
                if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
                        ksignal(td->td_proc, SIGXFSZ);
+                       lwkt_reltoken(&vp->v_mount->mnt_token);
                        return (EFBIG);
                }
        }
@@ -608,40 +548,37 @@ tmpfs_write (struct vop_write_args *ap)
        /*
         * Extend the file's size if necessary
         */
-       extended = (uio->uio_offset + uio->uio_resid) > node->tn_size;
-
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-#ifdef SMP
-       if(curthread->td_mpcount)
-               got_mplock = -1;
-       else {
-               got_mplock = 1;
-               get_mplock();
-       }
-#else
-               got_mplock = -1;
-#endif
-       crit_enter();
+       extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);
+
+       get_mplock();
+
        while (uio->uio_resid > 0) {
                /*
                 * Use buffer cache I/O (via tmpfs_strategy)
                 */
-               offset = (off_t)uio->uio_offset & BMASK;
+               offset = (size_t)uio->uio_offset & BMASK;
                base_offset = (off_t)uio->uio_offset - offset;
                len = BSIZE - offset;
                if (len > uio->uio_resid)
                        len = uio->uio_resid;
 
                if ((uio->uio_offset + len) > node->tn_size) {
-                       trivial = uio->uio_offset <= node->tn_size;
+                       trivial = (uio->uio_offset <= node->tn_size);
                        error = tmpfs_reg_resize(vp, uio->uio_offset + len,  trivial);
                        if (error)
                                break;
                }
 
-               bp = getblk(vp, base_offset, BSIZE, GETBLK_BHEAVY, 0);
-               vfs_bio_clrbuf(bp);
-
+               /*
+                * Read to fill in any gaps.  Theoretically we could
+                * optimize this if the write covers the entire buffer
+                * and is not a UIO_NOCOPY write, however this can lead
+                * to a security violation exposing random kernel memory
+                * (whatever junk was in the backing VM pages before).
+                *
+                * So just use bread() to do the right thing.
+                */
+               error = bread(vp, base_offset, BSIZE, &bp);
                error = uiomove((char *)bp->b_data + offset, len, uio);
                if (error) {
                        kprintf("tmpfs_write uiomove error %d\n", error);
@@ -649,32 +586,60 @@ tmpfs_write (struct vop_write_args *ap)
                        break;
                }
 
-               if (uio->uio_offset > node->tn_size)
+               if (uio->uio_offset > node->tn_size) {
                        node->tn_size = uio->uio_offset;
+                       kflags |= NOTE_EXTEND;
+               }
+               kflags |= NOTE_WRITE;
 
                /*
-                * The data has been loaded into the buffer, write it out. (via tmpfs_strategy)
+                * The data has been loaded into the buffer, write it out.
                 *
-                * call bdwrite() because we don't care about storage io flag (ap->a_ioflag) for a swap I/O
-                * maybe bawrite() for IO_DIRECT, bwrite() for IO_SYNC
+                * We want tmpfs to be able to use all available ram, not
+                * just the buffer cache, so if not explicitly paging we
+                * use buwrite() to leave the buffer clean but mark all the
+                * VM pages valid+dirty.
                 *
-                * XXX: need to implement tmpfs_bmap() for a dirty bit handling of bdwrite()
+                * When the kernel is paging, either via normal pageout
+                * operation or when cleaning the object during a recycle,
+                * the underlying VM pages are going to get thrown away
+                * so we MUST write them to swap.
+                *
+                * XXX unfortunately this catches msync() system calls too
+                * for the moment.
                 */
-               bdwrite(bp);
+               if (vm_swap_size == 0) {
+                       /*
+                        * if swap isn't configured yet, force a buwrite() to
+                        * avoid problems further down the line, due to flushing
+                        * to swap.
+                        */
+                       buwrite(bp);
+               } else {
+                       if (ap->a_ioflag & IO_SYNC) {
+                               bwrite(bp);
+                       } else if ((ap->a_ioflag & IO_ASYNC) ||
+                                (uio->uio_segflg == UIO_NOCOPY)) {
+                               bawrite(bp);
+                       } else {
+                               buwrite(bp);
+                       }
+               }
+
                if (bp->b_error) {
-                       kprintf("tmpfs_write bwrite error %d\n", error);
+                       kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
                        break;
                }
        }
-       crit_exit();
 
-       if (got_mplock > 0)
-               rel_mplock();
+       rel_mplock();
 
        if (error) {
-               if (extended)
+               if (extended) {
                        (void)tmpfs_reg_resize(vp, oldsize, trivial);
-               return error;
+                       kflags &= ~NOTE_EXTEND;
+               }
+               goto done;
        }
 
        TMPFS_NODE_LOCK(node);
@@ -686,9 +651,12 @@ tmpfs_write (struct vop_write_args *ap)
                        node->tn_mode &= ~(S_ISUID | S_ISGID);
        }
        TMPFS_NODE_UNLOCK(node);
+done:
 
-       vn_unlock(vp);
+       tmpfs_knote(vp, kflags);
 
+
+       lwkt_reltoken(&vp->v_mount->mnt_token);
        return(error);
 }
 
@@ -703,26 +671,35 @@ tmpfs_advlock (struct vop_advlock_args *ap)
        return (lf_advlock(ap, &node->tn_advlock, node->tn_size));
 }
 
-
 static int
 tmpfs_strategy(struct vop_strategy_args *ap)
 {
        struct bio *bio = ap->a_bio;
+       struct buf *bp = bio->bio_buf;
        struct vnode *vp = ap->a_vp;
        struct tmpfs_node *node;
        vm_object_t uobj;
 
-       if (vp->v_type != VREG)
-               return EINVAL;
+       if (vp->v_type != VREG) {
+               bp->b_resid = bp->b_bcount;
+               bp->b_flags |= B_ERROR | B_INVAL;
+               bp->b_error = EINVAL;
+               biodone(bio);
+               return(0);
+       }
 
+       lwkt_gettoken(&vp->v_mount->mnt_token);
        node = VP_TO_TMPFS_NODE(vp);
 
        uobj = node->tn_reg.tn_aobj;
+
        /*
-        * call swap_pager_strategy to store vm object into swap device
+        * Call swap_pager_strategy to read or write between the VM
+        * object and the buffer cache.
         */
        swap_pager_strategy(uobj, bio);
 
+       lwkt_reltoken(&vp->v_mount->mnt_token);
        return 0;
 }
 
@@ -738,6 +715,7 @@ tmpfs_bmap(struct vop_bmap_args *ap)
 
        return 0;
 }
+
 /* --------------------------------------------------------------------- */
 
 static int
@@ -745,15 +723,21 @@ tmpfs_nremove(struct vop_nremove_args *v)
 {
        struct vnode *dvp = v->a_dvp;
        struct namecache *ncp = v->a_nch->ncp;
-       struct vnode *vp = ncp->nc_vp;
+       struct vnode *vp;
        int error;
        struct tmpfs_dirent *de;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *dnode;
        struct tmpfs_node *node;
 
-       vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+       /*
+        * We have to acquire the vp from v->a_nch because
+        * we will likely unresolve the namecache entry, and
+        * a vrele is needed to trigger the tmpfs_inactive/tmpfs_reclaim
+        * sequence to recover space from the file.
+        */
+       error = cache_vref(v->a_nch, v->a_cred, &vp);
+       KKASSERT(error == 0);
 
        if (vp->v_type == VDIR) {
                error = EISDIR;
@@ -778,12 +762,12 @@ tmpfs_nremove(struct vop_nremove_args *v)
 
        /* Remove the entry from the directory; as it is a file, we do not
         * have to change the number of hard links of the directory. */
-       tmpfs_dir_detach(dvp, de);
+       tmpfs_dir_detach(dnode, de);
 
        /* Free the directory entry we just deleted.  Note that the node
         * referred by it will not be removed until the vnode is really
         * reclaimed. */
-       tmpfs_free_dirent(tmp, de, TRUE);
+       tmpfs_free_dirent(tmp, de);
 
        if (node->tn_links > 0) {
                TMPFS_NODE_LOCK(node);
@@ -794,13 +778,13 @@ tmpfs_nremove(struct vop_nremove_args *v)
 
        cache_setunresolved(v->a_nch);
        cache_setvp(v->a_nch, NULL);
-       cache_inval_vp(vp, CINV_DESTROY);
+       tmpfs_knote(vp, NOTE_DELETE);
+       /*cache_inval_vp(vp, CINV_DESTROY);*/
+       tmpfs_knote(dvp, NOTE_WRITE);
        error = 0;
 
-
 out:
-       vn_unlock(vp);
-       vn_unlock(dvp);
+       vrele(vp);
 
        return error;
 }
@@ -813,18 +797,15 @@ tmpfs_nlink(struct vop_nlink_args *v)
        struct vnode *dvp = v->a_dvp;
        struct vnode *vp = v->a_vp;
        struct namecache *ncp = v->a_nch->ncp;
-
-       int error;
        struct tmpfs_dirent *de;
        struct tmpfs_node *node;
-
-       vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
+       struct tmpfs_node *dnode;
+       int error;
 
        KKASSERT(dvp != vp); /* XXX When can this be false? */
 
        node = VP_TO_TMPFS_NODE(vp);
+       dnode = VP_TO_TMPFS_NODE(dvp);
 
        /* XXX: Why aren't the following two tests done by the caller? */
 
@@ -861,7 +842,7 @@ tmpfs_nlink(struct vop_nlink_args *v)
                goto out;
 
        /* Insert the new directory entry into the appropriate directory. */
-       tmpfs_dir_attach(dvp, de);
+       tmpfs_dir_attach(dnode, de);
 
        /* vp link count has changed, so update node times. */
 
@@ -870,14 +851,13 @@ tmpfs_nlink(struct vop_nlink_args *v)
        TMPFS_NODE_UNLOCK(node);
        tmpfs_update(vp);
 
+       tmpfs_knote(vp, NOTE_LINK);
        cache_setunresolved(v->a_nch);
        cache_setvp(v->a_nch, vp);
+       tmpfs_knote(dvp, NOTE_WRITE);
        error = 0;
 
 out:
-       vn_unlock(vp);
-       vn_unlock(dvp);
-
        return error;
 }
 
@@ -892,19 +872,15 @@ tmpfs_nrename(struct vop_nrename_args *v)
        struct vnode *tdvp = v->a_tdvp;
        struct namecache *tncp = v->a_tnch->ncp;
        struct vnode *tvp = tncp->nc_vp;
-
-       char *newname;
-       int error;
        struct tmpfs_dirent *de;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *fdnode;
        struct tmpfs_node *fnode;
        struct tmpfs_node *tnode;
        struct tmpfs_node *tdnode;
-
-       vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
-       if(tvp != NULL && tdvp != tvp)
-               vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
+       char *newname;
+       char *oldname;
+       int error;
 
        tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
 
@@ -925,13 +901,6 @@ tmpfs_nrename(struct vop_nrename_args *v)
                goto out;
        }
 
-       /* If we need to move the directory between entries, lock the
-        * source so that we can safely operate on it. */
-       if (tdvp != fdvp) {
-               error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
-               if (error != 0)
-                       goto out;
-       }
        fdnode = VP_TO_TMPFS_DIR(fdvp);
        fnode = VP_TO_TMPFS_NODE(fvp);
        de = tmpfs_dir_lookup(fdnode, fnode, fncp);
@@ -943,11 +912,13 @@ tmpfs_nrename(struct vop_nrename_args *v)
        }
        KKASSERT(de->td_node == fnode);
 
-       /* If re-naming a directory to another preexisting directory
-        * ensure that the target directory is empty so that its
-        * removal causes no side effects.
+       /*
+        * If replacing an entry in the target directory and that entry
+        * is a directory, it must be empty.
+        *
         * Kern_rename gurantees the destination to be a directory
-        * if the source is one. */
+        * if the source is one (it does?).
+        */
        if (tvp != NULL) {
                KKASSERT(tnode != NULL);
 
@@ -974,138 +945,120 @@ tmpfs_nrename(struct vop_nrename_args *v)
                }
        }
 
-       if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
-           || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
+       if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
+           (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
                error = EPERM;
                goto out_locked;
        }
 
-       /* Ensure that we have enough memory to hold the new name, if it
-        * has to be changed. */
+       /*
+        * Ensure that we have enough memory to hold the new name, if it
+        * has to be changed.
+        */
        if (fncp->nc_nlen != tncp->nc_nlen ||
            bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
-               newname = kmalloc(tncp->nc_nlen, M_TMPFSNAME, M_WAITOK);
-       } else
+               newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone, 
+                                 M_WAITOK | M_NULLOK);
+               if (newname == NULL) {
+                       error = ENOSPC;
+                       goto out_locked;
+               }
+               bcopy(tncp->nc_name, newname, tncp->nc_nlen);
+               newname[tncp->nc_nlen] = '\0';
+       } else {
                newname = NULL;
+       }
+
+       /*
+        * Unlink entry from source directory.  Note that the kernel has
+        * already checked for illegal recursion cases (renaming a directory
+        * into a subdirectory of itself).
+        */
+       if (fdnode != tdnode)
+               tmpfs_dir_detach(fdnode, de);
 
-       /* If the node is being moved to another directory, we have to do
-        * the move. */
+       /*
+        * Handle any name change.  Swap with newname, we will
+        * deallocate it at the end.
+        */
+       if (newname != NULL) {
+#if 0
+               TMPFS_NODE_LOCK(fnode);
+               fnode->tn_status |= TMPFS_NODE_CHANGED;
+               TMPFS_NODE_UNLOCK(fnode);
+#endif
+               oldname = de->td_name;
+               de->td_name = newname;
+               de->td_namelen = (uint16_t)tncp->nc_nlen;
+               newname = oldname;
+       }
+
+       /*
+        * Link entry to target directory.  If the entry
+        * represents a directory move the parent linkage
+        * as well.
+        */
        if (fdnode != tdnode) {
-               /* In case we are moving a directory, we have to adjust its
-                * parent to point to the new parent. */
                if (de->td_node->tn_type == VDIR) {
-                       struct tmpfs_node *n;
-
-                       /* Ensure the target directory is not a child of the
-                        * directory being moved.  Otherwise, we'd end up
-                        * with stale nodes. */
-                       n = tdnode;
-                       /* TMPFS_LOCK garanties that no nodes are freed while
-                        * traversing the list. Nodes can only be marked as
-                        * removed: tn_parent == NULL. */
-                       TMPFS_LOCK(tmp);
-                       TMPFS_NODE_LOCK(n);
-                       while (n != n->tn_dir.tn_parent) {
-                               struct tmpfs_node *parent;
-
-                               if (n == fnode) {
-                                       TMPFS_NODE_UNLOCK(n);
-                                       TMPFS_UNLOCK(tmp);
-                                       error = EINVAL;
-                                       if (newname != NULL)
-                                                   kfree(newname, M_TMPFSNAME);
-                                       goto out_locked;
-                               }
-                               parent = n->tn_dir.tn_parent;
-                               if (parent == NULL) {
-                                       n = NULL;
-                                       break;
-                               }
-                               TMPFS_NODE_LOCK(parent);
-                               if (parent->tn_dir.tn_parent == NULL) {
-                                       TMPFS_NODE_UNLOCK(parent);
-                                       n = NULL;
-                                       break;
-                               }
-                               n = parent;
-                       }
-                       TMPFS_NODE_UNLOCK(n);
-                       TMPFS_UNLOCK(tmp);
-                       if (n == NULL) {
-                               error = EINVAL;
-                               if (newname != NULL)
-                                           kfree(newname, M_TMPFSNAME);
-                               goto out_locked;
-                       }
-
-                       /* Adjust the parent pointer. */
                        TMPFS_VALIDATE_DIR(fnode);
-                       TMPFS_NODE_LOCK(de->td_node);
-                       de->td_node->tn_dir.tn_parent = tdnode;
 
-                       /* As a result of changing the target of the '..'
-                        * entry, the link count of the source and target
-                        * directories has to be adjusted. */
                        TMPFS_NODE_LOCK(tdnode);
-                       TMPFS_ASSERT_LOCKED(tdnode);
-                       TMPFS_NODE_LOCK(fdnode);
-                       TMPFS_ASSERT_LOCKED(fdnode);
-
                        tdnode->tn_links++;
-                       fdnode->tn_links--;
-
-                       TMPFS_NODE_UNLOCK(fdnode);
+                       tdnode->tn_status |= TMPFS_NODE_MODIFIED;
                        TMPFS_NODE_UNLOCK(tdnode);
-                       TMPFS_NODE_UNLOCK(de->td_node);
-               }
 
-               /* Do the move: just remove the entry from the source directory
-                * and insert it into the target one. */
-               tmpfs_dir_detach(fdvp, de);
-               tmpfs_dir_attach(tdvp, de);
-       }
-
-       /* If the name has changed, we need to make it effective by changing
-        * it in the directory entry. */
-       if (newname != NULL) {
-
-               kfree(de->td_name, M_TMPFSNAME);
-               de->td_namelen = (uint16_t)tncp->nc_nlen;
-               bcopy(tncp->nc_name, newname, tncp->nc_nlen);
-               newname[tncp->nc_nlen] = '\0';
-               de->td_name = newname;
+                       TMPFS_NODE_LOCK(fnode);
+                       fnode->tn_dir.tn_parent = tdnode;
+                       fnode->tn_status |= TMPFS_NODE_CHANGED;
+                       TMPFS_NODE_UNLOCK(fnode);
 
+                       TMPFS_NODE_LOCK(fdnode);
+                       fdnode->tn_links--;
+                       fdnode->tn_status |= TMPFS_NODE_MODIFIED;
+                       TMPFS_NODE_UNLOCK(fdnode);
+               }
+               tmpfs_dir_attach(tdnode, de);
+       } else {
                TMPFS_NODE_LOCK(tdnode);
-               TMPFS_NODE_LOCK(fdnode);
-
-               fnode->tn_status |= TMPFS_NODE_CHANGED;
                tdnode->tn_status |= TMPFS_NODE_MODIFIED;
-
-               TMPFS_NODE_UNLOCK(fdnode);
                TMPFS_NODE_UNLOCK(tdnode);
        }
 
-       /* If we are overwriting an entry, we have to remove the old one
-        * from the target directory. */
+       /*
+        * If we are overwriting an entry, we have to remove the old one
+        * from the target directory.
+        */
        if (tvp != NULL) {
                /* Remove the old entry from the target directory. */
                de = tmpfs_dir_lookup(tdnode, tnode, tncp);
-               tmpfs_dir_detach(tdvp, de);
+               tmpfs_dir_detach(tdnode, de);
+               tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);
 
-               /* Free the directory entry we just deleted.  Note that the
+               /*
+                * Free the directory entry we just deleted.  Note that the
                 * node referred by it will not be removed until the vnode is
-                * really reclaimed. */
-               tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
-
-               cache_inval_vp(tvp, CINV_DESTROY);
+                * really reclaimed.
+                */
+               tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de);
+               /*cache_inval_vp(tvp, CINV_DESTROY);*/
        }
 
+       /*
+        * Finish up
+        */
+       if (newname) {
+               kfree(newname, tmp->tm_name_zone);
+               newname = NULL;
+       }
        cache_rename(v->a_fnch, v->a_tnch);
+       tmpfs_knote(v->a_fdvp, NOTE_WRITE);
+       tmpfs_knote(v->a_tdvp, NOTE_WRITE);
+       if (fnode->tn_vnode)
+               tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
        error = 0;
 
 out_locked:
-       if (fdnode != tdnode)
-               vn_unlock(fdvp);
+       ;
 
 out:
        /* Release target nodes. */
@@ -1113,11 +1066,6 @@ out:
         * other code takes care of this... */
        if (tdvp == tvp)
                vrele(tdvp);
-       else {
-               if (tvp != NULL)
-                       vn_unlock(tvp);
-               vn_unlock(tdvp);
-       }
 
        return error;
 }
@@ -1136,13 +1084,12 @@ tmpfs_nmkdir(struct vop_nmkdir_args *v)
 
        KKASSERT(vap->va_type == VDIR);
 
-       vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
        if (error == 0) {
                cache_setunresolved(v->a_nch);
                cache_setvp(v->a_nch, *vpp);
+               tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
        }
-       vn_unlock(dvp);
 
        return error;
 }
@@ -1154,16 +1101,29 @@ tmpfs_nrmdir(struct vop_nrmdir_args *v)
 {
        struct vnode *dvp = v->a_dvp;
        struct namecache *ncp = v->a_nch->ncp;
-       struct vnode *vp = ncp->nc_vp;
-
-       int error;
+       struct vnode *vp;
        struct tmpfs_dirent *de;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *dnode;
        struct tmpfs_node *node;
+       int error;
 
-       vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+       /*
+        * We have to acquire the vp from v->a_nch because
+        * we will likely unresolve the namecache entry, and
+        * a vrele is needed to trigger the tmpfs_inactive/tmpfs_reclaim
+        * sequence.
+        */
+       error = cache_vref(v->a_nch, v->a_cred, &vp);
+       KKASSERT(error == 0);
+
+       /*
+        * Prevalidate so we don't hit an assertion later
+        */
+       if (vp->v_type != VDIR) {
+               error = ENOTDIR;
+               goto out;
+       }
 
        tmp = VFS_TO_TMPFS(dvp->v_mount);
        dnode = VP_TO_TMPFS_DIR(dvp);
@@ -1194,15 +1154,15 @@ tmpfs_nrmdir(struct vop_nrmdir_args *v)
            ncp->nc_nlen));
 
        /* Check flags to see if we are allowed to remove the directory. */
-       if (dnode->tn_flags & APPEND
-               || node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
+       if ((dnode->tn_flags & APPEND) ||
+           node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
                error = EPERM;
                goto out;
        }
 
 
        /* Detach the directory entry from the directory (dnode). */
-       tmpfs_dir_detach(dvp, de);
+       tmpfs_dir_detach(dnode, de);
 
        /* No vnode should be allocated for this entry from this point */
        TMPFS_NODE_LOCK(node);
@@ -1210,12 +1170,20 @@ tmpfs_nrmdir(struct vop_nrmdir_args *v)
        TMPFS_NODE_LOCK(dnode);
        TMPFS_ASSERT_ELOCKED(dnode);
 
+#if 0
+       /* handled by tmpfs_free_node */
+       KKASSERT(node->tn_links > 0);
        node->tn_links--;
        node->tn_dir.tn_parent = NULL;
+#endif
        node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
            TMPFS_NODE_MODIFIED;
 
+#if 0
+       /* handled by tmpfs_free_node */
+       KKASSERT(dnode->tn_links > 0);
        dnode->tn_links--;
+#endif
        dnode->tn_status |= TMPFS_NODE_ACCESSED | \
            TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 
@@ -1225,7 +1193,7 @@ tmpfs_nrmdir(struct vop_nrmdir_args *v)
        /* Free the directory entry we just deleted.  Note that the node
         * referred by it will not be removed until the vnode is really
         * reclaimed. */
-       tmpfs_free_dirent(tmp, de, TRUE);
+       tmpfs_free_dirent(tmp, de);
 
        /* Release the deleted vnode (will destroy the node, notify
         * interested parties and clean it from the cache). */
@@ -1237,12 +1205,12 @@ tmpfs_nrmdir(struct vop_nrmdir_args *v)
 
        cache_setunresolved(v->a_nch);
        cache_setvp(v->a_nch, NULL);
-       cache_inval_vp(vp, CINV_DESTROY);
+       /*cache_inval_vp(vp, CINV_DESTROY);*/
+       tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
        error = 0;
 
 out:
-       vn_unlock(vp);
-       vn_unlock(dvp);
+       vrele(vp);
 
        return error;
 }
@@ -1260,14 +1228,13 @@ tmpfs_nsymlink(struct vop_nsymlink_args *v)
        char *target = v->a_target;
        int error;
 
-       vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
        vap->va_type = VLNK;
        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
        if (error == 0) {
+               tmpfs_knote(*vpp, NOTE_WRITE);
                cache_setunresolved(v->a_nch);
                cache_setvp(v->a_nch, *vpp);
        }
-       vn_unlock(dvp);
 
        return error;
 }
@@ -1282,7 +1249,7 @@ tmpfs_readdir(struct vop_readdir_args *v)
        int *eofflag = v->a_eofflag;
        off_t **cookies = v->a_cookies;
        int *ncookies = v->a_ncookies;
-
+       struct tmpfs_mount *tmp;
        int error;
        off_t startoff;
        off_t cnt = 0;
@@ -1292,8 +1259,7 @@ tmpfs_readdir(struct vop_readdir_args *v)
        if (vp->v_type != VDIR)
                return ENOTDIR;
 
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
+       tmp = VFS_TO_TMPFS(vp->v_mount);
        node = VP_TO_TMPFS_DIR(vp);
        startoff = uio->uio_offset;
 
@@ -1305,7 +1271,7 @@ tmpfs_readdir(struct vop_readdir_args *v)
        }
 
        if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
-               error = tmpfs_dir_getdotdotdent(node, uio);
+               error = tmpfs_dir_getdotdotdent(tmp, node, uio);
                if (error != 0)
                        goto outok;
                cnt++;
@@ -1357,7 +1323,6 @@ outok:
                }
                KKASSERT(uio->uio_offset == off);
        }
-       vn_unlock(vp);
 
        return error;
 }
@@ -1396,18 +1361,29 @@ tmpfs_inactive(struct vop_inactive_args *v)
 
        struct tmpfs_node *node;
 
-       KKASSERT(vn_islocked(vp));
-
        node = VP_TO_TMPFS_NODE(vp);
 
+       /*
+        * Get rid of unreferenced deleted vnodes sooner rather than
+        * later so the data memory can be recovered immediately.
+        *
+        * We must truncate the vnode to prevent the normal reclamation
+        * path from flushing the data for the removed file to disk.
+        */
        TMPFS_NODE_LOCK(node);
-       if (node->tn_links == 0 &&
-           (node->tn_vpstate & TMPFS_VNODE_DOOMED)) {
+       if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
+           (node->tn_links == 0 ||
+            (node->tn_links == 1 && node->tn_type == VDIR &&
+             node->tn_dir.tn_parent)))
+       {
+               node->tn_vpstate = TMPFS_VNODE_DOOMED;
                TMPFS_NODE_UNLOCK(node);
+               if (node->tn_type == VREG)
+                       tmpfs_truncate(vp, 0);
                vrecycle(vp);
-       }
-       else
+       } else {
                TMPFS_NODE_UNLOCK(node);
+       }
 
        return 0;
 }
@@ -1418,30 +1394,33 @@ int
 tmpfs_reclaim(struct vop_reclaim_args *v)
 {
        struct vnode *vp = v->a_vp;
-
        struct tmpfs_mount *tmp;
        struct tmpfs_node *node;
 
        node = VP_TO_TMPFS_NODE(vp);
        tmp = VFS_TO_TMPFS(vp->v_mount);
 
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        tmpfs_free_vp(vp);
 
-       /* If the node referenced by this vnode was deleted by the user,
-        * we must free its associated data structures (now that the vnode
-        * is being reclaimed). */
+       /*
+        * If the node referenced by this vnode was deleted by the
+        * user, we must free its associated data structures now that
+        * the vnode is being reclaimed.
+        *
+        * Directories have an extra link ref.
+        */
        TMPFS_NODE_LOCK(node);
-       if (node->tn_links == 0 &&
-           (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
+       if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
+           (node->tn_links == 0 ||
+            (node->tn_links == 1 && node->tn_type == VDIR &&
+             node->tn_dir.tn_parent)))
+       {
                node->tn_vpstate = TMPFS_VNODE_DOOMED;
-               TMPFS_NODE_UNLOCK(node);
                tmpfs_free_node(tmp, node);
-       }
-       else
+               /* eats the lock */
+       } else {
                TMPFS_NODE_UNLOCK(node);
-
-       vn_unlock(vp);
+       }
 
        KKASSERT(vp->v_data == NULL);
        return 0;
@@ -1524,6 +1503,106 @@ tmpfs_pathconf(struct vop_pathconf_args *v)
        return error;
 }
 
+/************************************************************************
+ *                          KQFILTER OPS                                *
+ ************************************************************************/
+
+static void filt_tmpfsdetach(struct knote *kn);
+static int filt_tmpfsread(struct knote *kn, long hint);
+static int filt_tmpfswrite(struct knote *kn, long hint);
+static int filt_tmpfsvnode(struct knote *kn, long hint);
+/*
+ * One filterops table per kn_filter value accepted by tmpfs_kqfilter()
+ * (EVFILT_READ, EVFILT_WRITE, EVFILT_VNODE).  The tables differ only in
+ * their last member, the per-filter event routine; all of them share
+ * FILTEROP_ISFD, a NULL second member and filt_tmpfsdetach.
+ */
+static struct filterops tmpfsread_filtops =
+       { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsread };
+static struct filterops tmpfswrite_filtops =
+       { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfswrite };
+static struct filterops tmpfsvnode_filtops =
+       { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
+
+/*
+ * VOP_KQFILTER entry point.
+ *
+ * Selects the filterops table matching kn_filter, stores the vnode in
+ * kn_hook (read back by the filt_tmpfs* routines) and inserts the knote
+ * on the vnode's ki_note list.
+ *
+ * Returns 0 on success.  Any filter other than EVFILT_READ,
+ * EVFILT_WRITE or EVFILT_VNODE yields EOPNOTSUPP and leaves the knote
+ * untouched.
+ */
+static int
+tmpfs_kqfilter (struct vop_kqfilter_args *ap)
+{
+       struct knote *note = ap->a_kn;
+       struct vnode *target = ap->a_vp;
+       struct filterops *ops;
+
+       if (note->kn_filter == EVFILT_READ)
+               ops = &tmpfsread_filtops;
+       else if (note->kn_filter == EVFILT_WRITE)
+               ops = &tmpfswrite_filtops;
+       else if (note->kn_filter == EVFILT_VNODE)
+               ops = &tmpfsvnode_filtops;
+       else
+               return (EOPNOTSUPP);
+
+       /* Supported filter: wire the knote up to this vnode. */
+       note->kn_fop = ops;
+       note->kn_hook = (caddr_t)target;
+       knote_insert(&target->v_pollinfo.vpi_kqinfo.ki_note, note);
+
+       return(0);
+}
+
+/*
+ * Detach routine shared by all three tmpfs filterops tables.
+ *
+ * kn_hook carries the vnode stored by tmpfs_kqfilter(); the knote is
+ * removed from that vnode's ki_note list.
+ */
+static void
+filt_tmpfsdetach(struct knote *kn)
+{
+       struct vnode *owner;
+
+       owner = (void *)kn->kn_hook;
+       knote_remove(&owner->v_pollinfo.vpi_kqinfo.ki_note, kn);
+}
+
+/*
+ * EVFILT_READ event routine.
+ *
+ * kn_data is set to tn_size minus the descriptor's current f_offset,
+ * capped at INTPTR_MAX.  Returns non-zero when the knote should be
+ * reported: always for a NOTE_REVOKE hint (which also sets
+ * EV_EOF | EV_ONESHOT) and for NOTE_OLDAPI knotes, otherwise only when
+ * kn_data is non-zero.
+ */
+static int
+filt_tmpfsread(struct knote *kn, long hint)
+{
+       struct vnode *vp = (void *)kn->kn_hook;
+       struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
+       off_t off;
+
+       if (hint == NOTE_REVOKE) {
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+               return(1);
+       }
+       off = node->tn_size - kn->kn_fp->f_offset;
+       kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
+       if (kn->kn_sfflags & NOTE_OLDAPI)
+               return(1);
+
+       /*
+        * Handle possible MP race interlock on filter check/write.
+        *
+        * The sample above was taken without the MP lock.  Before
+        * reporting "nothing to read", take the lock and sample the
+        * size and offset again; reusing the stale 'off' here would
+        * make the recheck a no-op.
+        */
+       if (kn->kn_data == 0) {
+               get_mplock();
+               off = node->tn_size - kn->kn_fp->f_offset;
+               kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
+               rel_mplock();
+       }
+       return (kn->kn_data != 0);
+}
+
+/*
+ * EVFILT_WRITE event routine.
+ *
+ * Always reports the knote as active with kn_data forced to 0.  A
+ * NOTE_REVOKE hint also sets EV_EOF | EV_ONESHOT in kn_flags.
+ */
+static int
+filt_tmpfswrite(struct knote *kn, long hint)
+{
+       kn->kn_data = 0;
+       if (hint == NOTE_REVOKE) {
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+       }
+
+       return (1);
+}
+
+/*
+ * EVFILT_VNODE event routine.
+ *
+ * Latches hint into kn_fflags when it intersects the mask in
+ * kn_sfflags.  A NOTE_REVOKE hint additionally sets EV_EOF and always
+ * reports the knote as active; any other hint reports it as active only
+ * when kn_fflags is non-zero.
+ */
+static int
+filt_tmpfsvnode(struct knote *kn, long hint)
+{
+       int revoked = (hint == NOTE_REVOKE);
+
+       if ((kn->kn_sfflags & hint) != 0)
+               kn->kn_fflags |= hint;
+       if (revoked)
+               kn->kn_flags |= EV_EOF;
+
+       return (revoked || kn->kn_fflags != 0);
+}
+
+
 /* --------------------------------------------------------------------- */
 
 /*
@@ -1557,8 +1636,8 @@ struct vop_ops tmpfs_vnode_vops = {
        .vop_reclaim =                  tmpfs_reclaim,
        .vop_print =                    tmpfs_print,
        .vop_pathconf =                 tmpfs_pathconf,
-//     .vop_bmap =                     tmpfs_bmap,
-       .vop_bmap =                     (void *)vop_eopnotsupp,
+       .vop_bmap =                     tmpfs_bmap,
        .vop_strategy =                 tmpfs_strategy,
        .vop_advlock =                  tmpfs_advlock,
+       .vop_kqfilter =                 tmpfs_kqfilter
 };