kernel -- tmpfs: Mark tmpfs_write MPSAFE.
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vnops.c
index 8c92866..5ae3183 100644 (file)
@@ -1,5 +1,3 @@
-/*     $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $    */
-
 /*-
  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
  * All rights reserved.
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
  */
 
 /*
  * tmpfs vnode interface.
  */
-#include <sys/cdefs.h>
 
 #include <sys/kernel.h>
 #include <sys/kern_syscall.h>
@@ -44,7 +43,6 @@
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
-#include <sys/sfbuf.h>
 #include <sys/stat.h>
 #include <sys/systm.h>
 #include <sys/unistd.h>
@@ -57,6 +55,7 @@
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
+#include <vm/swap_pager.h>
 
 #include <vfs/fifofs/fifo.h>
 #include <vfs/tmpfs/tmpfs_vnops.h>
 
 MALLOC_DECLARE(M_TMPFS);
 
+/*
+ * Run KNOTE() on vp's knote list with the given hint flags.  A zero
+ * flags value is a no-op, so callers may pass an accumulated event mask
+ * without testing it first.
+ */
+static __inline
+void
+tmpfs_knote(struct vnode *vp, int flags)
+{
+       if (flags == 0)
+               return;
+       KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
+}
+
+
 /* --------------------------------------------------------------------- */
 
 static int
@@ -156,6 +164,7 @@ tmpfs_ncreate(struct vop_ncreate_args *v)
        if (error == 0) {
                cache_setunresolved(v->a_nch);
                cache_setvp(v->a_nch, *vpp);
+               tmpfs_knote(dvp, NOTE_WRITE);
        }
 
        return error;
@@ -180,6 +189,7 @@ tmpfs_nmknod(struct vop_nmknod_args *v)
        if (error == 0) {
                cache_setunresolved(v->a_nch);
                cache_setvp(v->a_nch, *vpp);
+               tmpfs_knote(dvp, NOTE_WRITE);
        }
 
        return error;
@@ -250,7 +260,7 @@ tmpfs_access(struct vop_access_args *v)
        case VLNK:
                /* FALLTHROUGH */
        case VREG:
-               if (VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
+               if ((v->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
                        error = EROFS;
                        goto out;
                }
@@ -270,7 +280,7 @@ tmpfs_access(struct vop_access_args *v)
                goto out;
        }
 
-       if (VWRITE && node->tn_flags & IMMUTABLE) {
+       if ((v->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
                error = EPERM;
                goto out;
        }
@@ -293,6 +303,7 @@ tmpfs_getattr(struct vop_getattr_args *v)
 
        node = VP_TO_TMPFS_NODE(vp);
 
+       lwkt_gettoken(&vp->v_mount->mnt_token);
        tmpfs_update(vp);
 
        vap->va_type = vp->v_type;
@@ -320,6 +331,8 @@ tmpfs_getattr(struct vop_getattr_args *v)
        vap->va_bytes = round_page(node->tn_size);
        vap->va_filerev = 0;
 
+       lwkt_reltoken(&vp->v_mount->mnt_token);
+
        return 0;
 }
 
@@ -331,21 +344,33 @@ tmpfs_setattr(struct vop_setattr_args *v)
        struct vnode *vp = v->a_vp;
        struct vattr *vap = v->a_vap;
        struct ucred *cred = v->a_cred;
+       struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
        int error = 0;
+       int kflags = 0;
 
-       if (error == 0 && (vap->va_flags != VNOVAL))
+       if (error == 0 && (vap->va_flags != VNOVAL)) {
                error = tmpfs_chflags(vp, vap->va_flags, cred);
+               kflags |= NOTE_ATTRIB;
+       }
 
-       if (error == 0 && (vap->va_size != VNOVAL))
+       if (error == 0 && (vap->va_size != VNOVAL)) {
+               if (vap->va_size > node->tn_size)
+                       kflags |= NOTE_WRITE | NOTE_EXTEND;
+               else
+                       kflags |= NOTE_WRITE;
                error = tmpfs_chsize(vp, vap->va_size, cred);
+       }
 
        if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
                           vap->va_gid != (gid_t)VNOVAL)) {
                error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
+               kflags |= NOTE_ATTRIB;
        }
 
-       if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
+       if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
                error = tmpfs_chmod(vp, vap->va_mode, cred);
+               kflags |= NOTE_ATTRIB;
+       }
 
        if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
            vap->va_atime.tv_nsec != VNOVAL) ||
@@ -353,12 +378,14 @@ tmpfs_setattr(struct vop_setattr_args *v)
            vap->va_mtime.tv_nsec != VNOVAL) )) {
                error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
                                      vap->va_vaflags, cred);
+               kflags |= NOTE_ATTRIB;
        }
 
        /* Update the node times.  We give preference to the error codes
         * generated by this function rather than the ones that may arise
         * from tmpfs_update. */
        tmpfs_update(vp);
+       tmpfs_knote(vp, kflags);
 
        return error;
 }
@@ -403,7 +430,6 @@ tmpfs_read (struct vop_read_args *ap)
        off_t base_offset;
        size_t offset;
        size_t len;
-       int got_mplock;
        int error;
 
        error = 0;
@@ -418,40 +444,24 @@ tmpfs_read (struct vop_read_args *ap)
        if (vp->v_type != VREG)
                return (EINVAL);
 
-#ifdef SMP
-       if(curthread->td_mpcount)
-               got_mplock = -1;
-       else
-               got_mplock = 0;
-#else
-               got_mplock = -1;
-#endif
-
        while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
                /*
                 * Use buffer cache I/O (via tmpfs_strategy)
                 */
                offset = (size_t)uio->uio_offset & BMASK;
                base_offset = (off_t)uio->uio_offset - offset;
-               bp = getcacheblk(vp, base_offset);
+               bp = getcacheblk(vp, base_offset, BSIZE);
                if (bp == NULL)
                {
-                       if (got_mplock == 0) {
-                               got_mplock = 1;
-                               get_mplock();
-                       }
-
+                       lwkt_gettoken(&vp->v_mount->mnt_token);
                        error = bread(vp, base_offset, BSIZE, &bp);
                        if (error) {
                                brelse(bp);
+                               lwkt_reltoken(&vp->v_mount->mnt_token);
                                kprintf("tmpfs_read bread error %d\n", error);
                                break;
                        }
-               }
-
-               if (got_mplock == 0) {
-                       got_mplock = 1;
-                       get_mplock();
+                       lwkt_reltoken(&vp->v_mount->mnt_token);
                }
 
                /*
@@ -471,9 +481,6 @@ tmpfs_read (struct vop_read_args *ap)
                }
        }
 
-       if (got_mplock > 0)
-               rel_mplock();
-
        TMPFS_NODE_LOCK(node);
        node->tn_status |= TMPFS_NODE_ACCESSED;
        TMPFS_NODE_UNLOCK(node);
@@ -496,8 +503,8 @@ tmpfs_write (struct vop_write_args *ap)
        size_t offset;
        size_t len;
        struct rlimit limit;
-       int got_mplock;
        int trivial = 0;
+       int kflags = 0;
 
        error = 0;
        if (uio->uio_resid == 0) {
@@ -509,6 +516,8 @@ tmpfs_write (struct vop_write_args *ap)
        if (vp->v_type != VREG)
                return (EINVAL);
 
+       lwkt_gettoken(&vp->v_mount->mnt_token);
+
        oldsize = node->tn_size;
        if (ap->a_ioflag & IO_APPEND)
                uio->uio_offset = node->tn_size;
@@ -517,15 +526,20 @@ tmpfs_write (struct vop_write_args *ap)
         * Check for illegal write offsets.
         */
        if (uio->uio_offset + uio->uio_resid >
-         VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
+         VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
+               lwkt_reltoken(&vp->v_mount->mnt_token);
                return (EFBIG);
+       }
 
        if (vp->v_type == VREG && td != NULL) {
                error = kern_getrlimit(RLIMIT_FSIZE, &limit);
-               if (error != 0)
+               if (error != 0) {
+                       lwkt_reltoken(&vp->v_mount->mnt_token);
                        return error;
+               }
                if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
                        ksignal(td->td_proc, SIGXFSZ);
+                       lwkt_reltoken(&vp->v_mount->mnt_token);
                        return (EFBIG);
                }
        }
@@ -536,16 +550,8 @@ tmpfs_write (struct vop_write_args *ap)
         */
        extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);
 
-#ifdef SMP
-       if (curthread->td_mpcount) {
-               got_mplock = -1;
-       } else {
-               got_mplock = 1;
-               get_mplock();
-       }
-#else
-       got_mplock = -1;
-#endif
+       get_mplock();
+
        while (uio->uio_resid > 0) {
                /*
                 * Use buffer cache I/O (via tmpfs_strategy)
@@ -580,8 +586,11 @@ tmpfs_write (struct vop_write_args *ap)
                        break;
                }
 
-               if (uio->uio_offset > node->tn_size)
+               if (uio->uio_offset > node->tn_size) {
                        node->tn_size = uio->uio_offset;
+                       kflags |= NOTE_EXTEND;
+               }
+               kflags |= NOTE_WRITE;
 
                /*
                 * The data has been loaded into the buffer, write it out.
@@ -599,28 +608,38 @@ tmpfs_write (struct vop_write_args *ap)
                 * XXX unfortunately this catches msync() system calls too
                 * for the moment.
                 */
-               if (ap->a_ioflag & IO_SYNC) {
-                       bwrite(bp);
-               } else if ((ap->a_ioflag & IO_ASYNC) ||
-                        (uio->uio_segflg == UIO_NOCOPY)) {
-                       bawrite(bp);
-               } else {
+               if (vm_swap_size == 0) {
+                       /*
+                        * if swap isn't configured yet, force a buwrite() to
+                        * avoid problems further down the line, due to flushing
+                        * to swap.
+                        */
                        buwrite(bp);
+               } else {
+                       if (ap->a_ioflag & IO_SYNC) {
+                               bwrite(bp);
+                       } else if ((ap->a_ioflag & IO_ASYNC) ||
+                                (uio->uio_segflg == UIO_NOCOPY)) {
+                               bawrite(bp);
+                       } else {
+                               buwrite(bp);
+                       }
                }
 
                if (bp->b_error) {
-                       kprintf("tmpfs_write bwrite error %d\n", error);
+                       kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
                        break;
                }
        }
 
-       if (got_mplock > 0)
-               rel_mplock();
+       rel_mplock();
 
        if (error) {
-               if (extended)
+               if (extended) {
                        (void)tmpfs_reg_resize(vp, oldsize, trivial);
-               return error;
+                       kflags &= ~NOTE_EXTEND;
+               }
+               goto done;
        }
 
        TMPFS_NODE_LOCK(node);
@@ -632,7 +651,12 @@ tmpfs_write (struct vop_write_args *ap)
                        node->tn_mode &= ~(S_ISUID | S_ISGID);
        }
        TMPFS_NODE_UNLOCK(node);
+done:
+
+       tmpfs_knote(vp, kflags);
+
 
+       lwkt_reltoken(&vp->v_mount->mnt_token);
        return(error);
 }
 
@@ -647,7 +671,6 @@ tmpfs_advlock (struct vop_advlock_args *ap)
        return (lf_advlock(ap, &node->tn_advlock, node->tn_size));
 }
 
-
 static int
 tmpfs_strategy(struct vop_strategy_args *ap)
 {
@@ -665,6 +688,7 @@ tmpfs_strategy(struct vop_strategy_args *ap)
                return(0);
        }
 
+       lwkt_gettoken(&vp->v_mount->mnt_token);
        node = VP_TO_TMPFS_NODE(vp);
 
        uobj = node->tn_reg.tn_aobj;
@@ -675,6 +699,7 @@ tmpfs_strategy(struct vop_strategy_args *ap)
         */
        swap_pager_strategy(uobj, bio);
 
+       lwkt_reltoken(&vp->v_mount->mnt_token);
        return 0;
 }
 
@@ -753,7 +778,9 @@ tmpfs_nremove(struct vop_nremove_args *v)
 
        cache_setunresolved(v->a_nch);
        cache_setvp(v->a_nch, NULL);
+       tmpfs_knote(vp, NOTE_DELETE);
        /*cache_inval_vp(vp, CINV_DESTROY);*/
+       tmpfs_knote(dvp, NOTE_WRITE);
        error = 0;
 
 out:
@@ -824,8 +851,10 @@ tmpfs_nlink(struct vop_nlink_args *v)
        TMPFS_NODE_UNLOCK(node);
        tmpfs_update(vp);
 
+       tmpfs_knote(vp, NOTE_LINK);
        cache_setunresolved(v->a_nch);
        cache_setvp(v->a_nch, vp);
+       tmpfs_knote(dvp, NOTE_WRITE);
        error = 0;
 
 out:
@@ -928,7 +957,12 @@ tmpfs_nrename(struct vop_nrename_args *v)
         */
        if (fncp->nc_nlen != tncp->nc_nlen ||
            bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
-               newname = kmalloc(tncp->nc_nlen + 1, M_TMPFSNAME, M_WAITOK);
+               newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone, 
+                                 M_WAITOK | M_NULLOK);
+               if (newname == NULL) {
+                       error = ENOSPC;
+                       goto out_locked;
+               }
                bcopy(tncp->nc_name, newname, tncp->nc_nlen);
                newname[tncp->nc_nlen] = '\0';
        } else {
@@ -998,6 +1032,7 @@ tmpfs_nrename(struct vop_nrename_args *v)
                /* Remove the old entry from the target directory. */
                de = tmpfs_dir_lookup(tdnode, tnode, tncp);
                tmpfs_dir_detach(tdnode, de);
+               tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);
 
                /*
                 * Free the directory entry we just deleted.  Note that the
@@ -1012,10 +1047,14 @@ tmpfs_nrename(struct vop_nrename_args *v)
         * Finish up
         */
        if (newname) {
-               kfree(newname, M_TMPFSNAME);
+               kfree(newname, tmp->tm_name_zone);
                newname = NULL;
        }
        cache_rename(v->a_fnch, v->a_tnch);
+       tmpfs_knote(v->a_fdvp, NOTE_WRITE);
+       tmpfs_knote(v->a_tdvp, NOTE_WRITE);
+       if (fnode->tn_vnode)
+               tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
        error = 0;
 
 out_locked:
@@ -1049,6 +1088,7 @@ tmpfs_nmkdir(struct vop_nmkdir_args *v)
        if (error == 0) {
                cache_setunresolved(v->a_nch);
                cache_setvp(v->a_nch, *vpp);
+               tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
        }
 
        return error;
@@ -1166,6 +1206,7 @@ tmpfs_nrmdir(struct vop_nrmdir_args *v)
        cache_setunresolved(v->a_nch);
        cache_setvp(v->a_nch, NULL);
        /*cache_inval_vp(vp, CINV_DESTROY);*/
+       tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
        error = 0;
 
 out:
@@ -1190,6 +1231,7 @@ tmpfs_nsymlink(struct vop_nsymlink_args *v)
        vap->va_type = VLNK;
        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
        if (error == 0) {
+               tmpfs_knote(*vpp, NOTE_WRITE);
                cache_setunresolved(v->a_nch);
                cache_setvp(v->a_nch, *vpp);
        }
@@ -1461,6 +1503,106 @@ tmpfs_pathconf(struct vop_pathconf_args *v)
        return error;
 }
 
+/************************************************************************
+ *                          KQFILTER OPS                                *
+ ************************************************************************/
+
+/*
+ * Filter callbacks referenced by the filterops tables below; their
+ * definitions follow tmpfs_kqfilter().
+ */
+static void filt_tmpfsdetach(struct knote *kn);
+static int filt_tmpfsread(struct knote *kn, long hint);
+static int filt_tmpfswrite(struct knote *kn, long hint);
+static int filt_tmpfsvnode(struct knote *kn, long hint);
+
+/*
+ * One table per filter type that tmpfs_kqfilter() accepts (EVFILT_READ,
+ * EVFILT_WRITE, EVFILT_VNODE).  All three name filt_tmpfsdetach and differ
+ * only in the final callback.
+ *
+ * NOTE(review): the NULL second slot is presumably the attach hook, left
+ * unset because tmpfs_kqfilter() inserts the knote itself -- confirm
+ * against the struct filterops declaration.
+ */
+static struct filterops tmpfsread_filtops =
+       { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsread };
+static struct filterops tmpfswrite_filtops =
+       { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfswrite };
+static struct filterops tmpfsvnode_filtops =
+       { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
+
+/*
+ * vop_kqfilter entry point: attach a knote to a tmpfs vnode.
+ *
+ * Selects the filterops table matching kn->kn_filter, stores the vnode in
+ * kn->kn_hook for the filter callbacks, and inserts the knote on the
+ * vnode's knote list.  Returns 0 on success, or EOPNOTSUPP for any filter
+ * other than EVFILT_READ, EVFILT_WRITE or EVFILT_VNODE; the knote is not
+ * modified in that case.
+ */
+static int
+tmpfs_kqfilter (struct vop_kqfilter_args *ap)
+{
+       struct knote *kn = ap->a_kn;
+       struct vnode *vp = ap->a_vp;
+       struct filterops *fops;
+
+       if (kn->kn_filter == EVFILT_READ) {
+               fops = &tmpfsread_filtops;
+       } else if (kn->kn_filter == EVFILT_WRITE) {
+               fops = &tmpfswrite_filtops;
+       } else if (kn->kn_filter == EVFILT_VNODE) {
+               fops = &tmpfsvnode_filtops;
+       } else {
+               return (EOPNOTSUPP);
+       }
+
+       kn->kn_fop = fops;
+       kn->kn_hook = (caddr_t)vp;
+
+       knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
+
+       return(0);
+}
+
+/*
+ * Detach callback shared by all tmpfs filters: remove the knote from the
+ * knote list of the vnode that tmpfs_kqfilter() stored in kn->kn_hook.
+ */
+static void
+filt_tmpfsdetach(struct knote *kn)
+{
+       struct vnode *hooked_vp = (void *)kn->kn_hook;
+
+       knote_remove(&hooked_vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
+}
+
+/*
+ * EVFILT_READ filter.
+ *
+ * Sets kn_data to the distance between the descriptor's file offset and
+ * the node size (clamped to INTPTR_MAX) and fires when that is non-zero.
+ * A NOTE_REVOKE hint marks the knote EV_EOF | EV_ONESHOT and fires; a
+ * knote carrying NOTE_OLDAPI always fires.
+ */
+static int
+filt_tmpfsread(struct knote *kn, long hint)
+{
+       struct vnode *vp = (void *)kn->kn_hook;
+       struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
+       off_t off;
+
+       if (hint == NOTE_REVOKE) {
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+               return(1);
+       }
+       off = node->tn_size - kn->kn_fp->f_offset;
+       kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
+       if (kn->kn_sfflags & NOTE_OLDAPI)
+               return(1);
+
+       /*
+        * Handle possible MP race interlock on filter check/write.
+        *
+        * tmpfs_write() grows tn_size with the mplock held, so before
+        * reporting "no data" re-sample the size under the mplock.  The
+        * offset delta must be recomputed here: reusing the value read
+        * above could only store 0 again and would miss the update.
+        */
+       if (kn->kn_data == 0) {
+               get_mplock();
+               off = node->tn_size - kn->kn_fp->f_offset;
+               kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
+               rel_mplock();
+       }
+       return (kn->kn_data != 0);
+}
+
+/*
+ * EVFILT_WRITE filter: always fires (returns 1) with kn_data set to 0.
+ * A NOTE_REVOKE hint additionally marks the knote EV_EOF | EV_ONESHOT.
+ */
+static int
+filt_tmpfswrite(struct knote *kn, long hint)
+{
+       kn->kn_data = 0;
+       if (hint == NOTE_REVOKE) {
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+       }
+       return (1);
+}
+
+/*
+ * EVFILT_VNODE filter: when hint overlaps the knote's kn_sfflags mask, OR
+ * hint into kn_fflags; fire while kn_fflags is non-zero.  A NOTE_REVOKE
+ * hint always fires and also sets EV_EOF.
+ */
+static int
+filt_tmpfsvnode(struct knote *kn, long hint)
+{
+       if ((kn->kn_sfflags & hint) != 0)
+               kn->kn_fflags |= hint;
+
+       if (hint != NOTE_REVOKE)
+               return (kn->kn_fflags != 0);
+
+       kn->kn_flags |= EV_EOF;
+       return (1);
+}
+
+
 /* --------------------------------------------------------------------- */
 
 /*
@@ -1497,4 +1639,5 @@ struct vop_ops tmpfs_vnode_vops = {
        .vop_bmap =                     tmpfs_bmap,
        .vop_strategy =                 tmpfs_strategy,
        .vop_advlock =                  tmpfs_advlock,
+       .vop_kqfilter =                 tmpfs_kqfilter
 };