hammer2 - Fix inode & chain limits, improve flush pipeline.
authorMatthew Dillon <dillon@apollo.backplane.com>
Thu, 30 Jan 2020 23:40:01 +0000 (15:40 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Thu, 30 Jan 2020 23:40:01 +0000 (15:40 -0800)
* Reorganize VFS_MODIFYING() to avoid certain deadlock conditions and
  adjust hammer2 to unconditionally stall in VFS_MODIFYING() when dirty
  limits are exceeded.

  Make sure VFS_MODIFYING() is called in all appropriate filesystem-
  modifying paths.

  This ensures that inode and chain structure allocation limits are
  adhered to.

* Fix hammer2's wakeup code for the dirty inode count hysteresis.  This
  fixes a situation where stalls due to excessive dirty inodes were waiting
  a full second before resuming operation based on the dirty count
  hysteresis.

  The hysteresis now works as intended:

  (1) Trigger a sync when the dirty count reaches 50% N.
  (2) Stall the frontend when the dirty count reaches 100% N.
  (3) Resume the frontend when the dirty count drops to 66% N.

* Fix trigger_syncer() to guarantee that the syncer will flush the
  filesystem ASAP when called.  If the filesystem is already in a flush,
  it will be flushed again.

  Previously if the filesystem was already in a flush it would wait one
  second before flushing again, which significantly reduces performance
  under conditions where the dirty chain limit or the dirty inode limit is
  constantly being hit (e.g. chown -R, etc).

Reported-by: tuxillo
15 files changed:
sys/kern/kern_fp.c
sys/kern/vfs_default.c
sys/kern/vfs_sync.c
sys/kern/vfs_syscalls.c
sys/kern/vfs_vnops.c
sys/sys/mount.h
sys/sys/vnode.h
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_strategy.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c
sys/vfs/nfs/nfs_serv.c
sys/vfs/nullfs/null_vfsops.c

index 198d397..0a468e9 100644 (file)
@@ -156,7 +156,7 @@ fp_vpopen(struct vnode *vp, int flags, file_t *fpp)
            error = EISDIR;
            goto bad2;
        }
-       error = vn_writechk(vp, NULL);
+       error = vn_writechk(vp);
        if (error)
            goto bad2;
        vmode |= VWRITE;
index c05b3a6..d3bfe1d 100644 (file)
@@ -1488,9 +1488,11 @@ vfs_stdncpgen_test(struct mount *mp, struct namecache *ncp)
        return 0;
 }
 
-void
+int
 vfs_stdmodifying(struct mount *mp)
 {
-       /* do nothing */
+       if (mp->mnt_flag & MNT_RDONLY)
+               return EROFS;
+       return 0;
 }
 /* end of vfs default ops */
index 5291bf3..235d703 100644 (file)
@@ -389,7 +389,8 @@ syncer_thread(void *_ctx)
 
                /*
                 * If syncer_trigger is set (from trigger_syncer(mp)),
-                * Immediately do a full filesystem sync.
+                * Immediately do a full filesystem sync and set up the
+                * following full filesystem sync to occur in 1 second.
                 */
                if (ctx->syncer_trigger) {
                        ctx->syncer_trigger = 0;
@@ -406,6 +407,9 @@ syncer_thread(void *_ctx)
                        }
                }
 
+               /*
+                * FSYNC items in this bucket
+                */
                while ((vp = LIST_FIRST(slp)) != NULL) {
                        vn_syncer_add(vp, retrydelay);
                        if (ctx->syncer_forced) {
@@ -424,7 +428,8 @@ syncer_thread(void *_ctx)
                }
 
                /*
-                * Increment the slot upon completion.
+                * Increment the slot upon completion.  This is typically
+                * one-second but may be faster if the syncer is triggered.
                 */
                ctx->syncer_delayno = (ctx->syncer_delayno + 1) &
                                      ctx->syncer_mask;
@@ -466,15 +471,17 @@ syncer_thread(void *_ctx)
                }
 
                /*
-                * If it has taken us less than a second to process the
-                * current work, then wait. Otherwise start right over
-                * again. We can still lose time if any single round
-                * takes more than two seconds, but it does not really
-                * matter as we are just trying to generally pace the
-                * filesystem activity.
+                * Normal syncer operation iterates once a second, unless
+                * specifically triggered.
                 */
-               if (time_uptime == starttime)
-                       tsleep(ctx, 0, "syncer", hz);
+               if (time_uptime == starttime &&
+                   ctx->syncer_trigger == 0) {
+                       tsleep_interlock(ctx, 0);
+                       if (time_uptime == starttime &&
+                           ctx->syncer_trigger == 0) {
+                               tsleep(ctx, PINTERLOCKED, "syncer", hz);
+                       }
+               }
        }
 
        /*
index 41cad6e..1df6ed6 100644 (file)
@@ -2859,6 +2859,8 @@ kern_access(struct nlookupdata *nd, int amode, int flags)
        nd->nl_flags |= NLC_SHAREDLOCK;
        if ((error = nlookup(nd)) != 0)
                return (error);
+       if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0)
+               return (error);
 retry:
        error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
        if (error)
@@ -2874,8 +2876,9 @@ retry:
                if (amode & X_OK)
                        mode |= VEXEC;
                if ((mode & VWRITE) == 0 || 
-                   (error = vn_writechk(vp, &nd->nl_nch)) == 0)
+                   (error = vn_writechk(vp)) == 0) {
                        error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
+               }
 
                /*
                 * If the file handle is stale we have to re-resolve the
@@ -3852,7 +3855,7 @@ kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags)
                return (error);
        if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
                return (error);
-       if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
+       if ((error = vn_writechk(vp)) == 0) {
                error = vget(vp, LK_EXCLUSIVE);
                if (error == 0) {
                        error = setutimes(vp, &vattr, newts, nullflag);
@@ -3928,7 +3931,7 @@ kern_truncate(struct nlookupdata *nd, off_t length)
                old_size = vattr.va_size;
        }
 
-       if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
+       if ((error = vn_writechk(vp)) == 0) {
                VATTR_NULL(&vattr);
                vattr.va_size = length;
                error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
@@ -4003,7 +4006,7 @@ kern_ftruncate(int fd, off_t length)
                old_size = vattr.va_size;
        }
 
-       if ((error = vn_writechk(vp, NULL)) == 0) {
+       if ((error = vn_writechk(vp)) == 0) {
                VATTR_NULL(&vattr);
                vattr.va_size = length;
                error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp);
@@ -4709,7 +4712,7 @@ sys_fhopen(struct fhopen_args *uap)
                        error = EISDIR;
                        goto bad;
                }
-               error = vn_writechk(vp, NULL);
+               error = vn_writechk(vp);
                if (error)
                        goto bad;
                mode |= VWRITE;
index 6280e9a..4865993 100644 (file)
@@ -161,12 +161,23 @@ vn_open(struct nlookupdata *nd, struct file *fp, int fmode, int cmode)
        /*
         * split case to allow us to re-resolve and retry the ncp in case
         * we get ESTALE.
+        *
+        * (error is 0 on entry / retry)
         */
 again:
+       /*
+        * Checks for (likely) filesystem-modifying cases and allows
+        * the filesystem to stall the front-end.
+        */
+       if ((fmode & (FWRITE | O_TRUNC)) ||
+           ((fmode & O_CREAT) && nd->nl_nch.ncp->nc_vp == NULL)) {
+               error = ncp_writechk(&nd->nl_nch);
+               if (error)
+                       return error;
+       }
+
        if (fmode & O_CREAT) {
                if (nd->nl_nch.ncp->nc_vp == NULL) {
-                       if ((error = ncp_writechk(&nd->nl_nch)) != 0)
-                               return (error);
                        VATTR_NULL(vap);
                        vap->va_type = VREG;
                        vap->va_mode = cmode;
@@ -223,7 +234,12 @@ again:
                                error = EISDIR;
                                goto bad;
                        }
-                       error = vn_writechk(vp, &nd->nl_nch);
+
+                       /*
+                        * Additional checks on vnode (does not substitute
+                        * for ncp_writechk()).
+                        */
+                       error = vn_writechk(vp);
                        if (error) {
                                /*
                                 * Special stale handling, re-resolve the
@@ -367,10 +383,15 @@ vn_opendisk(const char *devname, int fmode, struct vnode **vpp)
 }
 
 /*
- * Check for write permissions on the specified vnode.  nch may be NULL.
+ * Checks for special conditions on the vnode which might prevent writing
+ * after the vnode has (likely) been locked.  The vnode might or might not
+ * be locked as of this call, but will be at least referenced.
+ *
+ * Also re-checks the mount RDONLY flag that ncp_writechk() checked prior
+ * to the vnode being locked.
  */
 int
-vn_writechk(struct vnode *vp, struct nchandle *nch)
+vn_writechk(struct vnode *vp)
 {
        /*
         * If there's shared text associated with
@@ -379,18 +400,9 @@ vn_writechk(struct vnode *vp, struct nchandle *nch)
         */
        if (vp->v_flag & VTEXT)
                return (ETXTBSY);
-
-       /*
-        * If the vnode represents a regular file, check the mount
-        * point via the nch.  This may be a different mount point
-        * then the one embedded in the vnode (e.g. nullfs).
-        *
-        * We can still write to non-regular files (e.g. devices)
-        * via read-only mounts.
-        */
-       if (nch && nch->ncp && vp->v_type == VREG)
-               return (ncp_writechk(nch));
-       return (0);
+       if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
+               return (EROFS);
+       return 0;
 }
 
 /*
@@ -398,6 +410,8 @@ vn_writechk(struct vnode *vp, struct nchandle *nch)
  * referenced by the namecache may be different from the mount point
  * used by the underlying vnode in the case of NULLFS, so a separate
  * check is needed.
+ *
+ * Must be called PRIOR to any vnodes being locked.
  */
 int
 ncp_writechk(struct nchandle *nch)
index b9f26e6..c8327a4 100644 (file)
@@ -572,7 +572,7 @@ typedef void vfs_account_t(struct mount *mp,
                        uid_t uid, gid_t gid, int64_t delta);
 typedef void vfs_ncpgen_set_t(struct mount *mp, struct namecache *ncp);
 typedef int vfs_ncpgen_test_t(struct mount *mp, struct namecache *ncp);
-typedef void vfs_modifying_t(struct mount *mp);
+typedef int vfs_modifying_t(struct mount *mp);
 
 int vfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred);
 int vfs_start(struct mount *mp, int flags);
index faae1ce..3303bd9 100644 (file)
@@ -511,7 +511,7 @@ void        vfs_timestamp (struct timespec *);
 size_t vfs_flagstostr(int flags, const struct mountctl_opt *optp, char *buf, size_t len, int *errorp);
 void   vn_mark_atime(struct vnode *vp, struct thread *td);
 int    vfs_inodehashsize(void);
-int    vn_writechk (struct vnode *vp, struct nchandle *nch);
+int    vn_writechk (struct vnode *vp);
 int    ncp_writechk(struct nchandle *nch);
 int    vop_stdopen (struct vop_open_args *ap);
 int    vop_stdclose (struct vop_close_args *ap);
index 3397da3..5537cac 100644 (file)
@@ -1662,7 +1662,7 @@ int hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name,
 
 void hammer2_pfs_memory_wait(hammer2_pfs_t *pmp);
 void hammer2_pfs_memory_inc(hammer2_pfs_t *pmp);
-void hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp);
+void hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp, int count);
 
 void hammer2_base_delete(hammer2_chain_t *parent,
                                hammer2_blockref_t *base, int count,
index 79b9d58..b1d52aa 100644 (file)
@@ -593,7 +593,7 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth)
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                        atomic_add_long(&hammer2_count_modified_chains, -1);
                        if (chain->pmp)
-                               hammer2_pfs_memory_wakeup(chain->pmp);
+                               hammer2_pfs_memory_wakeup(chain->pmp, -1);
                }
                /* spinlock still held */
        }
@@ -1850,8 +1850,10 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                                                 HAMMER2_CHAIN_MODIFIED);
                                atomic_add_long(&hammer2_count_modified_chains,
                                                -1);
-                               if (chain->pmp)
-                                       hammer2_pfs_memory_wakeup(chain->pmp);
+                               if (chain->pmp) {
+                                       hammer2_pfs_memory_wakeup(
+                                               chain->pmp, -1);
+                               }
                                hammer2_freemap_adjust(hmp, &chain->bref,
                                                HAMMER2_FREEMAP_DORECOVER);
                                atomic_set_int(&chain->flags,
@@ -1905,7 +1907,7 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                        atomic_add_long(&hammer2_count_modified_chains, -1);
                        if (chain->pmp)
-                               hammer2_pfs_memory_wakeup(chain->pmp);
+                               hammer2_pfs_memory_wakeup(chain->pmp, -1);
                }
                if (setupdate) {
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
index d6466cf..b482b4e 100644 (file)
@@ -727,7 +727,7 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
                 * be retired.
                 */
                if (chain->pmp)
-                       hammer2_pfs_memory_wakeup(chain->pmp);
+                       hammer2_pfs_memory_wakeup(chain->pmp, -1);
 
 #if 0
                if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0 &&
index 8f3d7d6..e972eba 100644 (file)
@@ -1505,7 +1505,7 @@ hammer2_dedup_record(hammer2_chain_t *chain, hammer2_io_t *dio,
                atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                atomic_add_long(&hammer2_count_modified_chains, -1);
                if (chain->pmp)
-                       hammer2_pfs_memory_wakeup(chain->pmp);
+                       hammer2_pfs_memory_wakeup(chain->pmp, -1);
        }
 #endif
 }
index 122cc7e..730fce5 100644 (file)
@@ -224,7 +224,7 @@ static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
 static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp);
 static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
                                int *exflagsp, struct ucred **credanonp);
-static void hammer2_vfs_modifying(struct mount *mp);
+static int hammer2_vfs_modifying(struct mount *mp);
 
 static int hammer2_install_volume_header(hammer2_dev_t *hmp);
 #if 0
@@ -1852,7 +1852,7 @@ again:
        if (hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) {
                atomic_add_long(&hammer2_count_modified_chains, -1);
                atomic_clear_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED);
-               hammer2_pfs_memory_wakeup(hmp->vchain.pmp);
+               hammer2_pfs_memory_wakeup(hmp->vchain.pmp, -1);
        }
        if (hmp->vchain.flags & HAMMER2_CHAIN_UPDATE) {
                atomic_clear_int(&hmp->vchain.flags, HAMMER2_CHAIN_UPDATE);
@@ -1861,7 +1861,7 @@ again:
        if (hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) {
                atomic_add_long(&hammer2_count_modified_chains, -1);
                atomic_clear_int(&hmp->fchain.flags, HAMMER2_CHAIN_MODIFIED);
-               hammer2_pfs_memory_wakeup(hmp->fchain.pmp);
+               hammer2_pfs_memory_wakeup(hmp->fchain.pmp, -1);
        }
        if (hmp->fchain.flags & HAMMER2_CHAIN_UPDATE) {
                atomic_clear_int(&hmp->fchain.flags, HAMMER2_CHAIN_UPDATE);
@@ -2471,6 +2471,7 @@ hammer2_vfs_sync_pmp(hammer2_pfs_t *pmp, int waitfor)
        struct vnode *vp;
        uint32_t pass2;
        int error;
+       int wakecount;
        int dorestart;
 
        mp = pmp->mp;
@@ -2516,6 +2517,7 @@ restart:
         */
        hammer2_spin_ex(&pmp->list_spin);
        depend_next = TAILQ_FIRST(&pmp->depq);
+       wakecount = 0;
 
        while ((depend = depend_next) != NULL) {
                depend_next = TAILQ_NEXT(depend, entry);
@@ -2527,8 +2529,12 @@ restart:
                        atomic_clear_int(&ip->flags, HAMMER2_INODE_SIDEQ);
                        ip->depend = NULL;
                }
+
+               /*
+                * NOTE: pmp->sideq_count includes both sideq and syncq
+                */
                TAILQ_CONCAT(&pmp->syncq, &depend->sideq, entry);
-               pmp->sideq_count -= depend->count;
+
                depend->count = 0;
                depend->pass2 = 0;
                TAILQ_REMOVE(&pmp->depq, depend, entry);
@@ -2543,8 +2549,7 @@ restart:
         * sideq_count may have dropped enough to allow us to unstall
         * the frontend.
         */
-       hammer2_pfs_memory_inc(pmp);
-       hammer2_pfs_memory_wakeup(pmp);
+       hammer2_pfs_memory_wakeup(pmp, 0);
 
        /*
         * Now run through all inodes on syncq.
@@ -2571,9 +2576,19 @@ restart:
                        continue;
                }
                TAILQ_REMOVE(&pmp->syncq, ip, entry);
+               --pmp->sideq_count;
                hammer2_spin_unex(&pmp->list_spin);
+
+               /*
+                * Tickle anyone waiting on ip->flags or the hysteresis
+                * on the dirty inode count.
+                */
                if (pass2 & HAMMER2_INODE_SYNCQ_WAKEUP)
                        wakeup(&ip->flags);
+               if (++wakecount >= hammer2_limit_dirty_inodes / 20 + 1) {
+                       wakecount = 0;
+                       hammer2_pfs_memory_wakeup(pmp, 0);
+               }
 
                /*
                 * Relock the inode, and we inherit a ref from the above.
@@ -2737,6 +2752,8 @@ restart:
                hammer2_spin_ex(&pmp->list_spin);
        }
        hammer2_spin_unex(&pmp->list_spin);
+       hammer2_pfs_memory_wakeup(pmp, 0);
+
        if (dorestart || (pmp->trans.flags & HAMMER2_TRANS_RESCAN)) {
 #ifdef HAMMER2_DEBUG_SYNC
                kprintf("FILESYSTEM SYNC STAGE 1 RESTART\n");
@@ -3071,15 +3088,17 @@ hammer2_lwinprog_wait(hammer2_pfs_t *pmp, int flush_pipe)
  * If the level continues to build up, we stall, waiting for it to drop,
  * with some hysteresis.
  *
- * We limit the stall to two seconds per call.
- *
  * This relies on the kernel calling hammer2_vfs_modifying() prior to
  * obtaining any vnode locks before making a modifying VOP call.
  */
-static void
+static int
 hammer2_vfs_modifying(struct mount *mp)
 {
+       if (mp->mnt_flag & MNT_RDONLY)
+               return EROFS;
        hammer2_pfs_memory_wait(MPTOPMP(mp));
+
+       return 0;
 }
 
 /*
@@ -3090,12 +3109,13 @@ void
 hammer2_pfs_memory_wait(hammer2_pfs_t *pmp)
 {
        uint32_t waiting;
-       int loops;
+       int pcatch;
+       int error;
 
        if (pmp == NULL || pmp->mp == NULL)
                return;
 
-       for (loops = 0; loops < 2; ++loops) {
+       for (;;) {
                waiting = pmp->inmem_dirty_chains & HAMMER2_DIRTYCHAIN_MASK;
                cpu_ccfence();
 
@@ -3116,7 +3136,10 @@ hammer2_pfs_memory_wait(hammer2_pfs_t *pmp)
                    pmp->sideq_count < hammer2_limit_dirty_inodes) {
                        break;
                }
-               tsleep_interlock(&pmp->inmem_dirty_chains, 0);
+
+               pcatch = curthread->td_proc ? PCATCH : 0;
+
+               tsleep_interlock(&pmp->inmem_dirty_chains, pcatch);
                atomic_set_int(&pmp->inmem_dirty_chains,
                               HAMMER2_DIRTYCHAIN_WAITING);
                if (waiting < hammer2_limit_dirty_chains &&
@@ -3124,32 +3147,24 @@ hammer2_pfs_memory_wait(hammer2_pfs_t *pmp)
                        break;
                }
                trigger_syncer(pmp->mp);
-               tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED, "h2memw", hz);
-#if 0
-               limit = pmp->mp->mnt_nvnodelistsize / 10;
-               if (limit < hammer2_limit_dirty_chains)
-                       limit = hammer2_limit_dirty_chains;
-               if (limit < 1000)
-                       limit = 1000;
-#endif
-       }
-}
-
-void
-hammer2_pfs_memory_inc(hammer2_pfs_t *pmp)
-{
-       if (pmp) {
-               atomic_add_int(&pmp->inmem_dirty_chains, 1);
+               error = tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED | pcatch,
+                              "h2memw", hz);
+               if (error == ERESTART)
+                       break;
        }
 }
 
+/*
+ * Wake up any stalled frontend ops waiting, with hysteresis, using
+ * 2/3 of the limit.
+ */
 void
-hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp)
+hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp, int count)
 {
        uint32_t waiting;
 
        if (pmp) {
-               waiting = atomic_fetchadd_int(&pmp->inmem_dirty_chains, -1);
+               waiting = atomic_fetchadd_int(&pmp->inmem_dirty_chains, count);
                /* don't need --waiting to test flag */
 
                if ((waiting & HAMMER2_DIRTYCHAIN_WAITING) &&
@@ -3163,6 +3178,14 @@ hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp)
        }
 }
 
+void
+hammer2_pfs_memory_inc(hammer2_pfs_t *pmp)
+{
+       if (pmp) {
+               atomic_add_int(&pmp->inmem_dirty_chains, 1);
+       }
+}
+
 /*
  * Returns 0 if the filesystem has tons of free space
  * Returns 1 if the filesystem has less than 10% remaining
index 6e0387d..78bef0b 100644 (file)
@@ -381,7 +381,6 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                return (ENOSPC);
        }
 
-       /*hammer2_pfs_memory_wait(ip->pmp);*/
        hammer2_trans_init(ip->pmp, 0);
        hammer2_inode_lock(ip, 0);
        error = 0;
@@ -824,7 +823,6 @@ hammer2_vop_write(struct vop_write_args *ap)
        if (uio->uio_segflg == UIO_NOCOPY) {
                hammer2_trans_init(ip->pmp, HAMMER2_TRANS_BUFCACHE);
        } else {
-               /*hammer2_pfs_memory_wait(ip->pmp);*/
                hammer2_trans_init(ip->pmp, 0);
        }
        error = hammer2_write_file(ip, uio, ioflag, seqcount);
@@ -1406,7 +1404,6 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
 
-       /*hammer2_pfs_memory_wait(dip->pmp);*/
        hammer2_trans_init(dip->pmp, 0);
 
        inum = hammer2_trans_newinum(dip->pmp);
@@ -1542,7 +1539,6 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
         */
        ip = VTOI(ap->a_vp);
        KASSERT(ip->pmp, ("ip->pmp is NULL %p %p", ip, ip->pmp));
-       /*hammer2_pfs_memory_wait(ip->pmp);*/
        hammer2_trans_init(ip->pmp, 0);
 
        /*
@@ -1618,7 +1614,6 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       /*hammer2_pfs_memory_wait(dip->pmp);*/
        hammer2_trans_init(dip->pmp, 0);
 
        inum = hammer2_trans_newinum(dip->pmp);
@@ -1701,7 +1696,6 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       /*hammer2_pfs_memory_wait(dip->pmp);*/
        hammer2_trans_init(dip->pmp, 0);
 
        /*
@@ -1779,7 +1773,6 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       /*hammer2_pfs_memory_wait(dip->pmp);*/
        hammer2_trans_init(dip->pmp, 0);
 
        ap->a_vap->va_type = VLNK;      /* enforce type */
@@ -1902,7 +1895,6 @@ hammer2_vop_nremove(struct vop_nremove_args *ap)
                }
        }
 
-       /*hammer2_pfs_memory_wait(dip->pmp);*/
        hammer2_trans_init(dip->pmp, 0);
        hammer2_inode_lock(dip, 0);
 
@@ -2002,7 +1994,6 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
                return (ENOSPC);
 #endif
 
-       /*hammer2_pfs_memory_wait(dip->pmp);*/
        hammer2_trans_init(dip->pmp, 0);
        hammer2_inode_lock(dip, 0);
 
@@ -2100,7 +2091,6 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
        tname = tncp->nc_name;
        tname_len = tncp->nc_nlen;
 
-       /*hammer2_pfs_memory_wait(tdip->pmp);*/
        hammer2_trans_init(tdip->pmp, 0);
 
        update_tdip = 0;
index 1f0ff3d..d6bfd59 100644 (file)
@@ -3966,7 +3966,8 @@ nfsmout:
 /*
  * Perform access checking for vnodes obtained from file handles that would
  * refer to files already opened by a Unix client. You cannot just use
- * vn_writechk() and VOP_ACCESS() for two reasons.
+ * vn_writechk() and VOP_ACCESS() for two reasons:
+ *
  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
  * 2 - The owner is to be given access irrespective of mode bits for some
  *     operations, so that processes that chmod after opening a file don't
index f6989fd..45ba32b 100644 (file)
@@ -394,6 +394,20 @@ nullfs_ncpgen_test(struct mount *mp, struct namecache *ncp)
        return VFS_NCPGEN_TEST(xmp->nullm_vfs, ncp);
 }
 
+static int
+nullfs_modifying(struct mount *mp)
+{
+       struct null_mount *xmp = MOUNTTONULLMOUNT(mp);
+       int error;
+
+       if (mp->mnt_flag & MNT_RDONLY)
+               error = EROFS;
+       else if (xmp->nullm_vfs)
+               error = VFS_MODIFYING(xmp->nullm_vfs);
+       else
+               error = 0;
+       return error;
+}
 
 static struct vfsops null_vfsops = {
        .vfs_mount =            nullfs_mount,
@@ -406,7 +420,8 @@ static struct vfsops null_vfsops = {
        .vfs_vptofh =           nullfs_vptofh,
        .vfs_ncpgen_set =       nullfs_ncpgen_set,
        .vfs_ncpgen_test =      nullfs_ncpgen_test,
-       .vfs_checkexp =         nullfs_checkexp
+       .vfs_checkexp =         nullfs_checkexp,
+       .vfs_modifying =        nullfs_modifying
 };
 
 VFS_SET(null_vfsops, null, VFCF_LOOPBACK | VFCF_MPSAFE);