kernel - Try to fix delayed mtime updates on SHARED+RW maps again
authorMatthew Dillon <dillon@apollo.backplane.com>
Wed, 14 Feb 2018 07:02:18 +0000 (23:02 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Wed, 14 Feb 2018 07:02:18 +0000 (23:02 -0800)
* Attempt to fix continuing problems with mtime updates related to
  modifications made via SHARED+RW mmap()s.  As evidenced by builds
  under synth sometimes getting confused.

* Don't update vp->v_lastwrite_ts in vop_stdclose().  Instead, update
  the field and set a new flag VLASTWRITETS upon mmap(SHARED+PROT_WRITE)
  or upon mprotect(PROT_WRITE) on a SHARED mmap.

* Clear the flag on any regular write, utimes, or truncation.

* Adjust various filesystems to update mtime from vp->v_lastwrite_ts
  only upon UIO_NOCOPY writes, and only if VLASTWRITETS is set.

  tmpfs, ufs, hammer, hammer2 adjusted.

12 files changed:
sys/kern/vfs_default.c
sys/sys/vnode.h
sys/vfs/hammer/hammer_vnops.c
sys/vfs/hammer2/hammer2_vnops.c
sys/vfs/tmpfs/tmpfs.h
sys/vfs/tmpfs/tmpfs_subr.c
sys/vfs/tmpfs/tmpfs_vnops.c
sys/vfs/ufs/inode.h
sys/vfs/ufs/ufs_readwrite.c
sys/vfs/ufs/ufs_vnops.c
sys/vm/vm_map.c
sys/vm/vm_mmap.c

index 776af1c..5a468b4 100644 (file)
@@ -1207,7 +1207,6 @@ vop_stdclose(struct vop_close_args *ap)
                KASSERT(vp->v_writecount > 0,
                        ("VOP_STDCLOSE: BAD WRITECOUNT %p %d",
                        vp, vp->v_writecount));
-               vfs_timestamp(&vp->v_lastwrite_ts);
                atomic_add_int(&vp->v_writecount, -1);
        }
        atomic_add_int(&vp->v_opencount, -1);
index c456f2c..83b0bd4 100644 (file)
@@ -112,13 +112,9 @@ struct mountctl_opt {
  * deal with clustered cache coherency issues and, more immediately, to
  * protect operations associated with the kernel-managed journaling module.
  *
- * Generally speaking modification timestamps for inodes get set as-of the
- * time the write occurs.  However, files mmap()'d R+W may not see the
- * underlying pages get flushed until well after close().  In this situation,
- * v_lastwrite_ts is set to the timestamp that the filesystem should use
- * for such updates.  VFS's should be aware that symlink data sometimes uses
- * the file write mechanism to set the symlink content and v_writecount
- * will probably be 0.  v_lastwrite_ts should not be used in that situation.
+ * v_lastwrite_ts is set along with VLASTWRITETS when a file is mmap()'d
+ * SHARED+RW.  VM page flushes, which can be delayed substantially from when
+ * the page was actually modified, will use the v_lastwrite_ts if available.
  *
  * NOTE: Certain fields within the vnode structure requires v_token to be
  *      held.  The vnode's normal lock need not be held when accessing
@@ -225,7 +221,7 @@ struct vnode {
 /* open for business    0x00010000 */
 /* open for business    0x00020000 */
 #define        VRECLAIMED      0x00040000      /* This vnode has been destroyed */
-/* open for business   0x00080000 */
+#define VLASTWRITETS   0x00080000      /* VLASTWRITETS updated */
 #define VNOTSEEKABLE   0x00100000      /* rd/wr ignores file offset */
 #define        VONWORKLST      0x00200000      /* On syncer work-list */
 #define VISDIRTY       0x00400000      /* inode dirty from VFS */
index 6497f03..fe4015a 100644 (file)
@@ -531,12 +531,17 @@ hammer_vop_write(struct vop_write_args *ap)
         * Use v_lastwrite_ts if file not open for writing
         * (i.e. a late msync)
         */
-       if (vp->v_writecount == 0) {
-               trans.time = vp->v_lastwrite_ts.tv_sec * 1000000 +
-                            vp->v_lastwrite_ts.tv_nsec / 1000;
+       if (uio->uio_segflg == UIO_NOCOPY) {
+               if (vp->v_flag & VLASTWRITETS) {
+                       trans.time = vp->v_lastwrite_ts.tv_sec * 1000000 +
+                                    vp->v_lastwrite_ts.tv_nsec / 1000;
+               } else {
+                       trans.time = ip->ino_data.mtime;
+               }
+       } else {
+               vclrflags(vp, VLASTWRITETS);
        }
 
-
        /*
         * Check append mode
         */
@@ -2281,6 +2286,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
                        ip->ino_data.mtime = trans.time;
                        /* XXX safe to use SDIRTY instead of DDIRTY here? */
                        modflags |= HAMMER_INODE_MTIME | HAMMER_INODE_DDIRTY;
+                       vclrflags(ap->a_vp, VLASTWRITETS);
 
                        /*
                         * On-media truncation is cached in the inode until
@@ -2325,6 +2331,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
                        ip->ino_data.size = vap->va_size;
                        ip->ino_data.mtime = trans.time;
                        modflags |= HAMMER_INODE_MTIME | HAMMER_INODE_DDIRTY;
+                       vclrflags(ap->a_vp, VLASTWRITETS);
                        kflags |= NOTE_ATTRIB;
                        break;
                default:
@@ -2342,6 +2349,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
                ip->ino_data.mtime = hammer_timespec_to_time(&vap->va_mtime);
                modflags |= HAMMER_INODE_MTIME;
                kflags |= NOTE_ATTRIB;
+               vclrflags(ap->a_vp, VLASTWRITETS);
        }
        if (vap->va_mode != (mode_t)VNOVAL) {
                mode_t   cur_mode = ip->ino_data.mode;
index d260720..fb17884 100644 (file)
@@ -422,6 +422,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                        }
                        hammer2_inode_modify(ip);
                        ip->meta.mtime = ctime;
+                       vclrflags(vp, VLASTWRITETS);
                        break;
                default:
                        error = EINVAL;
@@ -455,6 +456,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                hammer2_inode_modify(ip);
                ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime);
                kflags |= NOTE_ATTRIB;
+               vclrflags(vp, VLASTWRITETS);
        }
 
 done:
@@ -1087,16 +1089,20 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                        hammer2_inode_chain_sync(ip);
                hammer2_mtx_unlock(&ip->lock);
        } else if (modified) {
+               struct vnode *vp = ip->vp;
+
                hammer2_mtx_ex(&ip->lock);
                hammer2_inode_modify(ip);
-               if (ip->vp && ip->vp->v_writecount == 0 &&
-                   ip->vp->v_type == VREG) {
-                       ip->meta.mtime =
-                               (unsigned long)ip->vp->v_lastwrite_ts.tv_sec *
-                                1000000 +
-                               ip->vp->v_lastwrite_ts.tv_nsec / 1000;
+               if (uio->uio_segflg == UIO_NOCOPY) {
+                       if (vp->v_flag & VLASTWRITETS) {
+                               ip->meta.mtime =
+                                   (unsigned long)vp->v_lastwrite_ts.tv_sec *
+                                   1000000 +
+                                   vp->v_lastwrite_ts.tv_nsec / 1000;
+                       }
                } else {
                        hammer2_update_time(&ip->meta.mtime);
+                       vclrflags(vp, VLASTWRITETS);
                }
                if (ip->flags & HAMMER2_INODE_MODIFIED)
                        hammer2_inode_chain_sync(ip);
index 3b730bb..9565f9f 100644 (file)
@@ -177,12 +177,12 @@ struct tmpfs_node {
        mode_t                  tn_mode;
        int                     tn_flags;
        nlink_t                 tn_links;
-       int32_t                 tn_atime;
-       int32_t                 tn_atimensec;
-       int32_t                 tn_mtime;
-       int32_t                 tn_mtimensec;
-       int32_t                 tn_ctime;
-       int32_t                 tn_ctimensec;
+       long                    tn_atime;
+       long                    tn_atimensec;
+       long                    tn_mtime;
+       long                    tn_mtimensec;
+       long                    tn_ctime;
+       long                    tn_ctimensec;
        unsigned long           tn_gen;
        struct lockf            tn_advlock;
 
index d608bd1..e6a26c3 100644 (file)
@@ -1222,8 +1222,10 @@ tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
        if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
                node->tn_status |= TMPFS_NODE_ACCESSED;
 
-       if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
+       if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) {
                node->tn_status |= TMPFS_NODE_MODIFIED;
+               vclrflags(vp, VLASTWRITETS);
+       }
 
        TMPFS_NODE_UNLOCK(node);
 
@@ -1268,8 +1270,10 @@ tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
                node->tn_ctime = now.tv_sec;
                node->tn_ctimensec = now.tv_nsec;
        }
-       node->tn_status &=
-           ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
+
+       node->tn_status &= ~(TMPFS_NODE_ACCESSED |
+                            TMPFS_NODE_MODIFIED |
+                            TMPFS_NODE_CHANGED);
        TMPFS_NODE_UNLOCK(node);
 }
 
index 47e477f..80e5b28 100644 (file)
@@ -795,11 +795,14 @@ tmpfs_write(struct vop_write_args *ap)
         * order to be able to dispose of the buffer cache buffer without
         * flushing it.
         */
-       if (vp->v_writecount) {
-               node->tn_status |= TMPFS_NODE_MODIFIED;
+       if (uio->uio_segflg == UIO_NOCOPY) {
+               if (vp->v_flag & VLASTWRITETS) {
+                       node->tn_mtime = vp->v_lastwrite_ts.tv_sec;
+                       node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec;
+               }
        } else {
-               node->tn_mtime = vp->v_lastwrite_ts.tv_sec;
-               node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec;
+               node->tn_status |= TMPFS_NODE_MODIFIED;
+               vclrflags(vp, VLASTWRITETS);
        }
 
        if (extended)
index f04f6ff..2863686 100644 (file)
@@ -137,7 +137,7 @@ struct inode {
 #define        IN_EXLOCK       0x0040          /* File has exclusive lock. */
 #define        IN_HASHED       0x0080          /* Inode is on hash list */
 #define        IN_LAZYMOD      0x0100          /* Modified, but don't write yet. */
-#define IN_WRITING     0x0200          /* Write in progress */
+#define IN_NOCOPYWRITE 0x0200          /* Special NOCOPY write */
 
 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
 
index a986f7d..03ca3c1 100644 (file)
@@ -282,7 +282,10 @@ ffs_write(struct vop_write_args *ap)
        if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
                flags |= B_SYNC;
 
-       ip->i_flag |= IN_WRITING;
+       if (uio->uio_segflg == UIO_NOCOPY)
+               ip->i_flag |= IN_NOCOPYWRITE;
+       else
+               vclrflags(vp, VLASTWRITETS);
 
        for (error = 0; uio->uio_resid > 0;) {
                lbn = lblkno(fs, uio->uio_offset);
@@ -410,7 +413,8 @@ ffs_write(struct vop_write_args *ap)
        } else {
                ufs_itimes(vp);
        }
-       ip->i_flag &= ~IN_WRITING;
+       if (uio->uio_segflg == UIO_NOCOPY)
+               ip->i_flag &= ~IN_NOCOPYWRITE;
 
        return (error);
 }
index 003f9a2..73b02cf 100644 (file)
@@ -168,13 +168,16 @@ ufs_itimes(struct vnode *vp)
                        ip->i_ctimensec = ts.tv_nsec;
                }
                if (ip->i_flag & IN_UPDATE) {
-                       if (vp->v_writecount == 0 &&
-                           (ip->i_flag & IN_WRITING) &&
-                           vp->v_type == VREG) {
-                               ts = vp->v_lastwrite_ts;
+                       if (ip->i_flag & IN_NOCOPYWRITE) {
+                               if (vp->v_flag & VLASTWRITETS) {
+                                       ip->i_mtime = vp->v_lastwrite_ts.tv_sec;
+                                       ip->i_mtimensec =
+                                               vp->v_lastwrite_ts.tv_nsec;
+                               }
+                       } else {
+                               ip->i_mtime = ts.tv_sec;
+                               ip->i_mtimensec = ts.tv_nsec;
                        }
-                       ip->i_mtime = ts.tv_sec;
-                       ip->i_mtimensec = ts.tv_nsec;
                        ip->i_modrev++;
                }
        }
@@ -476,6 +479,7 @@ ufs_setattr(struct vop_setattr_args *ap)
                if (vap->va_mtime.tv_sec != VNOVAL) {
                        ip->i_mtime = vap->va_mtime.tv_sec;
                        ip->i_mtimensec = vap->va_mtime.tv_nsec;
+                       vclrflags(vp, VLASTWRITETS);
                }
                error = ffs_update(vp, 0);
                if (error)
index 764dc7f..188d5d8 100644 (file)
@@ -1943,6 +1943,26 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
                        vm_map_entry_release(count);
                        return (KERN_PROTECTION_FAILURE);
                }
+
+               /*
+                * When making a SHARED+RW file mmap writable, update
+                * v_lastwrite_ts.
+                */
+               if (new_prot & PROT_WRITE &&
+                   (current->eflags & MAP_ENTRY_NEEDS_COPY) == 0 &&
+                   (current->maptype == VM_MAPTYPE_NORMAL ||
+                    current->maptype == VM_MAPTYPE_VPAGETABLE) &&
+                   current->object.vm_object &&
+                   current->object.vm_object->type == OBJT_VNODE) {
+                       struct vnode *vp;
+
+                       vp = current->object.vm_object->handle;
+                       if (vp && vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT) == 0) {
+                               vfs_timestamp(&vp->v_lastwrite_ts);
+                               vsetflags(vp, VLASTWRITETS);
+                               vn_unlock(vp);
+                       }
+               }
                current = current->next;
        }
 
index 44f3a90..27530b7 100644 (file)
@@ -339,6 +339,8 @@ kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
                         * for it, bail out.  Check for superuser, only if
                         * we're at securelevel < 1, to allow the XIG X server
                         * to continue to work.
+                        *
+                        * PROT_WRITE + MAP_SHARED
                         */
                        if ((flags & MAP_SHARED) != 0 || vp->v_type == VCHR) {
                                if ((fp->f_flag & FWRITE) != 0) {
@@ -349,6 +351,17 @@ kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
                                        if ((va.va_flags &
                                            (IMMUTABLE|APPEND)) == 0) {
                                                maxprot |= VM_PROT_WRITE;
+
+                                               /*
+                                                * SHARED+RW file mmap()
+                                                * updates v_lastwrite_ts.
+                                                */
+                                               if ((prot & PROT_WRITE) &&
+                                                   vn_lock(vp, LK_EXCLUSIVE | LK_RETRY) == 0) {
+                                                       vfs_timestamp(&vp->v_lastwrite_ts);
+                                                       vsetflags(vp, VLASTWRITETS);
+                                                       vn_unlock(vp);
+                                               }
                                        } else if (prot & PROT_WRITE) {
                                                error = EPERM;
                                                goto done;