From: Matthew Dillon Date: Wed, 14 Feb 2018 07:02:18 +0000 (-0800) Subject: kernel - Try to fix delayed mtime updates on SHARED+RW maps again X-Git-Tag: v5.3.0~212 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/fa4a12c408e7819a51670d15087aadbf10659890 kernel - Try to fix delayed mtime updates on SHARED+RW maps again * Attempt to fix continuing problems with mtime updates related to modifications made via SHARED+RW mmap()s. As evidenced by builds under synth sometimes getting confused. * Don't update vp->v_lastwrite_ts in vop_stdclose(). Instead, update the field and set a new flag VLASTWRITETS upon mmap(SHARED+PROT_WRITE) or upon mprotect(PROT_WRITE) on a SHARED mmap. * Clear the flag on any regular write, utimes, or truncation. * Adjust various filesystems to update mtime from vp->v_lastwrite_ts only upon UIO_NOCOPY writes, and only if VLASTWRITETS is set. tmpfs, ufs, hammer, hammer2 adjusted. --- diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 776af1c72f..5a468b489f 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1207,7 +1207,6 @@ vop_stdclose(struct vop_close_args *ap) KASSERT(vp->v_writecount > 0, ("VOP_STDCLOSE: BAD WRITECOUNT %p %d", vp, vp->v_writecount)); - vfs_timestamp(&vp->v_lastwrite_ts); atomic_add_int(&vp->v_writecount, -1); } atomic_add_int(&vp->v_opencount, -1); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index c456f2cd7a..83b0bd4549 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -112,13 +112,9 @@ struct mountctl_opt { * deal with clustered cache coherency issues and, more immediately, to * protect operations associated with the kernel-managed journaling module. * - * Generally speaking modification timestamps for inodes get set as-of the - * time the write occurs. However, files mmap()'d R+W may not see the - * underlying pages get flushed until well after close(). In this situation, - * v_lastwrite_ts is set to the timestamp that the filesystem should use - * for such updates. VFS's should be aware that symlink data sometimes uses - * the file write mechanism to set the symlink content and v_writecount - * will probably be 0. v_lastwrite_ts should not be used in that situation. + * v_lastwrite_ts is set along with VLASTWRITETS when a file is mmap()'d + * SHARED+RW. VM page flushes, which can be delayed substantially from when + * the page was actually modified, will use the v_lastwrite_ts if available. * * NOTE: Certain fields within the vnode structure requires v_token to be * held. The vnode's normal lock need not be held when accessing @@ -225,7 +221,7 @@ struct vnode { /* open for business 0x00010000 */ /* open for business 0x00020000 */ #define VRECLAIMED 0x00040000 /* This vnode has been destroyed */ -/* open for business 0x00080000 */ +#define VLASTWRITETS 0x00080000 /* VLASTWRITETS updated */ #define VNOTSEEKABLE 0x00100000 /* rd/wr ignores file offset */ #define VONWORKLST 0x00200000 /* On syncer work-list */ #define VISDIRTY 0x00400000 /* inode dirty from VFS */ diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 6497f03b40..fe4015adff 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -531,12 +531,17 @@ hammer_vop_write(struct vop_write_args *ap) * Use v_lastwrite_ts if file not open for writing * (i.e. a late msync) */ - if (vp->v_writecount == 0) { - trans.time = vp->v_lastwrite_ts.tv_sec * 1000000 + - vp->v_lastwrite_ts.tv_nsec / 1000; + if (uio->uio_segflg == UIO_NOCOPY) { + if (vp->v_flag & VLASTWRITETS) { + trans.time = vp->v_lastwrite_ts.tv_sec * 1000000 + + vp->v_lastwrite_ts.tv_nsec / 1000; + } else { + trans.time = ip->ino_data.mtime; + } + } else { + vclrflags(vp, VLASTWRITETS); } - /* * Check append mode */ @@ -2281,6 +2286,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap) ip->ino_data.mtime = trans.time; /* XXX safe to use SDIRTY instead of DDIRTY here? */ modflags |= HAMMER_INODE_MTIME | HAMMER_INODE_DDIRTY; + vclrflags(ap->a_vp, VLASTWRITETS); /* * On-media truncation is cached in the inode until @@ -2325,6 +2331,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap) ip->ino_data.size = vap->va_size; ip->ino_data.mtime = trans.time; modflags |= HAMMER_INODE_MTIME | HAMMER_INODE_DDIRTY; + vclrflags(ap->a_vp, VLASTWRITETS); kflags |= NOTE_ATTRIB; break; default: @@ -2342,6 +2349,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap) ip->ino_data.mtime = hammer_timespec_to_time(&vap->va_mtime); modflags |= HAMMER_INODE_MTIME; kflags |= NOTE_ATTRIB; + vclrflags(ap->a_vp, VLASTWRITETS); } if (vap->va_mode != (mode_t)VNOVAL) { mode_t cur_mode = ip->ino_data.mode; diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index d260720632..fb1788424c 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -422,6 +422,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) } hammer2_inode_modify(ip); ip->meta.mtime = ctime; + vclrflags(vp, VLASTWRITETS); break; default: error = EINVAL; @@ -455,6 +456,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) hammer2_inode_modify(ip); ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime); kflags |= NOTE_ATTRIB; + vclrflags(vp, VLASTWRITETS); } done: @@ -1087,16 +1089,20 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, hammer2_inode_chain_sync(ip); hammer2_mtx_unlock(&ip->lock); } else if (modified) { + struct vnode *vp = ip->vp; + hammer2_mtx_ex(&ip->lock); hammer2_inode_modify(ip); - if (ip->vp && ip->vp->v_writecount == 0 && - ip->vp->v_type == VREG) { - ip->meta.mtime = - (unsigned long)ip->vp->v_lastwrite_ts.tv_sec * - 1000000 + - ip->vp->v_lastwrite_ts.tv_nsec / 1000; + if (uio->uio_segflg == UIO_NOCOPY) { + if (vp->v_flag & VLASTWRITETS) { + ip->meta.mtime = + (unsigned long)vp->v_lastwrite_ts.tv_sec * + 1000000 + + vp->v_lastwrite_ts.tv_nsec / 1000; + } } else { hammer2_update_time(&ip->meta.mtime); + vclrflags(vp, VLASTWRITETS); } if (ip->flags & HAMMER2_INODE_MODIFIED) hammer2_inode_chain_sync(ip); diff --git a/sys/vfs/tmpfs/tmpfs.h b/sys/vfs/tmpfs/tmpfs.h index 3b730bb28b..9565f9fde9 100644 --- a/sys/vfs/tmpfs/tmpfs.h +++ b/sys/vfs/tmpfs/tmpfs.h @@ -177,12 +177,12 @@ struct tmpfs_node { mode_t tn_mode; int tn_flags; nlink_t tn_links; - int32_t tn_atime; - int32_t tn_atimensec; - int32_t tn_mtime; - int32_t tn_mtimensec; - int32_t tn_ctime; - int32_t tn_ctimensec; + long tn_atime; + long tn_atimensec; + long tn_mtime; + long tn_mtimensec; + long tn_ctime; + long tn_ctimensec; unsigned long tn_gen; struct lockf tn_advlock; diff --git a/sys/vfs/tmpfs/tmpfs_subr.c b/sys/vfs/tmpfs/tmpfs_subr.c index d608bd1339..e6a26c36af 100644 --- a/sys/vfs/tmpfs/tmpfs_subr.c +++ b/sys/vfs/tmpfs/tmpfs_subr.c @@ -1222,8 +1222,10 @@ tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) node->tn_status |= TMPFS_NODE_ACCESSED; - if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) + if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) { node->tn_status |= TMPFS_NODE_MODIFIED; + vclrflags(vp, VLASTWRITETS); + } TMPFS_NODE_UNLOCK(node); @@ -1268,8 +1270,10 @@ tmpfs_itimes(struct vnode *vp, const struct timespec *acc, node->tn_ctime = now.tv_sec; node->tn_ctimensec = now.tv_nsec; } - node->tn_status &= - ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); + + node->tn_status &= ~(TMPFS_NODE_ACCESSED | + TMPFS_NODE_MODIFIED | + TMPFS_NODE_CHANGED); TMPFS_NODE_UNLOCK(node); } diff --git a/sys/vfs/tmpfs/tmpfs_vnops.c b/sys/vfs/tmpfs/tmpfs_vnops.c index 47e477fab3..80e5b2846d 100644 --- a/sys/vfs/tmpfs/tmpfs_vnops.c +++ b/sys/vfs/tmpfs/tmpfs_vnops.c @@ -795,11 +795,14 @@ tmpfs_write(struct vop_write_args *ap) * order to be able to dispose of the buffer cache buffer without * flushing it. */ - if (vp->v_writecount) { - node->tn_status |= TMPFS_NODE_MODIFIED; + if (uio->uio_segflg == UIO_NOCOPY) { + if (vp->v_flag & VLASTWRITETS) { + node->tn_mtime = vp->v_lastwrite_ts.tv_sec; + node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec; + } } else { - node->tn_mtime = vp->v_lastwrite_ts.tv_sec; - node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec; + node->tn_status |= TMPFS_NODE_MODIFIED; + vclrflags(vp, VLASTWRITETS); } if (extended) diff --git a/sys/vfs/ufs/inode.h b/sys/vfs/ufs/inode.h index f04f6ff349..28636866e0 100644 --- a/sys/vfs/ufs/inode.h +++ b/sys/vfs/ufs/inode.h @@ -137,7 +137,7 @@ struct inode { #define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #define IN_HASHED 0x0080 /* Inode is on hash list */ #define IN_LAZYMOD 0x0100 /* Modified, but don't write yet. */ -#define IN_WRITING 0x0200 /* Write in progress */ +#define IN_NOCOPYWRITE 0x0200 /* Special NOCOPY write */ #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) diff --git a/sys/vfs/ufs/ufs_readwrite.c b/sys/vfs/ufs/ufs_readwrite.c index a986f7d66c..03ca3c165f 100644 --- a/sys/vfs/ufs/ufs_readwrite.c +++ b/sys/vfs/ufs/ufs_readwrite.c @@ -282,7 +282,10 @@ ffs_write(struct vop_write_args *ap) if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) flags |= B_SYNC; - ip->i_flag |= IN_WRITING; + if (uio->uio_segflg == UIO_NOCOPY) + ip->i_flag |= IN_NOCOPYWRITE; + else + vclrflags(vp, VLASTWRITETS); for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); @@ -410,7 +413,8 @@ ffs_write(struct vop_write_args *ap) } else { ufs_itimes(vp); } - ip->i_flag &= ~IN_WRITING; + if (uio->uio_segflg == UIO_NOCOPY) + ip->i_flag &= ~IN_NOCOPYWRITE; return (error); } diff --git a/sys/vfs/ufs/ufs_vnops.c b/sys/vfs/ufs/ufs_vnops.c index 003f9a2955..73b02cff3f 100644 --- a/sys/vfs/ufs/ufs_vnops.c +++ b/sys/vfs/ufs/ufs_vnops.c @@ -168,13 +168,16 @@ ufs_itimes(struct vnode *vp) ip->i_ctimensec = ts.tv_nsec; } if (ip->i_flag & IN_UPDATE) { - if (vp->v_writecount == 0 && - (ip->i_flag & IN_WRITING) && - vp->v_type == VREG) { - ts = vp->v_lastwrite_ts; + if (ip->i_flag & IN_NOCOPYWRITE) { + if (vp->v_flag & VLASTWRITETS) { + ip->i_mtime = vp->v_lastwrite_ts.tv_sec; + ip->i_mtimensec = + vp->v_lastwrite_ts.tv_nsec; + } + } else { + ip->i_mtime = ts.tv_sec; + ip->i_mtimensec = ts.tv_nsec; } - ip->i_mtime = ts.tv_sec; - ip->i_mtimensec = ts.tv_nsec; ip->i_modrev++; } } @@ -476,6 +479,7 @@ ufs_setattr(struct vop_setattr_args *ap) if (vap->va_mtime.tv_sec != VNOVAL) { ip->i_mtime = vap->va_mtime.tv_sec; ip->i_mtimensec = vap->va_mtime.tv_nsec; + vclrflags(vp, VLASTWRITETS); } error = ffs_update(vp, 0); if (error) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 764dc7fd14..188d5d8470 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1943,6 +1943,26 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_map_entry_release(count); return (KERN_PROTECTION_FAILURE); } + + /* + * When making a SHARED+RW file mmap writable, update + * v_lastwrite_ts. + */ + if (new_prot & PROT_WRITE && + (current->eflags & MAP_ENTRY_NEEDS_COPY) == 0 && + (current->maptype == VM_MAPTYPE_NORMAL || + current->maptype == VM_MAPTYPE_VPAGETABLE) && + current->object.vm_object && + current->object.vm_object->type == OBJT_VNODE) { + struct vnode *vp; + + vp = current->object.vm_object->handle; + if (vp && vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT) == 0) { + vfs_timestamp(&vp->v_lastwrite_ts); + vsetflags(vp, VLASTWRITETS); + vn_unlock(vp); + } + } current = current->next; } diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 44f3a90a72..27530b72b8 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -339,6 +339,8 @@ kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen, * for it, bail out. Check for superuser, only if * we're at securelevel < 1, to allow the XIG X server * to continue to work. + * + * PROT_WRITE + MAP_SHARED */ if ((flags & MAP_SHARED) != 0 || vp->v_type == VCHR) { if ((fp->f_flag & FWRITE) != 0) { @@ -349,6 +351,17 @@ kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen, if ((va.va_flags & (IMMUTABLE|APPEND)) == 0) { maxprot |= VM_PROT_WRITE; + + /* + * SHARED+RW file mmap() + * updates v_lastwrite_ts. + */ + if ((prot & PROT_WRITE) && + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY) == 0) { + vfs_timestamp(&vp->v_lastwrite_ts); + vsetflags(vp, VLASTWRITETS); + vn_unlock(vp); + } } else if (prot & PROT_WRITE) { error = EPERM; goto done;