From: Matthew Dillon Date: Wed, 4 Oct 2017 03:06:04 +0000 (-0700) Subject: kernel - KVABIO stabilization X-Git-Tag: v5.3.0~1035 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/b3f55d8892ebb4d588921f0e08b1774035fa5ffa kernel - KVABIO stabilization * bp->b_cpumask must be cleared in vfs_vmio_release(). * Generally speaking, it is desirable for the kernel to set B_KVABIO when flushing or disposing of a buffer, as long as b_cpumask is also correct. This avoids unnecessary synchronization when underlying device drivers support KVABIO, even if the filesystem does not. * In findblk() we cannot just gratuitously clear B_KVABIO. We must issue a bkvasync_all() to clear the flag in order to ensure proper synchronization with the caller's desired B_KVABIO state. * It was intended that bkvasync_all() clear the B_KVABIO flag. Make sure it does. * In contrast, B_KVABIO can always be set at any time, so long as the cpumask is cleared whenever the mappings are changed, and also as long as the caller's B_KVABIO state is respected if the buffer is later returned to the caller in a locked state. If the buffer will simply be disposed of by the kernel instead, the flag can be set. The wrapper (typically a vn_strategy() or dev_dstrategy() call) will clear the flag via bkvasync_all() if the target does not support KVABIO. * Kernel support code outside of filesystem and device drivers is expected to support KVABIO. * nvtruncbuf() and nvextendbuf() now use bread_kvabio() (i.e. they now properly support KVABIO). * The buf_countdeps(), buf_checkread(), and buf_checkwrite() callbacks call bkvasync_all() in situations where the vnode does not support KVABIO. This is because the kernel might have set the flag for other incidental operations even if the filesystem did not. * As per above, devfs_spec_strategy() now sets B_KVABIO and properly calls bkvasync() when it needs to operate directly on buf->b_data. * Fix bug in tmpfs(). 
tmpfs() was using bread_kvabio() as intended, but failed to call bkvasync() prior to operating directly on buf->b_data (prior to calling uiomovebp()). * Any VFS function that calls BUF_LOCK*() itself may also have to call bkvasync_all() if it wishes to operate directly on buf->b_data, even if the VFS is not KVABIO aware. This is because the VFS bypassed the normal buffer cache APIs to obtain a locked buffer. --- diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 2ac85a6ed6..6d1115a20a 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1879,6 +1879,7 @@ vfs_vmio_release(struct buf *bp) */ pmap_qremove_noinval(trunc_page((vm_offset_t) bp->b_data), bp->b_xio.xio_npages); + CPUMASK_ASSZERO(bp->b_cpumask); if (bp->b_bufsize) { atomic_add_long(&bufspace, -bp->b_bufsize); bp->b_bufsize = 0; @@ -2470,7 +2471,7 @@ again: bp->b_flags &= ~B_AGE; cluster_awrite(bp); } else { - bp->b_flags |= B_AGE; + bp->b_flags |= B_AGE | B_KVABIO; cluster_awrite(bp); } /* bp invalid but needs to be NULL-tested if we break out */ @@ -2621,13 +2622,15 @@ findblk(struct vnode *vp, off_t loffset, int flags) * Revalidate the locked buf before allowing it to be * returned. * - * B_KVABIO is only set/cleared when locking. + * B_KVABIO is only set/cleared when locking. When + * clearing B_KVABIO, we must ensure that the buffer + * is synchronized to all cpus. */ if (bp->b_vp == vp && bp->b_loffset == loffset) { if (flags & FINDBLK_KVABIO) bp->b_flags |= B_KVABIO; else - bp->b_flags &= ~B_KVABIO; + bkvasync_all(bp); break; } atomic_subtract_int(&bp->b_refs, 1); @@ -4390,6 +4393,8 @@ bkvasync(struct buf *bp) * the KVABIO API. Make sure its data is synchronized to all cpus. * * If B_KVABIO is not set, the buffer is already fully synchronized. + * + * NOTE! This is the only safe way to clear B_KVABIO on a buffer. 
*/ void bkvasync_all(struct buf *bp) @@ -4399,8 +4404,8 @@ bkvasync_all(struct buf *bp) smp_invltlb(); cpu_invltlb(); ATOMIC_CPUMASK_ORMASK(bp->b_cpumask, smp_active_mask); - bp->b_flags &= ~B_KVABIO; } + bp->b_flags &= ~B_KVABIO; } /* diff --git a/sys/kern/vfs_vm.c b/sys/kern/vfs_vm.c index e03790b0c0..1fb4606b59 100644 --- a/sys/kern/vfs_vm.c +++ b/sys/kern/vfs_vm.c @@ -180,8 +180,9 @@ nvtruncbuf(struct vnode *vp, off_t length, int blksize, int boff, int trivial) */ if (boff && trivial == 0) { truncboffset = length - boff; - error = bread(vp, truncboffset, blksize, &bp); + error = bread_kvabio(vp, truncboffset, blksize, &bp); if (error == 0) { + bkvasync(bp); bzero(bp->b_data + boff, blksize - boff); if (bp->b_flags & B_DELWRI) { if (bp->b_dirtyoff > boff) @@ -382,8 +383,9 @@ nvextendbuf(struct vnode *vp, off_t olength, off_t nlength, truncboffset = olength - oboff; if (oboff) { - error = bread(vp, truncboffset, oblksize, &bp); + error = bread_kvabio(vp, truncboffset, oblksize, &bp); if (error == 0) { + bkvasync(bp); bzero(bp->b_data + oboff, oblksize - oboff); bp->b_bio2.bio_offset = NOOFFSET; bdwrite(bp); diff --git a/sys/sys/buf2.h b/sys/sys/buf2.h index b699c40122..ed29fcd159 100644 --- a/sys/sys/buf2.h +++ b/sys/sys/buf2.h @@ -240,7 +240,9 @@ buf_deallocate(struct buf *bp) } /* - * MPSAFE + * This callback is made from flushbufqueues() which uses BUF_LOCK(). + * Since it isn't going through a normal buffer aquisition mechanic + * and calling the filesystem back enforce the vnode's KVABIO support. 
*/ static __inline int buf_countdeps(struct buf *bp, int n) @@ -248,10 +250,13 @@ buf_countdeps(struct buf *bp, int n) struct bio_ops *ops = bp->b_ops; int r; - if (ops) + if (ops) { + if (bp->b_vp == NULL || (bp->b_vp->v_flag & VKVABIO) == 0) + bkvasync_all(bp); r = ops->io_countdeps(bp, n); - else + } else { r = 0; + } return(r); } @@ -321,15 +326,20 @@ buf_checkread(struct buf *bp) } /* - * MPSAFE + * This callback is made from flushbufqueues() which uses BUF_LOCK(). + * Since it isn't going through a normal buffer aquisition mechanic + * and calling the filesystem back enforce the vnode's KVABIO support. */ static __inline int buf_checkwrite(struct buf *bp) { struct bio_ops *ops = bp->b_ops; - if (ops) + if (ops) { + if (bp->b_vp == NULL || (bp->b_vp->v_flag & VKVABIO) == 0) + bkvasync_all(bp); return(ops->io_checkwrite(bp)); + } return(0); } diff --git a/sys/vfs/devfs/devfs_vnops.c b/sys/vfs/devfs/devfs_vnops.c index 1f6166fe8a..a5e6b49b92 100644 --- a/sys/vfs/devfs/devfs_vnops.c +++ b/sys/vfs/devfs/devfs_vnops.c @@ -1738,7 +1738,8 @@ devfs_spec_strategy(struct vop_strategy_args *ap) BUF_LOCK(nbp, LK_EXCLUSIVE); BUF_KERNPROC(nbp); nbp->b_vp = vp; - nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP); + nbp->b_flags = B_PAGING | B_KVABIO | (bp->b_flags & B_BNOCLIP); + nbp->b_cpumask = bp->b_cpumask; nbp->b_data = bp->b_data; nbp->b_bio1.bio_done = devfs_spec_strategy_done; nbp->b_bio1.bio_offset = bio->bio_offset; @@ -2002,10 +2003,11 @@ devfs_spec_getpages(struct vop_getpages_args *ap) /* * Map the pages to be read into the kva. */ - pmap_qenter(kva, ap->a_m, pcount); + pmap_qenter_noinval(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_cmd = BUF_CMD_READ; + bp->b_flags |= B_KVABIO; bp->b_bcount = size; bp->b_resid = 0; bsetrunningbufspace(bp, size); @@ -2042,9 +2044,11 @@ devfs_spec_getpages(struct vop_getpages_args *ap) * might indicate an EOF with b_resid instead of truncating b_bcount. 
*/ nread = bp->b_bcount - bp->b_resid; - if (nread < ap->a_count) + if (nread < ap->a_count) { + bkvasync(bp); bzero((caddr_t)kva + nread, ap->a_count - nread); - pmap_qremove(kva, pcount); + } + pmap_qremove_noinval(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { diff --git a/sys/vfs/tmpfs/tmpfs_vnops.c b/sys/vfs/tmpfs/tmpfs_vnops.c index dfcae60b36..407c8cd074 100644 --- a/sys/vfs/tmpfs/tmpfs_vnops.c +++ b/sys/vfs/tmpfs/tmpfs_vnops.c @@ -507,6 +507,7 @@ tmpfs_read(struct vop_read_args *ap) vm_wait_nominal(); } bp->b_flags |= B_CLUSTEROK; + bkvasync(bp); /* * Figure out how many bytes we can actually copy this loop. @@ -635,6 +636,7 @@ tmpfs_write(struct vop_write_args *ap) * So just use bread() to do the right thing. */ error = bread_kvabio(vp, base_offset, TMPFS_BLKSIZE, &bp); + bkvasync(bp); error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); if (error) { kprintf("tmpfs_write uiomove error %d\n", error); diff --git a/sys/vfs/ufs/ffs_softdep.c b/sys/vfs/ufs/ffs_softdep.c index c4e3dfaada..6377ecd288 100644 --- a/sys/vfs/ufs/ffs_softdep.c +++ b/sys/vfs/ufs/ffs_softdep.c @@ -5024,8 +5024,13 @@ getdirtybuf(struct buf **bpp, int waitfor) /* * Finish nominal buffer locking sequence return success. + * + * Since we are not using a normal getblk(), and UFS + * isn't KVABIO aware, we must make sure that the bp + * is synchronized before returning it. */ bremfree(bp); + bkvasync_all(bp); return (1); }