From 44480e310a5e2fdec131e9154d62ac8fb0f011a9 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 16 Aug 2012 17:54:16 -0700 Subject: [PATCH] kernel - Adjust UFS and HAMMER to use uiomovebp() * Add uiomovebp(), a version of uiomove() which is aware of a locked bp representing the to or from buffer and can work-around issues related to VM faults causing recursions and deadlocks on the user buffer. uiomovebp() does not yet detect or handle deadlocks. Implementing deadlock handling will require a certain degree of finess related to the vnode and bp locks and we don't want to have to do it unless we actually deadlock. TODO. * Adjust UFS, HAMMER, TMPFS, MSDOSFS, NFS, NTFS to use uiomovebp(). --- sys/kern/kern_subr.c | 33 +++++++++++++++++++++++++++++++++ sys/sys/uio.h | 3 +++ sys/vfs/hammer/hammer_vnops.c | 6 ++---- sys/vfs/msdosfs/msdosfs_vnops.c | 4 ++-- sys/vfs/nfs/nfs_bio.c | 6 +++--- sys/vfs/ntfs/ntfs_subr.c | 4 ++-- sys/vfs/ntfs/ntfs_vnops.c | 2 +- sys/vfs/tmpfs/tmpfs_vnops.c | 4 ++-- sys/vfs/ufs/ufs_readwrite.c | 4 ++-- 9 files changed, 50 insertions(+), 16 deletions(-) diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c index 29efb192d1..a6f6f5519a 100644 --- a/sys/kern/kern_subr.c +++ b/sys/kern/kern_subr.c @@ -136,6 +136,39 @@ uiomove(caddr_t cp, size_t n, struct uio *uio) return (error); } +/* + * This is the same as uiomove() except (cp, n) is within the bounds of + * the passed, locked buffer. Under certain circumstances a VM fault + * occuring with a locked buffer held can result in a deadlock or an + * attempt to recursively lock the buffer. + * + * This procedure deals with these cases. + * + * If the buffer represents a regular file, is B_CACHE, but the last VM page + * is not fully valid we fix-up the last VM page. This should handle the + * recursive lock issue. + * + * Deadlocks are another issue. We are holding the vp and the bp locked + * and could deadlock against a different vp and/or bp if another thread is + * trying to access us while we accessing it. The only solution here is + * to release the bp and vnode lock and do the uio to/from a system buffer, + * then regain the locks and copyback (if applicable). XXX TODO. + */ +int +uiomovebp(struct buf *bp, caddr_t cp, size_t n, struct uio *uio) +{ + int count; + vm_page_t m; + + if (bp->b_vp && bp->b_vp->v_type == VREG && + (bp->b_flags & B_CACHE) && + (count = bp->b_xio.xio_npages) != 0 && + (m = bp->b_xio.xio_pages[count-1])->valid != VM_PAGE_BITS_ALL) { + vm_page_zero_invalid(m, TRUE); + } + return (uiomove(cp, n, uio)); +} + /* * Like uiomove() but copies zero-fill. Only allowed for UIO_READ, * for obvious reasons. diff --git a/sys/sys/uio.h b/sys/sys/uio.h index a732ee325f..c466ce1b80 100644 --- a/sys/sys/uio.h +++ b/sys/sys/uio.h @@ -69,6 +69,8 @@ enum uio_seg { * iov uses an unsigned quantity, DragonFly will use the (unsigned) * size_t. */ +struct buf; + struct uio { struct iovec *uio_iov; int uio_iovcnt; @@ -93,6 +95,7 @@ struct vm_object; struct vm_page; int uiomove (caddr_t, size_t, struct uio *); +int uiomovebp (struct buf *, caddr_t, size_t, struct uio *); int uiomovez (size_t, struct uio *); int uiomove_frombuf (void *buf, size_t buflen, struct uio *uio); int uiomove_fromphys(struct vm_page *ma[], vm_offset_t offset, diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 1a06d86a23..5422aebd40 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -451,10 +451,8 @@ skip: * read()s on mmap()'d spaces. */ bp->b_flags |= B_AGE; - bqhold(bp); + error = uiomovebp(bp, (char *)bp->b_data + offset, n, uio); bqrelse(bp); - error = uiomove((char *)bp->b_data + offset, n, uio); - bqdrop(bp); if (got_fstoken) lwkt_gettoken(&hmp->fs_token); @@ -747,7 +745,7 @@ hammer_vop_write(struct vop_write_args *ap) } if (error == 0) { lwkt_reltoken(&hmp->fs_token); - error = uiomove(bp->b_data + offset, n, uio); + error = uiomovebp(bp, bp->b_data + offset, n, uio); lwkt_gettoken(&hmp->fs_token); } diff --git a/sys/vfs/msdosfs/msdosfs_vnops.c b/sys/vfs/msdosfs/msdosfs_vnops.c index 01dd6376f9..222a29ed3e 100644 --- a/sys/vfs/msdosfs/msdosfs_vnops.c +++ b/sys/vfs/msdosfs/msdosfs_vnops.c @@ -572,7 +572,7 @@ msdosfs_read(struct vop_read_args *ap) diff = blsize - bp->b_resid; if (diff < n) n = diff; - error = uiomove(bp->b_data + on, (size_t)n, uio); + error = uiomovebp(bp, bp->b_data + on, (size_t)n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); if (!isadir && (error == 0 || uio->uio_resid != orig_resid) && @@ -755,7 +755,7 @@ msdosfs_write(struct vop_write_args *ap) /* * Copy the data from user space into the buf header. */ - error = uiomove(bp->b_data + croffset, (size_t)n, uio); + error = uiomovebp(bp, bp->b_data + croffset, (size_t)n, uio); if (error) { brelse(bp); break; diff --git a/sys/vfs/nfs/nfs_bio.c b/sys/vfs/nfs/nfs_bio.c index c7555830ef..ebf8db3e3c 100644 --- a/sys/vfs/nfs/nfs_bio.c +++ b/sys/vfs/nfs/nfs_bio.c @@ -398,11 +398,11 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag) switch (vp->v_type) { case VREG: if (n > 0) - error = uiomove(bp->b_data + boff, n, uio); + error = uiomovebp(bp, bp->b_data + boff, n, uio); break; case VLNK: if (n > 0) - error = uiomove(bp->b_data + boff, n, uio); + error = uiomovebp(bp, bp->b_data + boff, n, uio); n = 0; break; case VDIR: @@ -722,7 +722,7 @@ again: goto again; } - error = uiomove(bp->b_data + boff, bytes, uio); + error = uiomovebp(bp, bp->b_data + boff, bytes, uio); /* * Since this block is being modified, it must be written diff --git a/sys/vfs/ntfs/ntfs_subr.c b/sys/vfs/ntfs/ntfs_subr.c index e94039f50c..a61cd4e366 100644 --- a/sys/vfs/ntfs/ntfs_subr.c +++ b/sys/vfs/ntfs/ntfs_subr.c @@ -1448,7 +1448,7 @@ ntfs_writentvattr_plain(struct ntfsmount *ntmp, struct ntnode *ip, } } if (uio) - uiomove(bp->b_data + off, tocopy, uio); + uiomovebp(bp, bp->b_data + off, tocopy, uio); else memcpy(bp->b_data + off, data, tocopy); bawrite(bp); @@ -1546,7 +1546,7 @@ ntfs_readntvattr_plain(struct ntfsmount *ntmp, struct ntnode *ip, return (error); } if (uio) { - uiomove(bp->b_data + off, + uiomovebp(bp, bp->b_data + off, tocopy, uio); } else { memcpy(data, bp->b_data + off, diff --git a/sys/vfs/ntfs/ntfs_vnops.c b/sys/vfs/ntfs/ntfs_vnops.c index 277aa60168..e4f2b0f28d 100644 --- a/sys/vfs/ntfs/ntfs_vnops.c +++ b/sys/vfs/ntfs/ntfs_vnops.c @@ -169,7 +169,7 @@ ntfs_read(struct vop_read_args *ap) break; } - error = uiomove(bp->b_data + off, toread - off, uio); + error = uiomovebp(bp, bp->b_data + off, toread - off, uio); if(error) { brelse(bp); break; diff --git a/sys/vfs/tmpfs/tmpfs_vnops.c b/sys/vfs/tmpfs/tmpfs_vnops.c index 3bf59c559a..70cb7cfc2f 100644 --- a/sys/vfs/tmpfs/tmpfs_vnops.c +++ b/sys/vfs/tmpfs/tmpfs_vnops.c @@ -473,7 +473,7 @@ tmpfs_read (struct vop_read_args *ap) if (len > node->tn_size - uio->uio_offset) len = (size_t)(node->tn_size - uio->uio_offset); - error = uiomove((char *)bp->b_data + offset, len, uio); + error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); bqrelse(bp); if (error) { kprintf("tmpfs_read uiomove error %d\n", error); @@ -577,7 +577,7 @@ tmpfs_write (struct vop_write_args *ap) * So just use bread() to do the right thing. */ error = bread(vp, base_offset, BSIZE, &bp); - error = uiomove((char *)bp->b_data + offset, len, uio); + error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); if (error) { kprintf("tmpfs_write uiomove error %d\n", error); brelse(bp); diff --git a/sys/vfs/ufs/ufs_readwrite.c b/sys/vfs/ufs/ufs_readwrite.c index 4171a1d442..610f7f66d4 100644 --- a/sys/vfs/ufs/ufs_readwrite.c +++ b/sys/vfs/ufs/ufs_readwrite.c @@ -173,7 +173,7 @@ ffs_read(struct vop_read_args *ap) /* * otherwise use the general form */ - error = uiomove(bp->b_data + blkoffset, xfersize, uio); + error = uiomovebp(bp, bp->b_data + blkoffset, xfersize, uio); if (error) break; @@ -369,7 +369,7 @@ ffs_write(struct vop_write_args *ap) if (size < xfersize) xfersize = size; - error = uiomove(bp->b_data + blkoffset, xfersize, uio); + error = uiomovebp(bp, bp->b_data + blkoffset, xfersize, uio); if ((ioflag & (IO_VMIO|IO_DIRECT)) && (LIST_FIRST(&bp->b_dep) == NULL)) { bp->b_flags |= B_RELBUF; -- 2.41.0