From b0d18f7d0ae8bb16f0ffe429dc925d2183579b45 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 14 Feb 2010 08:29:10 -0800 Subject: [PATCH] kernel - VM PAGER part 2/2 - Expand vinitvmio() and vnode_pager_alloc() * vinitvmio() is responsible for assigning the initial VM object size based on the file size. Adjust vinitvmio() to conform to the new nvextendbuf() and nvtruncbuf() API. * vinitvmio() has been given two additional parameters, blksize and boff, to allow it to determine how much larger the VM object must be relative to the byte-granular file size passed to it. * Remove vm_pager_allocate() and remove the pgo_alloc vector from struct pagerops. Convert all the VM pager allocation procedures into global procedures which are called directly. Trying to feed everything through a single function was a joke when all the callers knew precisely what kind of VM object they were creating anyway. Add the extra arguments to vnode_pager_alloc() which vinitvmio() needs to pass in. --- sys/kern/vfs_subr.c | 4 +-- sys/sys/vnode.h | 2 +- sys/vfs/devfs/devfs_vnops.c | 2 +- sys/vfs/gnu/ext2fs/ext2_vnops.c | 11 ++++--- sys/vfs/hammer/hammer_inode.c | 7 ++-- sys/vfs/hammer/hammer_vnops.c | 8 ++++- sys/vfs/isofs/cd9660/cd9660_vfsops.c | 2 +- sys/vfs/msdosfs/msdosfs_denode.c | 2 +- sys/vfs/nfs/nfs_subs.c | 7 +++- sys/vfs/ntfs/ntfs_subr.c | 2 +- sys/vfs/ntfs/ntfs_vfsops.c | 2 +- sys/vfs/tmpfs/tmpfs_subr.c | 2 +- sys/vfs/ufs/ufs_vnops.c | 18 ++++++---- sys/vfs/userfs/userfs_inode.c | 2 +- sys/vm/vm_pager.h | 2 +- sys/vm/vnode_pager.c | 49 ++++++++++++++++++++++------ 16 files changed, 86 insertions(+), 36 deletions(-) diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index b70331d060..b847884d87 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1407,14 +1407,14 @@ vcount(struct vnode *vp) * when a vnode is initialized from its inode. 
*/ int -vinitvmio(struct vnode *vp, off_t filesize) +vinitvmio(struct vnode *vp, off_t filesize, int blksize, int boff) { vm_object_t object; int error = 0; retry: if ((object = vp->v_object) == NULL) { - object = vnode_pager_alloc(vp, filesize, 0, 0); + object = vnode_pager_alloc(vp, filesize, 0, 0, blksize, boff); /* * Dereference the reference we just created. This assumes * that the object is associated with the vp. diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 52b4a517c3..503d526fa1 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -515,7 +515,7 @@ void nvnode_pager_setsize (struct vnode *vp, off_t length, int vfsync(struct vnode *vp, int waitfor, int passes, int (*checkdef)(struct buf *), int (*waitoutput)(struct vnode *, struct thread *)); -int vinitvmio(struct vnode *vp, off_t filesize); +int vinitvmio(struct vnode *vp, off_t filesize, int blksize, int boff); void vprint (char *label, struct vnode *vp); int vrecycle (struct vnode *vp); int vmaxiosize (struct vnode *vp); diff --git a/sys/vfs/devfs/devfs_vnops.c b/sys/vfs/devfs/devfs_vnops.c index ee6c5615e5..7284355aec 100644 --- a/sys/vfs/devfs/devfs_vnops.c +++ b/sys/vfs/devfs/devfs_vnops.c @@ -886,7 +886,7 @@ devfs_spec_open(struct vop_open_args *ap) if (vn_isdisk(vp, NULL)) { if (!dev->si_bsize_phys) dev->si_bsize_phys = DEV_BSIZE; - vinitvmio(vp, IDX_TO_OFF(INT_MAX)); + vinitvmio(vp, IDX_TO_OFF(INT_MAX), PAGE_SIZE, -1); } vop_stdopen(ap); diff --git a/sys/vfs/gnu/ext2fs/ext2_vnops.c b/sys/vfs/gnu/ext2fs/ext2_vnops.c index 03d0a3fd40..f4cde6dc96 100644 --- a/sys/vfs/gnu/ext2fs/ext2_vnops.c +++ b/sys/vfs/gnu/ext2fs/ext2_vnops.c @@ -936,7 +936,7 @@ ext2_mkdir(struct vop_old_mkdir_args *ap) * The vnode must have a VM object in order to issue buffer cache * ops on it. */ - vinitvmio(tvp, 0); + vinitvmio(tvp, 0, PAGE_SIZE, -1); /* * Bump link count in parent directory @@ -1098,7 +1098,7 @@ ext2_symlink(struct vop_old_symlink_args *ap) * the buffer cache. 
*/ if (vp->v_object == NULL) - vinitvmio(vp, 0); + vinitvmio(vp, 0, PAGE_SIZE, -1); error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, @@ -1206,7 +1206,7 @@ ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, * not (not immediately anyway). */ if (tvp->v_type == VREG || tvp->v_type == VDIR) - vinitvmio(tvp, 0); + vinitvmio(tvp, 0, PAGE_SIZE, -1); /* * Make sure inode goes to disk before directory entry. @@ -1886,6 +1886,7 @@ ext2_vinit(struct mount *mntp, struct vnode **vpp) struct inode *ip; struct vnode *vp; struct timeval tv; + struct ext2_sb_info *fs = VFSTOEXT2(mntp)->um_e2fs; vp = *vpp; ip = VTOI(vp); @@ -1901,13 +1902,13 @@ ext2_vinit(struct mount *mntp, struct vnode **vpp) break; case VDIR: case VREG: - vinitvmio(vp, ip->i_size); + vinitvmio(vp, ip->i_size, PAGE_SIZE, -1); /* XXX */ break; case VLNK: if ((ip->i_size >= vp->v_mount->mnt_maxsymlinklen) && ip->i_din.di_blocks != 0 ) { - vinitvmio(vp, ip->i_size); + vinitvmio(vp, ip->i_size, PAGE_SIZE, -1); } break; default: diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index c6822dc330..5fd1a34ac0 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -319,8 +319,11 @@ hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp) /* vnode locked by getnewvnode() */ /* make related vnode dirty if inode dirty? */ hammer_unlock(&ip->lock); - if (vp->v_type == VREG) - vinitvmio(vp, ip->ino_data.size); + if (vp->v_type == VREG) { + vinitvmio(vp, ip->ino_data.size, + hammer_blocksize(ip->ino_data.size), + hammer_blockoff(ip->ino_data.size)); + } break; } diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index c12fc97692..f43c003878 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -3248,7 +3248,13 @@ retry: if (error == 0) { cache_setunresolved(nch); cache_setvp(nch, NULL); - /* XXX locking */ + + /* + * XXX locking. 
Note: ip->vp might get ripped out + * when we setunresolved() the nch since we had + * no other reference to it. In that case ip->vp + * will be NULL. + */ if (ip && ip->vp) { hammer_knote(ip->vp, NOTE_DELETE); cache_inval_vp(ip->vp, CINV_DESTROY); diff --git a/sys/vfs/isofs/cd9660/cd9660_vfsops.c b/sys/vfs/isofs/cd9660/cd9660_vfsops.c index bff1fac288..fba3e1d20f 100644 --- a/sys/vfs/isofs/cd9660/cd9660_vfsops.c +++ b/sys/vfs/isofs/cd9660/cd9660_vfsops.c @@ -873,7 +873,7 @@ again: break; case VREG: case VDIR: - vinitvmio(vp, ip->i_size); + vinitvmio(vp, ip->i_size, PAGE_SIZE, -1); break; default: break; diff --git a/sys/vfs/msdosfs/msdosfs_denode.c b/sys/vfs/msdosfs/msdosfs_denode.c index cd0cdb1191..74e0cc629c 100644 --- a/sys/vfs/msdosfs/msdosfs_denode.c +++ b/sys/vfs/msdosfs/msdosfs_denode.c @@ -447,7 +447,7 @@ again: SETLOW(ldep->de_modrev, tv.tv_usec * 4294); ldep->de_devvp = pmp->pm_devvp; vref(ldep->de_devvp); - vinitvmio(nvp, ldep->de_FileSize); + vinitvmio(nvp, ldep->de_FileSize, PAGE_SIZE, -1); /* * Leave nvp locked and refd so the returned inode is effectively * locked and refd. diff --git a/sys/vfs/nfs/nfs_subs.c b/sys/vfs/nfs/nfs_subs.c index 498ed62838..638861af05 100644 --- a/sys/vfs/nfs/nfs_subs.c +++ b/sys/vfs/nfs/nfs_subs.c @@ -1395,7 +1395,12 @@ nfs_setvtype(struct vnode *vp, enum vtype vtyp) case VREG: case VDIR: case VLNK: - vinitvmio(vp, 0); /* needs VMIO, size not yet known */ + /* + * Needs VMIO, size not yet known, and blocksize + * is not really relevant if we are passing a + * filesize of 0. 
+ */ + vinitvmio(vp, 0, PAGE_SIZE, -1); break; default: break; diff --git a/sys/vfs/ntfs/ntfs_subr.c b/sys/vfs/ntfs/ntfs_subr.c index 427dcdac06..9c32057886 100644 --- a/sys/vfs/ntfs/ntfs_subr.c +++ b/sys/vfs/ntfs/ntfs_subr.c @@ -1005,7 +1005,7 @@ ntfs_ntlookupfile(struct ntfsmount *ntmp, struct vnode *vp, * Normal files use the buffer cache */ if (nvp->v_type == VREG) - vinitvmio(nvp, nfp->f_size); + vinitvmio(nvp, nfp->f_size, PAGE_SIZE, -1); *vpp = nvp; goto fail; } diff --git a/sys/vfs/ntfs/ntfs_vfsops.c b/sys/vfs/ntfs/ntfs_vfsops.c index ba274ab9df..5e4a9c073f 100644 --- a/sys/vfs/ntfs/ntfs_vfsops.c +++ b/sys/vfs/ntfs/ntfs_vfsops.c @@ -912,7 +912,7 @@ ntfs_vgetex(struct mount *mp, ino_t ino, u_int32_t attrtype, char *attrname, * Normal files use the buffer cache */ if (f_type == VREG) - vinitvmio(vp, fp->f_size); + vinitvmio(vp, fp->f_size, PAGE_SIZE, -1); ntfs_ntput(ip); diff --git a/sys/vfs/tmpfs/tmpfs_subr.c b/sys/vfs/tmpfs/tmpfs_subr.c index 00c986b3f2..27795facc3 100644 --- a/sys/vfs/tmpfs/tmpfs_subr.c +++ b/sys/vfs/tmpfs/tmpfs_subr.c @@ -453,7 +453,7 @@ loop: case VSOCK: break; case VREG: - vinitvmio(vp, (node->tn_size + BMASK) & ~(off_t)BMASK); + vinitvmio(vp, node->tn_size, BMASK, -1); break; case VLNK: break; diff --git a/sys/vfs/ufs/ufs_vnops.c b/sys/vfs/ufs/ufs_vnops.c index 990b5a27ba..fd489a5ef0 100644 --- a/sys/vfs/ufs/ufs_vnops.c +++ b/sys/vfs/ufs/ufs_vnops.c @@ -76,6 +76,7 @@ #include "ufsmount.h" #include "ufs_extern.h" #include "ffs_extern.h" +#include "fs.h" #ifdef UFS_DIRHASH #include "dirhash.h" #endif @@ -1365,7 +1366,7 @@ ufs_mkdir(struct vop_old_mkdir_args *ap) * The vnode must have a VM object in order to issue buffer cache * ops on it. */ - vinitvmio(tvp, DIRBLKSIZ); + vinitvmio(tvp, DIRBLKSIZ, DIRBLKSIZ, -1); /* * Initialize directory with "." and ".." from static template. @@ -1571,7 +1572,7 @@ ufs_symlink(struct vop_old_symlink_args *ap) * the buffer cache. 
*/ if (vp->v_object == NULL) - vinitvmio(vp, 0); + vinitvmio(vp, 0, PAGE_SIZE, -1); error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, NULL); @@ -1952,11 +1953,16 @@ ufs_vinit(struct mount *mntp, struct vnode **vpp) break; case VDIR: case VREG: - vinitvmio(vp, ip->i_size); + vinitvmio(vp, ip->i_size, + blkoffsize(ip->i_fs, ip, ip->i_size), + blkoff(ip->i_fs, ip->i_size)); break; case VLNK: - if (ip->i_size >= vp->v_mount->mnt_maxsymlinklen) - vinitvmio(vp, ip->i_size); + if (ip->i_size >= vp->v_mount->mnt_maxsymlinklen) { + vinitvmio(vp, ip->i_size, + blkoffsize(ip->i_fs, ip, ip->i_size), + blkoff(ip->i_fs, ip->i_size)); + } break; default: break; @@ -2076,7 +2082,7 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, * not (not immediately anyway). */ if (tvp->v_type == VREG || tvp->v_type == VDIR) - vinitvmio(tvp, 0); + vinitvmio(tvp, 0, PAGE_SIZE, -1); /* * Make sure inode goes to disk before directory entry. diff --git a/sys/vfs/userfs/userfs_inode.c b/sys/vfs/userfs/userfs_inode.c index fc879e7870..ec0df02b80 100644 --- a/sys/vfs/userfs/userfs_inode.c +++ b/sys/vfs/userfs/userfs_inode.c @@ -129,7 +129,7 @@ user_getnewvnode(struct mount *mp, struct vnode **vpp, ino_t ino, return (error); vp = *vpp; ip = kmalloc(sizeof(*ip), M_USERFSINODE, M_WAITOK|M_ZERO); - vinitvmio(vp, 0); + vinitvmio(vp, 0, PAGE_SIZE, 0); ump = (void *)mp->mnt_data; diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index 737fe04901..e3fff77587 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -104,7 +104,7 @@ vm_object_t default_pager_alloc(void *, off_t, vm_prot_t, off_t); vm_object_t dev_pager_alloc(void *, off_t, vm_prot_t, off_t); vm_object_t phys_pager_alloc(void *, off_t, vm_prot_t, off_t); vm_object_t swap_pager_alloc(void *, off_t, vm_prot_t, off_t); -vm_object_t vnode_pager_alloc (void *, off_t, vm_prot_t, off_t); +vm_object_t vnode_pager_alloc (void *, off_t, vm_prot_t, off_t, int, int); vm_object_t 
vnode_pager_reference (struct vnode *); void vm_pager_bufferinit (void); diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 23edb149d3..d23c0e829d 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -93,14 +93,21 @@ static struct krate vresrate = { 1 }; int vnode_pbuf_freecnt = -1; /* start out unlimited */ /* - * Allocate (or lookup) pager for a vnode. - * Handle is a vnode pointer. + * Allocate a VM object for a vnode, typically a regular file vnode. + * + * Some additional information is required to generate a properly sized + * object which covers the entire buffer cache buffer straddling the file + * EOF. Userland does not see the extra pages as the VM fault code tests + * against v_filesize. */ vm_object_t -vnode_pager_alloc(void *handle, off_t size, vm_prot_t prot, off_t offset) +vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset, + int blksize, int boff) { vm_object_t object; struct vnode *vp; + off_t loffset; + vm_pindex_t lsize; /* * Pageout to vnode, no can do yet. @@ -139,23 +146,45 @@ vnode_pager_alloc(void *handle, off_t size, vm_prot_t prot, off_t offset) if (vp->v_sysref.refcnt <= 0) panic("vnode_pager_alloc: no vnode reference"); + /* + * Round up to the *next* block, then destroy the buffers in question. + * Since we are only removing some of the buffers we must rely on the + * scan count to determine whether a loop is necessary. + * + * Destroy any pages beyond the last buffer. 
+ */ + if (boff < 0) + boff = (int)(length % blksize); + if (boff) + loffset = length + (blksize - boff); + else + loffset = length; + lsize = OFF_TO_IDX(round_page64(loffset)); + if (object == NULL) { /* * And an object of the appropriate size */ - object = vm_object_allocate(OBJT_VNODE, - OFF_TO_IDX(round_page64(size))); + object = vm_object_allocate(OBJT_VNODE, lsize); object->flags = 0; object->handle = handle; vp->v_object = object; - vp->v_filesize = size; + vp->v_filesize = length; } else { object->ref_count++; - if (vp->v_filesize != size) { + if (object->size != lsize) { + kprintf("vnode_pager_alloc: Warning, objsize " + "mismatch %jd/%jd vp=%p obj=%p\n", + (intmax_t)object->size, + (intmax_t)lsize, + vp, object); + } + if (vp->v_filesize != length) { kprintf("vnode_pager_alloc: Warning, filesize " - "mismatch %lld/%lld\n", - (long long)vp->v_filesize, - (long long)size); + "mismatch %jd/%jd vp=%p obj=%p\n", + (intmax_t)vp->v_filesize, + (intmax_t)length, + vp, object); } } vref(vp); -- 2.41.0