kernel - VM PAGER part 2/2 - Expand vinitvmio() and vnode_pager_alloc()
author Matthew Dillon <dillon@apollo.backplane.com>
Sun, 14 Feb 2010 16:29:10 +0000 (08:29 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Sun, 14 Feb 2010 16:29:10 +0000 (08:29 -0800)
* vinitvmio() is responsible for assigning the initial VM object size based
  on the file size.  Adjust vinitvmio() to conform to the new nvextendbuf()
  and nvtruncbuf() API.

* vinitvmio() has been given two additional parameters, blksize and boff,
  to allow it to determine how much larger the VM object must be relative
  to the byte-granular file size passed to it.

* Remove vm_pager_allocate() and remove the pgo_alloc vector from struct
  pagerops.  Convert all the VM pager allocation procedures into global
  procedures which are called directly.  Trying to feed everything through
  a single function was a joke when all the callers knew precisely what
  kind of VM object they were creating anyway.

  Add the extra arguments to vnode_pager_alloc() which vinitvmio() needs
  to pass in.

16 files changed:
sys/kern/vfs_subr.c
sys/sys/vnode.h
sys/vfs/devfs/devfs_vnops.c
sys/vfs/gnu/ext2fs/ext2_vnops.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_vnops.c
sys/vfs/isofs/cd9660/cd9660_vfsops.c
sys/vfs/msdosfs/msdosfs_denode.c
sys/vfs/nfs/nfs_subs.c
sys/vfs/ntfs/ntfs_subr.c
sys/vfs/ntfs/ntfs_vfsops.c
sys/vfs/tmpfs/tmpfs_subr.c
sys/vfs/ufs/ufs_vnops.c
sys/vfs/userfs/userfs_inode.c
sys/vm/vm_pager.h
sys/vm/vnode_pager.c

index b70331d..b847884 100644 (file)
@@ -1407,14 +1407,14 @@ vcount(struct vnode *vp)
  * when a vnode is initialized from its inode.
  */
 int
-vinitvmio(struct vnode *vp, off_t filesize)
+vinitvmio(struct vnode *vp, off_t filesize, int blksize, int boff)
 {
        vm_object_t object;
        int error = 0;
 
 retry:
        if ((object = vp->v_object) == NULL) {
-               object = vnode_pager_alloc(vp, filesize, 0, 0);
+               object = vnode_pager_alloc(vp, filesize, 0, 0, blksize, boff);
                /*
                 * Dereference the reference we just created.  This assumes
                 * that the object is associated with the vp.
index 52b4a51..503d526 100644 (file)
@@ -515,7 +515,7 @@ void        nvnode_pager_setsize (struct vnode *vp, off_t length,
 int    vfsync(struct vnode *vp, int waitfor, int passes,
                int (*checkdef)(struct buf *),
                int (*waitoutput)(struct vnode *, struct thread *));
-int    vinitvmio(struct vnode *vp, off_t filesize);
+int    vinitvmio(struct vnode *vp, off_t filesize, int blksize, int boff);
 void   vprint (char *label, struct vnode *vp);
 int    vrecycle (struct vnode *vp);
 int    vmaxiosize (struct vnode *vp);
index ee6c561..7284355 100644 (file)
@@ -886,7 +886,7 @@ devfs_spec_open(struct vop_open_args *ap)
        if (vn_isdisk(vp, NULL)) {
                if (!dev->si_bsize_phys)
                        dev->si_bsize_phys = DEV_BSIZE;
-               vinitvmio(vp, IDX_TO_OFF(INT_MAX));
+               vinitvmio(vp, IDX_TO_OFF(INT_MAX), PAGE_SIZE, -1);
        }
 
        vop_stdopen(ap);
index 03d0a3f..f4cde6d 100644 (file)
@@ -936,7 +936,7 @@ ext2_mkdir(struct vop_old_mkdir_args *ap)
         * The vnode must have a VM object in order to issue buffer cache
         * ops on it.
         */
-       vinitvmio(tvp, 0);
+       vinitvmio(tvp, 0, PAGE_SIZE, -1);
 
        /*
         * Bump link count in parent directory
@@ -1098,7 +1098,7 @@ ext2_symlink(struct vop_old_symlink_args *ap)
                 * the buffer cache.
                 */
                if (vp->v_object == NULL)
-                       vinitvmio(vp, 0);
+                       vinitvmio(vp, 0, PAGE_SIZE, -1);
 
                error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
                                UIO_SYSSPACE, IO_NODELOCKED, 
@@ -1206,7 +1206,7 @@ ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
         * not (not immediately anyway).
         */
        if (tvp->v_type == VREG || tvp->v_type == VDIR)
-               vinitvmio(tvp, 0);
+               vinitvmio(tvp, 0, PAGE_SIZE, -1);
 
        /*
         * Make sure inode goes to disk before directory entry.
@@ -1886,6 +1886,7 @@ ext2_vinit(struct mount *mntp, struct vnode **vpp)
        struct inode *ip;
        struct vnode *vp;
        struct timeval tv;
+       struct ext2_sb_info *fs = VFSTOEXT2(mntp)->um_e2fs;
 
        vp = *vpp;
        ip = VTOI(vp);
@@ -1901,13 +1902,13 @@ ext2_vinit(struct mount *mntp, struct vnode **vpp)
                break;
        case VDIR:
        case VREG:
-               vinitvmio(vp, ip->i_size);
+               vinitvmio(vp, ip->i_size, PAGE_SIZE, -1); /* XXX */
                break;
        case VLNK:
                if ((ip->i_size >= vp->v_mount->mnt_maxsymlinklen) &&
                    ip->i_din.di_blocks != 0
                ) {
-                       vinitvmio(vp, ip->i_size);
+                       vinitvmio(vp, ip->i_size, PAGE_SIZE, -1);
                }
                break;
        default:
index c6822dc..5fd1a34 100644 (file)
@@ -319,8 +319,11 @@ hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
                        /* vnode locked by getnewvnode() */
                        /* make related vnode dirty if inode dirty? */
                        hammer_unlock(&ip->lock);
-                       if (vp->v_type == VREG)
-                               vinitvmio(vp, ip->ino_data.size);
+                       if (vp->v_type == VREG) {
+                               vinitvmio(vp, ip->ino_data.size,
+                                         hammer_blocksize(ip->ino_data.size),
+                                         hammer_blockoff(ip->ino_data.size));
+                       }
                        break;
                }
 
index c12fc97..f43c003 100644 (file)
@@ -3248,7 +3248,13 @@ retry:
                if (error == 0) {
                        cache_setunresolved(nch);
                        cache_setvp(nch, NULL);
-                       /* XXX locking */
+
+                       /*
+                        * XXX locking.  Note: ip->vp might get ripped out
+                        * when we setunresolved() the nch since we had
+                        * no other reference to it.  In that case ip->vp
+                        * will be NULL.
+                        */
                        if (ip && ip->vp) {
                                hammer_knote(ip->vp, NOTE_DELETE);
                                cache_inval_vp(ip->vp, CINV_DESTROY);
index bff1fac..fba3e1d 100644 (file)
@@ -873,7 +873,7 @@ again:
                break;
        case VREG:
        case VDIR:
-               vinitvmio(vp, ip->i_size);
+               vinitvmio(vp, ip->i_size, PAGE_SIZE, -1);
                break;
        default:
                break;
index cd0cdb1..74e0cc6 100644 (file)
@@ -447,7 +447,7 @@ again:
        SETLOW(ldep->de_modrev, tv.tv_usec * 4294);
        ldep->de_devvp = pmp->pm_devvp;
        vref(ldep->de_devvp);
-       vinitvmio(nvp, ldep->de_FileSize);
+       vinitvmio(nvp, ldep->de_FileSize, PAGE_SIZE, -1);
        /*
         * Leave nvp locked and refd so the returned inode is effectively
         * locked and refd.
index 498ed62..638861a 100644 (file)
@@ -1395,7 +1395,12 @@ nfs_setvtype(struct vnode *vp, enum vtype vtyp)
        case VREG:
        case VDIR:
        case VLNK:
-               vinitvmio(vp, 0);       /* needs VMIO, size not yet known */
+               /*
+                * Needs VMIO, size not yet known, and blocksize
+                * is not really relevant if we are passing a
+                * filesize of 0.
+                */
+               vinitvmio(vp, 0, PAGE_SIZE, -1);
                break;
        default:
                break;
index 427dcda..9c32057 100644 (file)
@@ -1005,7 +1005,7 @@ ntfs_ntlookupfile(struct ntfsmount *ntmp, struct vnode *vp,
                         * Normal files use the buffer cache
                         */
                        if (nvp->v_type == VREG)
-                               vinitvmio(nvp, nfp->f_size);
+                               vinitvmio(nvp, nfp->f_size, PAGE_SIZE, -1);
                        *vpp = nvp;
                        goto fail;
                }
index ba274ab..5e4a9c0 100644 (file)
@@ -912,7 +912,7 @@ ntfs_vgetex(struct mount *mp, ino_t ino, u_int32_t attrtype, char *attrname,
         * Normal files use the buffer cache
         */
        if (f_type == VREG)
-               vinitvmio(vp, fp->f_size);
+               vinitvmio(vp, fp->f_size, PAGE_SIZE, -1);
 
        ntfs_ntput(ip);
 
index 00c986b..27795fa 100644 (file)
@@ -453,7 +453,7 @@ loop:
        case VSOCK:
                break;
        case VREG:
-               vinitvmio(vp, (node->tn_size + BMASK) & ~(off_t)BMASK);
+               vinitvmio(vp, node->tn_size, BMASK, -1);
                break;
        case VLNK:
                break;
index 990b5a2..fd489a5 100644 (file)
@@ -76,6 +76,7 @@
 #include "ufsmount.h"
 #include "ufs_extern.h"
 #include "ffs_extern.h"
+#include "fs.h"
 #ifdef UFS_DIRHASH
 #include "dirhash.h"
 #endif
@@ -1365,7 +1366,7 @@ ufs_mkdir(struct vop_old_mkdir_args *ap)
         * The vnode must have a VM object in order to issue buffer cache
         * ops on it.
         */
-       vinitvmio(tvp, DIRBLKSIZ);
+       vinitvmio(tvp, DIRBLKSIZ, DIRBLKSIZ, -1);
 
        /*
         * Initialize directory with "." and ".." from static template.
@@ -1571,7 +1572,7 @@ ufs_symlink(struct vop_old_symlink_args *ap)
                 * the buffer cache.
                 */
                if (vp->v_object == NULL)
-                       vinitvmio(vp, 0);
+                       vinitvmio(vp, 0, PAGE_SIZE, -1);
                error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
                                UIO_SYSSPACE, IO_NODELOCKED, 
                                ap->a_cnp->cn_cred, NULL);
@@ -1952,11 +1953,16 @@ ufs_vinit(struct mount *mntp, struct vnode **vpp)
                break;
        case VDIR:
        case VREG:
-               vinitvmio(vp, ip->i_size);
+               vinitvmio(vp, ip->i_size,
+                         blkoffsize(ip->i_fs, ip, ip->i_size),
+                         blkoff(ip->i_fs, ip->i_size));
                break;
        case VLNK:
-               if (ip->i_size >= vp->v_mount->mnt_maxsymlinklen)
-                       vinitvmio(vp, ip->i_size);
+               if (ip->i_size >= vp->v_mount->mnt_maxsymlinklen) {
+                       vinitvmio(vp, ip->i_size,
+                                 blkoffsize(ip->i_fs, ip, ip->i_size),
+                                 blkoff(ip->i_fs, ip->i_size));
+               }
                break;
        default:
                break;
@@ -2076,7 +2082,7 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
         * not (not immediately anyway).
         */
        if (tvp->v_type == VREG || tvp->v_type == VDIR)
-               vinitvmio(tvp, 0);
+               vinitvmio(tvp, 0, PAGE_SIZE, -1);
 
        /*
         * Make sure inode goes to disk before directory entry.
index fc879e7..ec0df02 100644 (file)
@@ -129,7 +129,7 @@ user_getnewvnode(struct mount *mp, struct vnode **vpp, ino_t ino,
                return (error);
        vp = *vpp;
        ip = kmalloc(sizeof(*ip), M_USERFSINODE, M_WAITOK|M_ZERO);
-       vinitvmio(vp, 0);
+       vinitvmio(vp, 0, PAGE_SIZE, 0);
 
        ump = (void *)mp->mnt_data;
 
index 737fe04..e3fff77 100644 (file)
@@ -104,7 +104,7 @@ vm_object_t default_pager_alloc(void *, off_t, vm_prot_t, off_t);
 vm_object_t dev_pager_alloc(void *, off_t, vm_prot_t, off_t);
 vm_object_t phys_pager_alloc(void *, off_t, vm_prot_t, off_t);
 vm_object_t swap_pager_alloc(void *, off_t, vm_prot_t, off_t);
-vm_object_t vnode_pager_alloc (void *, off_t, vm_prot_t, off_t);
+vm_object_t vnode_pager_alloc (void *, off_t, vm_prot_t, off_t, int, int);
 vm_object_t vnode_pager_reference (struct vnode *);
 
 void vm_pager_bufferinit (void);
index 23edb14..d23c0e8 100644 (file)
@@ -93,14 +93,21 @@ static struct krate vresrate = { 1 };
 int vnode_pbuf_freecnt = -1;   /* start out unlimited */
 
 /*
- * Allocate (or lookup) pager for a vnode.
- * Handle is a vnode pointer.
+ * Allocate a VM object for a vnode, typically a regular file vnode.
+ *
+ * Some additional information is required to generate a properly sized
+ * object which covers the entire buffer cache buffer straddling the file
+ * EOF.  Userland does not see the extra pages as the VM fault code tests
+ * against v_filesize.
  */
 vm_object_t
-vnode_pager_alloc(void *handle, off_t size, vm_prot_t prot, off_t offset)
+vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset,
+                 int blksize, int boff)
 {
        vm_object_t object;
        struct vnode *vp;
+       off_t loffset;
+       vm_pindex_t lsize;
 
        /*
         * Pageout to vnode, no can do yet.
@@ -139,23 +146,45 @@ vnode_pager_alloc(void *handle, off_t size, vm_prot_t prot, off_t offset)
        if (vp->v_sysref.refcnt <= 0)
                panic("vnode_pager_alloc: no vnode reference");
 
+       /*
+        * Round the file length up to the end of the block straddling EOF
+        * so the VM object covers that entire buffer.  A negative boff
+        * means the caller did not supply the offset within the block, so
+        * derive it from length and blksize.  The rounded length is then
+        * converted to a page count (lsize) for the object size.
+        */
+       if (boff < 0)
+               boff = (int)(length % blksize);
+       if (boff)
+               loffset = length + (blksize - boff);
+       else
+               loffset = length;
+       lsize = OFF_TO_IDX(round_page64(loffset));
+
        if (object == NULL) {
                /*
                 * And an object of the appropriate size
                 */
-               object = vm_object_allocate(OBJT_VNODE,
-                                           OFF_TO_IDX(round_page64(size)));
+               object = vm_object_allocate(OBJT_VNODE, lsize);
                object->flags = 0;
                object->handle = handle;
                vp->v_object = object;
-               vp->v_filesize = size;
+               vp->v_filesize = length;
        } else {
                object->ref_count++;
-               if (vp->v_filesize != size) {
+               if (object->size != lsize) {
+                       kprintf("vnode_pager_alloc: Warning, objsize "
+                               "mismatch %jd/%jd vp=%p obj=%p\n",
+                               (intmax_t)object->size,
+                               (intmax_t)lsize,
+                               vp, object);
+               }
+               if (vp->v_filesize != length) {
                        kprintf("vnode_pager_alloc: Warning, filesize "
-                               "mismatch %lld/%lld\n",
-                               (long long)vp->v_filesize,
-                               (long long)size);
+                               "mismatch %jd/%jd vp=%p obj=%p\n",
+                               (intmax_t)vp->v_filesize,
+                               (intmax_t)length,
+                               vp, object);
                }
        }
        vref(vp);