hammer2 - file truncate and extend
author: Matthew Dillon <dillon@apollo.backplane.com>
Tue, 28 Feb 2012 09:58:05 +0000 (01:58 -0800)
committer: Matthew Dillon <dillon@apollo.backplane.com>
Tue, 28 Feb 2012 09:58:05 +0000 (01:58 -0800)
* Implement file truncation and extension.  Remember that all partial block
  modifications (during truncation) have to roll a new block, so we use
  the chain_modify and related functions to handle it.

* Implement inode-embedded direct data (up to 512 bytes)

* Add support for the embedded feature in the chain_lookup/chain_next
  code and also fix a couple of bugs.

* Implement deletion flagging for chain structures (used for truncation and
  will eventually be used for unlink).

* Initial implementation of setattr (for truncation support).

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_vnops.c

index d1174a9..9870f50 100644 (file)
@@ -314,7 +314,7 @@ hammer2_chain_t *hammer2_chain_create(hammer2_mount_t *hmp,
                                hammer2_chain_t *parent,
                                hammer2_key_t key, int keybits,
                                int type, size_t bytes);
-void hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
+void hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
                                hammer2_chain_t *chain);
 void hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain,
                                hammer2_blockref_t *parent_bref);
index e9fca3c..916bf3c 100644 (file)
@@ -185,8 +185,10 @@ hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
                                /*
                                 * Succeeded, recurse and drop parent
                                 */
-                               SPLAY_REMOVE(hammer2_chain_splay,
-                                            &parent->shead, chain);
+                               if (!(chain->flags & HAMMER2_CHAIN_DELETED)) {
+                                       SPLAY_REMOVE(hammer2_chain_splay,
+                                                    &parent->shead, chain);
+                               }
                                chain->parent = NULL;
                                lockmgr(&parent->lk, LK_RELEASE);
                                hammer2_chain_free(hmp, chain);
@@ -629,6 +631,20 @@ again:
         */
        switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
+               /*
+                * Special shortcut for embedded data returns the inode
+                * itself.  Callers must detect this condition and access
+                * the embedded data (the strategy code does this for us).
+                *
+                * This is only applicable to regular files and softlinks.
+                */
+               if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
+                       hammer2_chain_ref(hmp, parent);
+                       if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
+                               hammer2_chain_lock(hmp, parent);
+                       kprintf("DIRECT DATA RETURNED\n");
+                       return (parent);
+               }
                base = &parent->data->ipdata.u.blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
@@ -643,7 +659,7 @@ again:
                count = HAMMER2_SET_COUNT;
                break;
        default:
-               panic("hammer2_chain_push: unrecognized blockref type: %d",
+               panic("hammer2_chain_lookup: unrecognized blockref type: %d",
                      parent->bref.type);
                base = NULL;    /* safety */
                count = 0;      /* safety */
@@ -729,10 +745,23 @@ again:
         */
        if (chain) {
                /*
-                * Continue iteration within current parent
+                * Continue iteration within current parent.  If not NULL
+                * the passed-in chain may or may not be locked, based on
+                * the LOOKUP_NOLOCK flag (passed in as returned from lookup
+                * or a prior next).
                 */
                i = chain->index + 1;
-               hammer2_chain_put(hmp, chain);
+               if (flags & HAMMER2_LOOKUP_NOLOCK)
+                       hammer2_chain_drop(hmp, chain);
+               else
+                       hammer2_chain_put(hmp, chain);
+
+               /*
+                * Any scan where the lookup returned degenerate data embedded
+                * in the inode has an invalid index and must terminate.
+                */
+               if (chain == parent)
+                       return(NULL);
                chain = NULL;
        } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
                /*
@@ -783,7 +812,7 @@ again2:
                count = HAMMER2_SET_COUNT;
                break;
        default:
-               panic("hammer2_chain_push: unrecognized blockref type: %d",
+               panic("hammer2_chain_next: unrecognized blockref type: %d",
                      parent->bref.type);
                base = NULL;    /* safety */
                count = 0;      /* safety */
@@ -828,11 +857,15 @@ again2:
 
        /*
         * If the chain element is an indirect block it becomes the new
-        * parent and we loop on it.
+        * parent and we loop on it.  We may have to lock the chain when
+        * cycling it in as the new parent as it will not be locked if the
+        * caller passed NOLOCK.
         */
        if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
                hammer2_chain_put(hmp, parent);
                *parentp = parent = chain;
+               if (flags & HAMMER2_LOOKUP_NOLOCK)
+                       hammer2_chain_lock(hmp, chain);
                i = 0;
                goto again2;
        }
@@ -931,7 +964,7 @@ again:
                count = HAMMER2_SET_COUNT;
                break;
        default:
-               panic("hammer2_chain_push: unrecognized blockref type: %d",
+               panic("hammer2_chain_create: unrecognized blockref type: %d",
                      parent->bref.type);
                count = 0;
                break;
@@ -1098,7 +1131,8 @@ hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
                count = HAMMER2_SET_COUNT;
                break;
        default:
-               panic("hammer2_chain_push: unrecognized blockref type: %d",
+               panic("hammer2_chain_create_indirect: "
+                     "unrecognized blockref type: %d",
                      parent->bref.type);
                count = 0;
                break;
@@ -1350,9 +1384,44 @@ hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
  * or iteration when indirect blocks are also deleted as a side effect.
  */
 void
-hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
+hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
                      hammer2_chain_t *chain)
 {
+       hammer2_blockref_t *base;
+       int count;
+
+       /*
+        * Detach (chain) from (parent): the parent's blockref slot at
+        * chain->index is zeroed, the chain is removed from the parent's
+        * splay tree, and the chain is flagged HAMMER2_CHAIN_DELETED.
+        * Nothing is returned and the chain structure itself is not
+        * freed here.
+        *
+        * Mark the parent modified so our base[] pointer remains valid
+        * while we move entries.
+        *
+        * Calculate the blockref reference in the parent
+        *
+        * NOTE: base is derived from parent->data only after the modify
+        *       call above it; keep that ordering.
+        */
+       hammer2_chain_modify(hmp, parent);
+
+       switch(parent->bref.type) {
+       case HAMMER2_BREF_TYPE_INODE:
+               base = &parent->data->ipdata.u.blockset.blockref[0];
+               count = HAMMER2_SET_COUNT;
+               break;
+       case HAMMER2_BREF_TYPE_INDIRECT:
+               base = &parent->data->npdata.blockref[0];
+               count = HAMMER2_IND_COUNT;
+               break;
+       case HAMMER2_BREF_TYPE_VOLUME:
+               base = &hmp->voldata.sroot_blockset.blockref[0];
+               count = HAMMER2_SET_COUNT;
+               break;
+       default:
+               /* NOTE(review): base stays unassigned here; relies on panic() not returning */
+               panic("hammer2_chain_delete: unrecognized blockref type: %d",
+                     parent->bref.type);
+               count = 0;
+               break;
+       }
+       KKASSERT(chain->index >= 0 && chain->index < count);
+       base += chain->index;           /* the slot that refers to (chain) */
+       bzero(base, sizeof(*base));     /* clear the parent's reference */
+       SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
+       /* hammer2_chain_drop() skips its own SPLAY_REMOVE when this flag is set */
+       atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
 }
 
 /*
index 960cb87..3694346 100644 (file)
@@ -253,6 +253,16 @@ hammer2_create_inode(hammer2_mount_t *hmp,
        nip->ip_data.nlinks = 1;
        /* uid, gid, etc */
 
+       /*
+        * Regular files and softlinks allow a small amount of data to be
+        * directly embedded in the inode.  This flag will be cleared if
+        * the size is extended past the embedded limit.
+        */
+       if (nip->ip_data.type == HAMMER2_OBJTYPE_REGFILE ||
+           nip->ip_data.type == HAMMER2_OBJTYPE_SOFTLINK) {
+               nip->ip_data.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
+       }
+
        KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
        bcopy(name, nip->ip_data.filename, name_len);
        nip->ip_data.name_key = lhc;
index 06b5457..00d59fe 100644 (file)
 
 #define ZFOFFSET       (-2LL)
 
+static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize,
+                               int trivial);
+static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
+
 /*
  * Last reference to a vnode is going away but it is still cached.
  */
@@ -192,6 +196,87 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
 
+/*
+ * Set attributes on a vnode.  Only the user flags (va_flags) and, for
+ * regular files, the file size (va_size) are handled so far.
+ *
+ * Returns 0 on success or:
+ *     EROFS   the mount is read-only
+ *     EPERM   the inode is IMMUTABLE or APPEND and this call did not
+ *             itself just process the flags
+ *     EINVAL  a resize was requested on something other than VREG
+ *     (other) whatever vop_helper_setattr_flags() returned
+ *
+ * The inode is exclusively locked for the duration of the operation.
+ */
 static
 int
+hammer2_vop_setattr(struct vop_setattr_args *ap)
+{
+       hammer2_mount_t *hmp;
+       hammer2_inode_t *ip;
+       struct vnode *vp;
+       struct vattr *vap;
+       int error;
+       int kflags = 0;         /* knote events, recorded but not yet posted */
+       int doctime = 0;        /* ctime update wanted, not yet consumed */
+       int domtime = 0;        /* mtime update wanted, not yet consumed */
+
+       vp = ap->a_vp;
+       vap = ap->a_vap;
+
+       ip = VTOI(vp);
+       hmp = ip->hmp;
+
+       if (hmp->ronly)
+               return(EROFS);
+
+       hammer2_inode_lock_ex(ip);
+       error = 0;
+
+       if (vap->va_flags != VNOVAL) {
+               u_int32_t flags;
+
+               flags = ip->ip_data.uflags;
+               error = vop_helper_setattr_flags(&flags, vap->va_flags,
+                                        hammer2_to_unix_xid(&ip->ip_data.uid),
+                                        ap->a_cred);
+
+               /*
+                * A rejected flags change terminates the operation.  Do
+                * not fall through and resize the file while returning
+                * an error to the caller.
+                */
+               if (error)
+                       goto done;
+
+               if (ip->ip_data.uflags != flags) {
+                       hammer2_chain_modify(hmp, &ip->chain);
+                       ip->ip_data.uflags = flags;
+                       doctime = 1;
+                       kflags |= NOTE_ATTRIB;
+               }
+
+               /*
+                * If the resulting flags make the file immutable or
+                * append-only we are done, successfully.
+                */
+               if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
+                       error = 0;
+                       goto done;
+               }
+       }
+
+       /*
+        * No other attribute may be changed on an immutable or
+        * append-only file.
+        */
+       if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
+               error = EPERM;
+               goto done;
+       }
+       /* uid, gid */
+
+       /*
+        * Resize the file.  The outer test already guarantees the size
+        * actually changes, so only the direction needs to be decided.
+        * The inode chain is marked modified before the helpers run and
+        * the new size is stored only after they return.
+        */
+       if (vap->va_size != VNOVAL && ip->ip_data.size != vap->va_size) {
+               switch(vp->v_type) {
+               case VREG:
+                       hammer2_chain_modify(hmp, &ip->chain);
+                       if (vap->va_size < ip->ip_data.size)
+                               hammer2_truncate_file(ip, vap->va_size);
+                       else
+                               hammer2_extend_file(ip, vap->va_size, 0);
+                       ip->ip_data.size = vap->va_size;
+                       domtime = 1;
+                       break;
+               default:
+                       error = EINVAL;
+                       goto done;
+               }
+       }
+done:
+       hammer2_inode_unlock_ex(ip);
+       return (error);
+}
+
+static
+int
 hammer2_vop_readdir(struct vop_readdir_args *ap)
 {
        hammer2_mount_t *hmp;
@@ -456,7 +541,17 @@ hammer2_vop_write(struct vop_write_args *ap)
        bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
 
        /*
-        * UIO read loop
+        * ip must be locked if extending the file.
+        * ip must be locked to avoid racing a truncation.
+        */
+       hammer2_inode_lock_ex(ip);
+       hammer2_chain_modify(hmp, &ip->chain);
+
+       if (ap->a_ioflag & IO_APPEND)
+               uio->uio_offset = ip->ip_data.size;
+
+       /*
+        * UIO write loop
         */
        while (uio->uio_resid > 0) {
                hammer2_key_t nsize;
@@ -497,11 +592,7 @@ hammer2_vop_write(struct vop_write_args *ap)
                                trivial = 0;
                        else
                                trivial = 1;
-                       nvextendbuf(vp, ip->ip_data.size, nsize,
-                                   HAMMER2_LBUFSIZE, HAMMER2_LBUFSIZE,
-                                   (int)(ip->ip_data.size & HAMMER2_LBUFMASK),
-                                   (int)(nsize),
-                                   trivial);
+                       hammer2_extend_file(ip, nsize, trivial);
                        kflags |= NOTE_EXTEND;
                        fixsize = 1;
                } else {
@@ -559,10 +650,8 @@ hammer2_vop_write(struct vop_write_args *ap)
 
                if (error) {
                        brelse(bp);
-                       if (fixsize) {
-                               nvtruncbuf(vp, ip->ip_data.size,
-                                          HAMMER2_LBUFSIZE, HAMMER2_LBUFSIZE);
-                       }
+                       if (fixsize)
+                               hammer2_truncate_file(ip, ip->ip_data.size);
                        break;
                }
                kflags |= NOTE_WRITE;
@@ -586,9 +675,130 @@ hammer2_vop_write(struct vop_write_args *ap)
                }
        }
        /* hammer2_knote(vp, kflags); */
+       hammer2_inode_unlock_ex(ip);
        return (error);
 }
 
+/*
+ * Truncate the size of a file.  The inode must be locked and marked
+ * for modification.  The caller will set ip->ip_data.size after we
+ * return, we do not do it ourselves.
+ *
+ * ip    - inode being truncated
+ * nsize - new file size in bytes
+ *
+ * The work is done in three steps:
+ *
+ *     (1) Logical buffer cache buffers past the new EOF are destroyed.
+ *     (2) If the (logical-block aligned) new EOF falls inside a physical
+ *         block, the tail of that block is zeroed via chain_modify.
+ *     (3) All DATA chains at or beyond the next physical block boundary
+ *         are deleted.
+ *
+ * Nothing is returned.  If the inode's chain cannot be locked the
+ * truncation of the on-media chains is silently abandoned (XXX).
+ */
+static
+void
+hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
+{
+       hammer2_chain_t *parent;
+       hammer2_chain_t *chain;
+       hammer2_mount_t *hmp = ip->hmp;
+       hammer2_key_t psize;
+       int error;
+
+       /*
+        * Destroy any logical buffer cache buffers beyond the file EOF
+        * and partially clean out any straddling buffer.
+        */
+       if (ip->vp) {
+               nvtruncbuf(ip->vp, nsize,
+                          HAMMER2_LBUFSIZE, nsize & HAMMER2_LBUFMASK);
+       }
+
+       /*
+        * From here on work with the new EOF rounded up to a logical
+        * buffer boundary.
+        */
+       nsize = (nsize + HAMMER2_LBUFMASK64) & ~HAMMER2_LBUFMASK64;
+
+       /*
+        * Setup for lookup/next
+        */
+       parent = &ip->chain;
+       hammer2_chain_ref(hmp, parent);
+       error = hammer2_chain_lock(hmp, parent);
+       if (error) {
+               hammer2_chain_put(hmp, parent);
+               /* XXX error reporting */
+               return;
+       }
+
+       /*
+        * Calculate the first physical buffer beyond the new file EOF.
+        * The straddling physical buffer will be at (psize - PBUFSIZE).
+        */
+       psize = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64;
+
+       /*
+        * Zero the portion of the straddling physical block that lies
+        * beyond the new EOF.  The lookup is a locked one, so the result
+        * is released with chain_put whatever its type turns out to be.
+        */
+       if (nsize != psize) {
+               KKASSERT(psize >= HAMMER2_PBUFSIZE64);
+               chain = hammer2_chain_lookup(hmp, &parent,
+                                            psize - HAMMER2_PBUFSIZE,
+                                            psize - HAMMER2_PBUFSIZE, 0);
+               if (chain) {
+                       if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
+                               hammer2_chain_modify(hmp, chain);
+                               bzero(chain->data->buf +
+                                     (int)(nsize & HAMMER2_PBUFMASK64),
+                                     (size_t)(psize - nsize));
+                               kprintf("ZEROBIGBOY %08x/%zd\n",
+                                     (int)(nsize & HAMMER2_PBUFMASK64),
+                                     (size_t)(psize - nsize));
+                       }
+                       hammer2_chain_put(hmp, chain);
+               }
+       }
+
+       /*
+        * Scan everything from psize to the end of the key space.  The
+        * chains are returned referenced but unlocked (NOLOCK).
+        */
+       chain = hammer2_chain_lookup(hmp, &parent,
+                                    psize, (hammer2_key_t)-1,
+                                    HAMMER2_LOOKUP_NOLOCK);
+       while (chain) {
+               /*
+                * Degenerate embedded data case, nothing to loop on.
+                *
+                * The lookup took a reference on the inode chain when it
+                * returned it for the embedded-data shortcut.  That ref
+                * must be dropped here or it is leaked, as we do not
+                * hand the chain back to chain_next.
+                */
+               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
+                       hammer2_chain_drop(hmp, chain);
+                       break;
+               }
+
+               /*
+                * Delete physical data blocks past the file EOF.
+                */
+               if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
+                       hammer2_chain_delete(hmp, parent, chain);
+               }
+
+               /*
+                * chain_next disposes of the chain we pass in.
+                */
+               chain = hammer2_chain_next(hmp, &parent, chain,
+                                          psize, (hammer2_key_t)-1,
+                                          HAMMER2_LOOKUP_NOLOCK);
+       }
+       hammer2_chain_put(hmp, parent);
+}
+
+/*
+ * Extend the size of a file.  The inode must be locked and marked
+ * for modification.  The caller will set ip->ip_data.size after we
+ * return, we do not do it ourselves.
+ *
+ * ip      - inode being extended; ip->ip_data.size still holds the old
+ *           size when we are called
+ * nsize   - new file size in bytes
+ * trivial - handed through unchanged to nvextendbuf()
+ *
+ * Nothing is returned.
+ */
+static
+void
+hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize, int trivial)
+{
+       struct buf *bp;
+       int error;
+
+       /*
+        * Turn off the embedded-data-in-inode feature if the file size
+        * extends past the embedded limit.  To keep things simple this
+        * feature is never re-enabled once disabled.
+        *
+        * The statement order below matters.  The logical buffer at
+        * offset 0 is read while DIRECTDATA is still set (the strategy
+        * read code copies the embedded bytes into the buffer in that
+        * case), only then is the flag cleared and the u.blockset area
+        * zeroed, and finally the buffer is queued with bdwrite().
+        *
+        * NOTE(review): u.blockset presumably overlays the u.data area
+        *               used for the embedded bytes - confirm.
+        * NOTE(review): ip->vp is used unconditionally by the bread()
+        *               but is NULL-checked further down; confirm that
+        *               callers always supply a vnode on this path.
+        */
+       if ((ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
+           nsize > HAMMER2_EMBEDDED_BYTES) {
+               error = bread(ip->vp, 0, HAMMER2_LBUFSIZE, &bp);
+               KKASSERT(error == 0);   /* a failed read is not handled */
+               ip->ip_data.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
+               bzero(&ip->ip_data.u.blockset,
+                     sizeof(ip->ip_data.u.blockset));
+               bdwrite(bp);
+       }
+       if (ip->vp) {
+               /* old and new buffer offsets are both masked to the logical block */
+               nvextendbuf(ip->vp, ip->ip_data.size, nsize,
+                           HAMMER2_LBUFSIZE, HAMMER2_LBUFSIZE,
+                           (int)(ip->ip_data.size & HAMMER2_LBUFMASK),
+                           (int)(nsize & HAMMER2_LBUFMASK),
+                           trivial);
+       }
+}
+
 static
 int
 hammer2_vop_nresolve(struct vop_nresolve_args *ap)
@@ -750,13 +960,25 @@ hammer2_vop_bmap(struct vop_bmap_args *ap)
        hammer2_chain_ref(hmp, parent);
        hammer2_chain_lock(hmp, parent);
        chain = hammer2_chain_lookup(hmp, &parent, loff, loff, 0);
-       if (chain) {
+       if (chain == NULL) {
+               /*
+                * zero-fill hole
+                */
+               *ap->a_doffsetp = ZFOFFSET;
+       } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
+               /*
+                * Normal data ref
+                */
                poff = loff - chain->bref.key +
                       (chain->bref.data_off & HAMMER2_OFF_MASK);
                *ap->a_doffsetp = poff;
                hammer2_chain_put(hmp, chain);
        } else {
-               *ap->a_doffsetp = ZFOFFSET;     /* zero-fill hole */
+               /*
+                * Data is embedded in inode, no direct I/O possible.
+                */
+               *ap->a_doffsetp = NOOFFSET;
+               hammer2_chain_put(hmp, chain);
        }
        hammer2_chain_put(hmp, parent);
        return (0);
@@ -879,6 +1101,8 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
        hammer2_chain_t *chain;
        hammer2_key_t loff;
        hammer2_off_t poff;
+       size_t ddlen = 0;       /* direct data shortcut */
+       char *ddata = NULL;
 
        bio = ap->a_bio;
        bp = bio->bio_buf;
@@ -901,22 +1125,54 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
                 */
                chain = hammer2_chain_lookup(hmp, &parent, loff, loff,
                                             HAMMER2_LOOKUP_NOLOCK);
-               if (chain) {
+               if (chain == NULL) {
+                       /*
+                        * Data is zero-fill
+                        */
+                       nbio->bio_offset = ZFOFFSET;
+               } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
+                       /*
+                        * Data is on-media, implement direct-read
+                        */
                        poff = loff - chain->bref.key +
                               (chain->bref.data_off & HAMMER2_OFF_MASK);
                        nbio->bio_offset = poff;
                        hammer2_chain_drop(hmp, chain);
+               } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
+                       /*
+                        * Data is embedded in the inode
+                        */
+                       ddata = chain->data->ipdata.u.data;
+                       ddlen = HAMMER2_EMBEDDED_BYTES;
+                       KKASSERT(chain == parent);
+                       hammer2_chain_drop(hmp, chain);
+                       /* leave bio_offset set to NOOFFSET */
                } else {
-                       nbio->bio_offset = ZFOFFSET;
+                       panic("hammer2_strategy_read: unknown bref type");
                }
                hammer2_chain_put(hmp, parent);
        }
-       if (nbio->bio_offset == ZFOFFSET) {
+       if (ddlen) {
+               /*
+                * Data embedded directly in inode
+                */
+               bp->b_resid = 0;
+               bp->b_error = 0;
+               vfs_bio_clrbuf(bp);
+               bcopy(ddata, bp->b_data, ddlen);
+               biodone(nbio);
+       } else if (nbio->bio_offset == ZFOFFSET) {
+               /*
+                * Data is zero-fill
+                */
                bp->b_resid = 0;
                bp->b_error = 0;
                vfs_bio_clrbuf(bp);
                biodone(nbio);
        } else {
+               /*
+                * Data on media
+                */
                vn_strategy(hmp->devvp, nbio);
        }
        return (0);
@@ -956,16 +1212,40 @@ hammer2_strategy_write(struct vop_strategy_args *ap)
        parent = &ip->chain;
        hammer2_chain_ref(hmp, parent);
        hammer2_chain_lock(hmp, parent);
+       /*
+        * XXX implement NODATA flag to avoid instantiating bp if
+        * it isn't already present for direct-write implementation.
+        */
        chain = hammer2_chain_lookup(hmp, &parent, off_hi, off_hi, 0);
-       if (chain) {
-               hammer2_chain_modify(hmp, chain);
-               bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
-       } else {
+       if (chain == NULL) {
+               /*
+                * A new data block must be allocated.
+                */
                chain = hammer2_chain_create(hmp, parent,
                                             off_hi, HAMMER2_PBUFRADIX,
                                             HAMMER2_BREF_TYPE_DATA,
                                             HAMMER2_PBUFSIZE);
                bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
+       } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
+               /*
+                * The data is embedded in the inode
+                */
+               hammer2_chain_modify(hmp, chain);
+               if (off_lo < HAMMER2_EMBEDDED_BYTES) {
+                       bcopy(bp->b_data,
+                             chain->data->ipdata.u.data + off_lo,
+                             HAMMER2_EMBEDDED_BYTES - off_lo);
+               }
+       } else {
+               /*
+                * The data is on media, possibly in a larger block.
+                *
+                * XXX implement direct-write if bp not cached using NODATA
+                *     flag.
+                */
+               hammer2_chain_modify(hmp, chain);
+               KKASSERT(bp->b_bcount <= HAMMER2_PBUFSIZE - off_lo);
+               bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
        }
        if (off_lo + bp->b_bcount == HAMMER2_PBUFSIZE)
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_IOFLUSH);
@@ -1015,6 +1295,7 @@ struct vop_ops hammer2_vnode_vops = {
        .vop_close      = hammer2_vop_close,
        .vop_ncreate    = hammer2_vop_ncreate,
        .vop_getattr    = hammer2_vop_getattr,
+       .vop_setattr    = hammer2_vop_setattr,
        .vop_readdir    = hammer2_vop_readdir,
        .vop_getpages   = vop_stdgetpages,
        .vop_putpages   = vop_stdputpages,